# In [1]:
import sklearn
# Require scikit-learn 0.20 or newer.
# NOTE(review): this compares version strings lexicographically, not
# numerically (for example "0.9" >= "0.20" evaluates to True) -- confirm this
# is acceptable for the versions expected here.
assert sklearn.__version__ >= "0.20"
import numpy as np
import os
import numpy as np
import pandas as pd
import seaborn as sns
import graphviz
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from graphviz import Source
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn import datasets
from sklearn.tree import DecisionTreeRegressor

# Plot styling: label font size for the axes and for both tick axes.
mpl.rcParams["axes.labelsize"] = 14
mpl.rcParams["xtick.labelsize"] = 12
mpl.rcParams["ytick.labelsize"] = 12

# Output location for saved figures and exported .dot files; the directory
# is created at import time if it does not exist yet.
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "decision_trees"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under ``IMAGES_PATH``.

    The file is written as ``<fig_id>.<fig_extension>``.

    Args:
        fig_id: Base file name without extension; also echoed to stdout.
        tight_layout: When true, ``plt.tight_layout()`` is applied first.
        fig_extension: File extension, also passed to ``savefig`` as format.
        resolution: Passed to ``savefig`` as ``dpi``.
    """
    filename = ".".join((fig_id, fig_extension))
    destination = os.path.join(IMAGES_PATH, filename)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(destination, format=fig_extension, dpi=resolution)

def plot_decision_boundary(clf, X, y, axes=(0, 7.5, 0, 3), iris=True, legend=False, plot_training=True):
    """Draw the decision regions of a fitted two-feature classifier.

    Predictions are evaluated on a 100 x 100 grid spanning ``axes`` and drawn
    as filled contours on the current matplotlib figure.

    Args:
        clf: Fitted estimator whose ``predict`` accepts two-column input.
        X: 2-D array of samples; columns 0 and 1 are plotted.
        y: Array of labels for ``X``; samples labelled 0, 1 and 2 are drawn
            as three separate series.
        axes: ``(x1_min, x1_max, x2_min, x2_max)`` plot bounds. The default
            is a tuple rather than a list so the shared default object can
            never be mutated between calls.
        iris: When true, label the axes "Petal length"/"Petal width";
            otherwise label them x1/x2 and also draw contour outlines.
        legend: When true, add a legend in the lower-right corner.
        plot_training: When true, overlay the training samples and clamp
            the view to ``axes``.
    """
    x1s = np.linspace(axes[0], axes[1], 100)
    x2s = np.linspace(axes[2], axes[3], 100)
    x1, x2 = np.meshgrid(x1s, x2s)
    grid_points = np.c_[x1.ravel(), x2.ravel()]
    y_pred = clf.predict(grid_points).reshape(x1.shape)

    region_cmap = ListedColormap(['#fafab0', '#9898ff', '#a0faa0'])
    plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=region_cmap)
    if not iris:
        outline_cmap = ListedColormap(['#7d7d58', '#4c4c7f', '#507d50'])
        plt.contour(x1, x2, y_pred, cmap=outline_cmap, alpha=0.8)

    if plot_training:
        # (label value, matplotlib format string, legend entry) per series.
        series = (
            (0, "yo", "Iris setosa"),
            (1, "bs", "Iris versicolor"),
            (2, "g^", "Iris virginica"),
        )
        for label_value, fmt, name in series:
            mask = y == label_value
            plt.plot(X[:, 0][mask], X[:, 1][mask], fmt, label=name)
        plt.axis(axes)

    if iris:
        plt.xlabel("Petal length", fontsize=14)
        plt.ylabel("Petal width", fontsize=14)
    else:
        plt.xlabel(r"$x_1$", fontsize=18)
        plt.ylabel(r"$x_2$", fontsize=18, rotation=0)

    if legend:
        plt.legend(loc="lower right", fontsize=14)

def IrisDS():
    """Fit, evaluate and visualise a depth-2 decision tree on the iris data.

    Only the feature columns from index 2 onward are used. The function
    prints the accuracy measured on the same samples used for fitting,
    exports the tree to ``iris_tree.dot`` under ``IMAGES_PATH``, and draws
    the decision boundaries twice: once on the current figure and once on a
    new 8x4 figure annotated with split lines, which is saved and shown.

    Returns:
        graphviz.Source: The exported tree loaded back from the ``.dot``
        file. This object used to be built and then discarded, so nothing
        could ever display it; returning it lets a caller or notebook cell
        render the graph.
    """
    iris = datasets.load_iris()

    # Model the decision tree.
    X = iris.data[:, 2:]
    y = iris.target
    tree_clf = tree.DecisionTreeClassifier(max_depth=2, random_state=30)
    tree_clf = tree_clf.fit(X, y)

    # Compute the model accuracy (scored on the training samples themselves).
    accuracy = tree_clf.score(X, y)
    print("Precision Modelo Iris:", accuracy)
    plot_decision_boundary(tree_clf, X, y)

    dot_path = os.path.join(IMAGES_PATH, "iris_tree.dot")
    export_graphviz(
        tree_clf,
        out_file=dot_path,
        feature_names=iris.feature_names[2:],
        class_names=iris.target_names,
        rounded=True,
        filled=True,
    )
    tree_graph = Source.from_file(dot_path)

    plt.figure(figsize=(8, 4))
    plot_decision_boundary(tree_clf, X, y)
    # Split lines are drawn at hard-coded positions, not read from the
    # fitted tree.
    plt.plot([2.45, 2.45], [0, 3], "k-", linewidth=2)
    plt.plot([2.45, 7.5], [1.75, 1.75], "k--", linewidth=2)
    # NOTE(review): the dotted lines presumably mark where a deeper tree
    # would split next (hence the parenthesised label) -- confirm.
    plt.plot([4.95, 4.95], [0, 1.75], "k:", linewidth=2)
    plt.plot([4.85, 4.85], [1.75, 3], "k:", linewidth=2)
    plt.text(1.40, 1.0, "Depth=0", fontsize=15)
    plt.text(3.2, 1.80, "Depth=1", fontsize=13)
    plt.text(4.05, 0.5, "(Depth=2)", fontsize=11)

    save_fig("decision_tree_decision_boundaries_plot")
    plt.show()

    return tree_graph


def WineDS():
    """Fit and export a depth-2 decision tree on the wine dataset.

    All feature columns are used. The function prints the accuracy measured
    on the same samples used for fitting and writes the tree to
    ``wine_tree.dot`` under ``IMAGES_PATH``.

    Returns:
        graphviz.Source: The exported tree loaded back from the ``.dot``
        file. This object used to be built and then discarded, so nothing
        could ever display it; returning it lets a caller or notebook cell
        render the graph.
    """
    wine = datasets.load_wine()

    # Model the decision tree.
    X = wine.data
    y = wine.target
    tree_clf = tree.DecisionTreeClassifier(max_depth=2, random_state=30)
    tree_clf = tree_clf.fit(X, y)

    # Compute the model accuracy (scored on the training samples themselves).
    accuracy = tree_clf.score(X, y)
    print("Precision Modelo Wine:", accuracy)

    dot_path = os.path.join(IMAGES_PATH, "wine_tree.dot")
    export_graphviz(
        tree_clf,
        out_file=dot_path,
        feature_names=wine.feature_names,
        class_names=wine.target_names,
        filled=True,
        rounded=True,
    )
    return Source.from_file(dot_path)


def BreastCancerDS():
    """Fit and export a depth-2 decision tree on the breast cancer dataset.

    All feature columns are used. The function prints the accuracy measured
    on the same samples used for fitting and writes the tree to
    ``breastCancer_tree.dot`` under ``IMAGES_PATH``.

    Returns:
        graphviz.Source: The exported tree loaded back from the ``.dot``
        file. This object used to be built and then discarded, so nothing
        could ever display it; returning it lets a caller or notebook cell
        render the graph.
    """
    breastCancer = datasets.load_breast_cancer()

    # Model the decision tree.
    X = breastCancer.data
    y = breastCancer.target
    tree_clf = tree.DecisionTreeClassifier(max_depth=2, random_state=30)
    tree_clf = tree_clf.fit(X, y)

    # Compute the model accuracy (scored on the training samples themselves).
    accuracy = tree_clf.score(X, y)
    print("Precision Modelo Breast Cancer:", accuracy)

    dot_path = os.path.join(IMAGES_PATH, "breastCancer_tree.dot")
    export_graphviz(
        tree_clf,
        out_file=dot_path,
        feature_names=breastCancer.feature_names,
        class_names=breastCancer.target_names,
        filled=True,
        rounded=True,
    )
    return Source.from_file(dot_path)


def main():
    """Run the three dataset demos in order: iris, wine, breast cancer."""
    for run_demo in (IrisDS, WineDS, BreastCancerDS):
        run_demo()


if __name__ == "__main__":
    main()


# Recorded cell output: ModuleNotFoundError: No module named 'sklearn'