In [26]:
# Common imports
import numpy as np
import os

# Seed NumPy's global random number generator so that any NumPy-driven
# randomness in this notebook gives the same output on every run.
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Apply one label size to the axis titles and to both sets of tick labels.
mpl.rc(("axes", "xtick", "ytick"), labelsize=15)

# Figures are saved under <project root>/images/<chapter id>; create that
# directory up front so later saves do not fail on a missing folder.
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "decision_trees"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=100):
    """Write the current pyplot figure to ``IMAGES_PATH/<fig_id>.<fig_extension>``.

    Args:
        fig_id: Base file name without extension; also echoed to stdout.
        tight_layout: When true, ``plt.tight_layout()`` is called before saving.
        fig_extension: File extension, also passed to ``savefig`` as ``format``.
        resolution: Passed to ``savefig`` as ``dpi``.
    """
    file_name = ".".join((fig_id, fig_extension))
    destination = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(destination, format=fig_extension, dpi=resolution)
    
from sklearn.tree import DecisionTreeClassifier
import pandas as pd

# Load the fetal-health CSV, rename two columns, and keep only the two
# predictor columns plus the target column.
df = (
    pd.read_csv("../input/fetal-health-classification/fetal_health.csv")
    .rename(columns={
        'baseline value': 'baseline_value',
        'prolongued_decelerations': 'prolonged_decelerations',
    })
    .filter(items=[
        'prolonged_decelerations',
        'abnormal_short_term_variability',
        'fetal_health',
    ])
)

# Split the frame into the target column and the remaining predictors.
y = df['fetal_health']
X_raw = df.drop(columns='fetal_health')

# Standardise the predictors and hand the classifier a plain NumPy array.
from sklearn.preprocessing import StandardScaler
scale_X = StandardScaler()
X = pd.DataFrame(scale_X.fit_transform(X_raw), columns=X_raw.columns).values

# Depth-2 tree with a fixed random_state.
tree_clf = DecisionTreeClassifier(max_depth=2, random_state=42)
tree_clf.fit(X, y)

from sklearn.tree import export_graphviz

def image_path(fig_id):
    """Return the path of the file named ``fig_id`` inside ``IMAGES_PATH``."""
    location = os.path.join(IMAGES_PATH, fig_id)
    return location

# Write the fitted tree out as Graphviz DOT text in the images directory.
# NOTE(review): class_names presumably lists the fetal_health classes in
# ascending order -- confirm against the labels actually present in y.
export_graphviz(
    tree_clf,
    out_file=image_path("fetal.dot"),
    feature_names=['prolonged_decelerations', 'abnormal_short_term_variability'],
    class_names=["1", "2", "3"],
    filled=True,
    rounded=True,
)

from matplotlib.colors import ListedColormap

def plot_decision_boundary(clf, X, y, axes=(0, 7.5, 0, 3), legend=True, plot_training=True):
    """Draw a classifier's predicted regions over a two-feature plane.

    A 50 x 50 grid spanning ``axes`` is classified with ``clf.predict`` and
    rendered on the current pyplot figure as filled regions plus contour
    lines. Axis labels are fixed to the two fetal-health feature names.

    Args:
        clf: Fitted estimator whose ``predict`` accepts two-column input.
        X: Array indexed as ``X[:, 0]`` and ``X[:, 1]`` (the two features).
        y: Labels aligned with the rows of ``X``; samples labelled 1, 2 and 3
            are drawn when ``plot_training`` is true.
        axes: ``(x_min, x_max, y_min, y_max)`` extent of the prediction grid.
            The default is an immutable tuple so it cannot be shared and
            mutated between calls.
        legend: When true, add a legend in the upper-left corner.
        plot_training: When true, overlay the training samples and clamp the
            view to ``axes``.
    """
    # Evaluate the classifier on a regular grid covering the requested extent.
    x1s = np.linspace(axes[0], axes[1], 50)
    x2s = np.linspace(axes[2], axes[3], 50)
    x1, x2 = np.meshgrid(x1s, x2s)
    X_new = np.c_[x1.ravel(), x2.ravel()]
    y_pred = clf.predict(X_new).reshape(x1.shape)

    # Light fill for the predicted regions, darker lines for the boundaries.
    custom_cmap = ListedColormap(['#fafab0', '#9898ff', '#a0faa0'])
    plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap)
    custom_cmap2 = ListedColormap(['#7d7d58', '#4c4c7f', '#507d50'])
    plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)

    if plot_training:
        # One marker style per class label; the legend text is the label.
        for class_label, marker in ((1, "yo"), (2, "bs"), (3, "g^")):
            members = y == class_label
            plt.plot(X[:, 0][members], X[:, 1][members], marker,
                     label=str(class_label))
        plt.axis(axes)

    plt.xlabel("prolonged_decelerations", fontsize=30)
    plt.ylabel("abnormal_short_term_variability", fontsize=30)

    if legend:
        plt.legend(loc="upper left", fontsize=20)

# Render the decision regions of the fitted tree over the training data.
plt.figure(figsize=(10, 8))
plot_decision_boundary(tree_clf, X, y)

# Hand-drawn guide lines and depth annotations on top of the regions.
# NOTE(review): the first segment is diagonal and all coordinates are
# literals rather than values read from tree_clf -- confirm they match the
# thresholds of the fitted tree.
plt.plot([0, 13], [1.1, 0], "k-", linewidth=2)
plt.plot([2, 2], [0, 3], "k--", linewidth=2)
plt.text(1.40, 1.0, "Depth=0", fontsize=15)
plt.text(4.05, 0.5, "(Depth=1)", fontsize=15)

save_fig("Lab4_decision_tree_decision_boundaries_plot")
plt.show()

# Predicting classes and class probabilities.
# Both results are printed explicitly: a bare expression is only displayed
# when it is the last one in a notebook cell, so the class prediction would
# otherwise be discarded.
# NOTE(review): tree_clf was fit on standardised features, so this sample is
# interpreted in scaled units -- apply scale_X.transform first if raw
# measurements were intended.
print(tree_clf.predict([[5, 1.5]]))
print(tree_clf.predict_proba([[5, 1.5]]))

