In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
)
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

try:
    # Removed in scikit-learn 1.2; still imported when the installed version has it.
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    pass

In [None]:
# Load the dataset from a CSV path relative to the current working directory.
df = pd.read_csv("./data/heart.csv")

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Print column dtypes and non-null counts to spot missing values.
df.info()

In [None]:
# Exploratory plots of the target column "output":
# its overall distribution, split by "sex", coloured by itself,
# and the "age" density for each target value.
sns.displot(data=df, x="output")
sns.displot(data=df, x="output", hue="sex", kind="kde")
sns.displot(data=df, x="output", hue="output")
sns.displot(data=df, x="age", hue="output", kind="kde")

In [None]:
# Split the frame into a feature matrix (every column except "output")
# and the label vector ("output"), both as NumPy arrays.
x = df.drop(columns="output").to_numpy()
y = df["output"].to_numpy()

In [None]:
# Sanity check: (n_rows, n_features) of the feature matrix.
x.shape

In [None]:
# Sanity check: the label vector should have one entry per row of x.
y.shape

In [None]:
# Hold out 30% of the rows for testing. Stratifying on the label keeps the
# class ratio comparable in both splits; the seed makes the shuffle repeatable.
xtr, xts, ytr, yts = train_test_split(
    x,
    y,
    test_size=0.3,
    shuffle=True,
    stratify=y,
    random_state=51,
)

In [None]:
# Shallow decision tree (at most 3 levels) split on entropy, seeded for repeatable fits.
dtclf = DecisionTreeClassifier(
    max_depth=3,
    criterion="entropy",
    random_state=51,
)

In [None]:
# Train the tree on the training split only.
dtclf.fit(xtr, ytr)

In [None]:
# Predicted labels for the held-out test rows.
ypr = dtclf.predict(xts)

In [None]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(yts, ypr)

In [None]:
# Draw the fitted tree with readable feature and class labels.
# Recent scikit-learn releases validate these arguments and reject a bare
# string ("01") or a pandas Index, so both are passed as plain lists.
# Class names follow the ascending order of the labels (0, then 1).
plot_tree(
    dtclf,
    feature_names=df.drop("output", axis=1).columns.tolist(),
    class_names=["0", "1"],
)

In [None]:
# Sweep max_depth from 3 to 19 and record the test-split accuracy for each value.
#
# The sweep uses its own variable names so it does not overwrite `dtclf` and
# `ypr`, the depth-3 model and its predictions fitted in earlier cells. Cells
# that run after this one therefore keep describing that model instead of
# whichever depth happened to be fitted last.
#
# NOTE: these scores come from the test split; picking a depth from them would
# leak test information into model selection.
dps = np.arange(3, 20)
accs = np.zeros(len(dps))
for i, depth in enumerate(dps):
    sweep_clf = DecisionTreeClassifier(criterion="entropy", random_state=51, max_depth=depth)
    sweep_clf.fit(xtr, ytr)
    accs[i] = accuracy_score(yts, sweep_clf.predict(xts))
    print(f"Max Depth: {depth}. Acc: {accs[i]}")

In [None]:
# Raw confusion matrix: rows are true labels, columns are predicted labels.
confusion_matrix(yts, ypr)

In [None]:
# Plot the confusion matrix of `dtclf` on the test split.
# `plot_confusion_matrix` was removed in scikit-learn 1.2, so the display is
# built from the matrix directly. `labels=dtclf.classes_` fixes the row/column
# order to the classifier's classes, and `display_labels` names them in that
# order (assumes class 0 means "No Risk" and class 1 means "Risk").
cmd = ConfusionMatrixDisplay(
    confusion_matrix(yts, dtclf.predict(xts), labels=dtclf.classes_),
    display_labels=["No Risk", "Risk"],
).plot()

In [None]:
# Per-class precision / recall / F1 on the test split.
# target_names are applied in sorted label order — assumes 0 = "No Risk", 1 = "Risk"; confirm against the dataset.
print(classification_report(yts, ypr, target_names=["No Risk", "Risk"]))