In [31]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [35]:
# Load heart.csv into `df` and print a quick structural overview of it.
df = pd.read_csv("heart.csv")
print("Dataset Loaded Successfully!")
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line after the report.
df.info()
print(df.describe())
print("Missing Values:\n", df.isna().sum())
# NOTE(review): the recorded test/CV scores further down are close to 100%;
# exact duplicate rows shared between train and test folds would explain that.
# Surface the count here so it can be confirmed before trusting the metrics.
print("Duplicate Rows:", df.duplicated().sum())

Dataset Loaded Successfully!
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   52    1   0       125   212    0        1      168      0      1.0      2   
1   53    1   0       140   203    1        0      155      1      3.1      0   
2   70    1   0       145   174    0        1      125      1      2.6      0   
3   61    1   0       148   203    0        1      161      0      0.0      2   
4   62    0   0       138   294    1        1      106      0      1.9      1   

   ca  thal  target  
0   2     3       0  
1   0     3       0  
2   0     3       0  
3   1     3       0  
4   3     2       0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol  

In [39]:
# Separate the label column from the predictors, then hold out 20% of the rows
# for testing. The split is stratified on the label and seeded for repeatability.
y = df["target"]
X = df.drop(columns="target")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print("\nTrain/Test Split Done!")
for label, part in (("Train Shape:", X_train), ("Test Shape:", X_test)):
    print(label, part.shape)



Train/Test Split Done!
Train Shape: (820, 13)
Test Shape: (205, 13)


In [41]:
# Baseline: a decision tree with no depth limit, fitted on the training split
# and evaluated on the held-out test split.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_report = classification_report(y_test, y_pred_dt)
dt_confusion = confusion_matrix(y_test, y_pred_dt)

print("\n=== DECISION TREE (DEFAULT) ===")
print("Accuracy:", dt_accuracy)
print("\nClassification Report:\n", dt_report)
print("Confusion Matrix:\n", dt_confusion)


=== DECISION TREE (DEFAULT) ===
Accuracy: 0.9853658536585366

Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       100
           1       1.00      0.97      0.99       105

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205

Confusion Matrix:
 [[100   0]
 [  3 102]]


In [43]:
# Draw the unconstrained decision tree `dt` and save it as a PNG under images/.
# The folder is created here; the later plotting cells save into it as well.
os.makedirs("images", exist_ok=True)

fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    dt,
    # Pass a plain list rather than the pandas Index: some scikit-learn
    # releases validate `feature_names` as list/None and reject an Index.
    feature_names=list(X.columns),
    class_names=[str(c) for c in np.unique(y)],
    filled=True,
    rounded=True,
    fontsize=8,
    ax=ax,
)
ax.set_title("Decision Tree - Full Depth")
fig.tight_layout()
fig.savefig("images/decision_tree_full.png", dpi=300, bbox_inches="tight")
# Close this specific figure so it is released once the file is written.
plt.close(fig)

In [45]:
# Sweep max_depth for a decision tree and record train/test accuracy at each
# setting (None means no depth limit), then print and plot the two curves.
depths = [2, 3, 4, 5, 6, 8, 10, None]

# One fitted tree per depth, all with the same seed and training data.
fitted_trees = [
    DecisionTreeClassifier(max_depth=depth, random_state=42).fit(X_train, y_train)
    for depth in depths
]
train_acc = [tree.score(X_train, y_train) for tree in fitted_trees]
test_acc = [tree.score(X_test, y_test) for tree in fitted_trees]

print("\n=== DEPTH vs ACCURACY ===")
for d, ta, tsa in zip(depths, train_acc, test_acc):
    print(f"Depth={d} | Train Acc={ta:.4f} | Test Acc={tsa:.4f}")


# Depths are plotted as string categories so that "None" gets its own tick.
tick_labels = list(map(str, depths))
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(tick_labels, train_acc, marker='o', label="Train Accuracy")
ax.plot(tick_labels, test_acc, marker='o', label="Test Accuracy")
ax.set_xlabel("Max Depth")
ax.set_ylabel("Accuracy")
ax.set_title("Max Depth vs Accuracy")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig("images/depth_vs_accuracy.png", dpi=300, bbox_inches="tight")
plt.close(fig)


=== DEPTH vs ACCURACY ===
Depth=2 | Train Acc=0.7695 | Test Acc=0.7220
Depth=3 | Train Acc=0.8451 | Test Acc=0.8537
Depth=4 | Train Acc=0.8854 | Test Acc=0.8390
Depth=5 | Train Acc=0.9293 | Test Acc=0.8732
Depth=6 | Train Acc=0.9659 | Test Acc=0.9220
Depth=8 | Train Acc=0.9976 | Test Acc=0.9756
Depth=10 | Train Acc=1.0000 | Test Acc=0.9854
Depth=None | Train Acc=1.0000 | Test Acc=0.9854


In [47]:
# Fit a depth-limited decision tree, report its test metrics, and save a
# rendering of the tree under images/.
#
# NOTE(review): best_depth is a hand-picked constant. In the recorded depth
# sweep, depth 4 is not the highest-scoring setting on the test split, so
# "best" here presumably means "chosen to limit tree size" - TODO confirm.
best_depth = 4

dt_best = DecisionTreeClassifier(max_depth=best_depth, random_state=42)
dt_best.fit(X_train, y_train)
y_pred_dt_best = dt_best.predict(X_test)

print(f"\n=== DECISION TREE (BEST DEPTH = {best_depth}) ===")
print("Accuracy:", accuracy_score(y_test, y_pred_dt_best))
print("\nClassification Report:\n", classification_report(y_test, y_pred_dt_best))


fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    dt_best,
    # Pass a plain list rather than the pandas Index: some scikit-learn
    # releases validate `feature_names` as list/None and reject an Index.
    feature_names=list(X.columns),
    class_names=[str(c) for c in np.unique(y)],
    filled=True,
    rounded=True,
    fontsize=8,
    ax=ax,
)
ax.set_title(f"Decision Tree (max_depth={best_depth})")
fig.tight_layout()
# The output filename embeds the depth, so changing best_depth writes a new file.
fig.savefig(f"images/decision_tree_depth_{best_depth}.png", dpi=300, bbox_inches="tight")
plt.close(fig)


=== DECISION TREE (BEST DEPTH = 4) ===
Accuracy: 0.8390243902439024

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.80      0.83       100
           1       0.82      0.88      0.85       105

    accuracy                           0.84       205
   macro avg       0.84      0.84      0.84       205
weighted avg       0.84      0.84      0.84       205



In [49]:
# Random forest of 100 trees (seeded, using all CPU cores), fitted on the
# training split and scored on the held-out test split.
# NOTE(review): the recorded run reports 100% test accuracy; worth checking
# whether identical rows appear on both sides of the split - TODO confirm.
rf = RandomForestClassifier(
    n_estimators=100,
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)
rf_confusion = confusion_matrix(y_test, y_pred_rf)

print("\n=== RANDOM FOREST ===")
print("Accuracy:", rf_accuracy)
print("\nClassification Report:\n", rf_report)
print("Confusion Matrix:\n", rf_confusion)


=== RANDOM FOREST ===
Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       100
           1       1.00      1.00      1.00       105

    accuracy                           1.00       205
   macro avg       1.00      1.00      1.00       205
weighted avg       1.00      1.00      1.00       205

Confusion Matrix:
 [[100   0]
 [  0 105]]


In [51]:
# Rank the predictors by the fitted forest's feature_importances_ (largest
# first), print the ranking, and save it as a bar chart under images/.
print("\n=== FEATURE IMPORTANCE (RF) ===")
importances = rf.feature_importances_
# argsort is ascending; reversing it yields most-important-first positions.
indices = np.argsort(importances)[::-1]

for i in indices:
    print(f"{X.columns[i]}: {importances[i]:.4f}")

bar_positions = range(len(indices))
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(bar_positions, importances[indices])
ax.set_xticks(bar_positions)
ax.set_xticklabels(X.columns[indices], rotation=90)
ax.set_title("Random Forest Feature Importance")
fig.tight_layout()
fig.savefig("images/rf_feature_importance.png", dpi=300, bbox_inches="tight")
plt.close(fig)



=== FEATURE IMPORTANCE (RF) ===
cp: 0.1421
thalach: 0.1173
ca: 0.1148
oldpeak: 0.1126
thal: 0.0959
age: 0.0913
chol: 0.0778
exang: 0.0737
trestbps: 0.0678
slope: 0.0487
sex: 0.0267
restecg: 0.0204
fbs: 0.0108


In [53]:
# 5-fold cross-validation over the full feature table for the depth-limited
# tree and the random forest; only the mean fold accuracy is reported.
dt_cv = DecisionTreeClassifier(random_state=42, max_depth=best_depth)
rf_cv = RandomForestClassifier(random_state=42, n_estimators=100)

# Score both estimators the same way, tree first and forest second.
dt_scores, rf_scores = (
    cross_val_score(estimator, X, y, cv=5) for estimator in (dt_cv, rf_cv)
)

print("\n=== CROSS VALIDATION (5-FOLD) ===")
print(f"Decision Tree CV Mean Accuracy: {dt_scores.mean():.4f}")
print(f"Random Forest CV Mean Accuracy: {rf_scores.mean():.4f}")


print("\nAll tasks completed successfully! Check the images folder for saved plots.")


=== CROSS VALIDATION (5-FOLD) ===
Decision Tree CV Mean Accuracy: 0.8341
Random Forest CV Mean Accuracy: 0.9971

All tasks completed successfully! Check the images folder for saved plots.
