In [1]:
#Importing Required Libraries
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef,
    confusion_matrix,
    classification_report
)
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the dataset and, in the same chain, discard the two columns that are
# not used as features or target: the "id" column and "Unnamed: 32".
data = (
    pd.read_csv("C:/Users/Lingesh.L/Downloads/data.csv")
    .drop(columns=["id", "Unnamed: 32"])
)

In [3]:
# Encode the target variable: "B" -> 0, "M" -> 1.
#
# Series.map turns every value that is not a key of the mapping into NaN, so
# applying it to a column that has already been encoded (i.e. running this
# cell a second time) would wipe the whole target. Encode only when the column
# is not already made of 0/1 codes, and fail loudly on any unexpected label
# instead of letting NaN flow into the train/test split.
diagnosis_codes = {"B": 0, "M": 1}
if not data["diagnosis"].isin(list(diagnosis_codes.values())).all():
    encoded_diagnosis = data["diagnosis"].map(diagnosis_codes)
    if encoded_diagnosis.isna().any():
        unknown_labels = (
            data.loc[encoded_diagnosis.isna(), "diagnosis"].unique().tolist()
        )
        raise ValueError(f"Unexpected diagnosis labels: {unknown_labels}")
    data["diagnosis"] = encoded_diagnosis

X = data.drop(columns="diagnosis")  # Features: every column except the target
y = data["diagnosis"]  # Target: the 0/1 encoded diagnosis

In [4]:
# Train/test split: hold out 20% of the rows for evaluation. Stratifying on y
# keeps the class proportions the same in both parts, and the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [5]:
# Decision tree using the Gini criterion ("entropy" is the alternative).
# max_depth=None leaves the depth uncapped, so the full tree is grown;
# random_state pins the seed so repeated fits build the same tree.
dt_model = DecisionTreeClassifier(criterion="gini", max_depth=None, random_state=42)

# Fit on the training split. As the last expression of the cell, this also
# displays the fitted estimator.
dt_model.fit(X_train, y_train)


DecisionTreeClassifier(random_state=42)

In [6]:
# Hard class predictions for the held-out rows.
y_pred = dt_model.predict(X_test)

# predict_proba returns one column per class; column 1 is kept as the score
# for class 1 and is what the AUC computation consumes.
class_probabilities = dt_model.predict_proba(X_test)
y_pred_proba = class_probabilities[:, 1]

In [7]:
# The five metrics below all compare the true labels with the hard
# predictions, so they are computed in a single unpacking.
accuracy, precision, recall, f1, mcc = (
    metric(y_test, y_pred)
    for metric in (
        accuracy_score,
        precision_score,
        recall_score,
        f1_score,
        matthews_corrcoef,
    )
)
# AUC is the exception: it is computed from the class-1 probabilities rather
# than from the hard predictions.
auc = roc_auc_score(y_test, y_pred_proba)

# One print call, one report line per argument.
print(
    "Decision Tree Performance",
    f"Accuracy  : {accuracy:.4f}",
    f"AUC       : {auc:.4f}",
    f"Precision : {precision:.4f}",
    f"Recall    : {recall:.4f}",
    f"F1 Score  : {f1:.4f}",
    f"MCC       : {mcc:.4f}",
    sep="\n",
)

Decision Tree Performance
Accuracy  : 0.9298
AUC       : 0.9246
Precision : 0.9048
Recall    : 0.9048
F1 Score  : 0.9048
MCC       : 0.8492


In [8]:
# Each heading and its body go through one print call; sep="\n" puts the body
# on the line right after the heading.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Confusion Matrix:
[[68  4]
 [ 4 38]]

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        72
           1       0.90      0.90      0.90        42

    accuracy                           0.93       114
   macro avg       0.92      0.92      0.92       114
weighted avg       0.93      0.93      0.93       114



In [9]:
# Persist the fitted decision tree to disk.
model_path = "C:/Users/Lingesh.L/Downloads/pkl/decision_tree.pkl"

# joblib.dump does not create missing directories, so make sure the target
# folder exists first; exist_ok=True keeps re-runs of this cell harmless.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Last expression of the cell: displays the list of written file names.
joblib.dump(dt_model, model_path)

['C:/Users/Lingesh.L/Downloads/pkl/decision_tree.pkl']