Data set: Dry Bean
Model: Decision Tree Classifier
Steps: 
1. Load data.
2. Split data into training and test sets.
3. Create model.
4. Evaluate model.
5. Generate 6 metrics.

In [1]:
import os

import numpy as np
import pandas as pd

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, matthews_corrcoef
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Read the dataset.
# The workbook location can be overridden with the DRY_BEAN_DATASET_PATH
# environment variable so the notebook is not tied to one machine's directory
# layout. When the variable is unset or empty, the original path is used.
DEFAULT_DATASET_PATH = "/Users/stalukda/Documents/Automation_exercise/BITS-ML-AS-2/Dry_Bean_Dataset.xlsx"
DATASET_PATH = os.environ.get("DRY_BEAN_DATASET_PATH") or DEFAULT_DATASET_PATH

df = pd.read_excel(DATASET_PATH)

# Every column except the last is used as a feature; the last column is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [3]:
# Hold out 30% of the rows as a test set. The split is stratified on the target
# and uses a fixed seed so that it is repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)

In [4]:
# Create an instance of the Decision Tree classifier.
# random_state is pinned to a fixed seed for reproducible runs.
dt = DecisionTreeClassifier(random_state=42)

In [5]:
# Fit the decision tree on the training split, then score it on the test split.
model_name = "Decision Tree"

dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)        # predicted class label per test row
y_prob = dt.predict_proba(X_test)  # per-class probabilities; columns follow dt.classes_

accuracy = accuracy_score(y_test, y_pred)
# Pass labels explicitly so the probability columns are matched to the classes
# the tree was trained on, rather than to whatever labels appear in y_test.
# NOTE: averaging is left at roc_auc_score's default, unlike the explicitly
# weighted precision/recall/F1 below.
auc = roc_auc_score(y_test, y_prob, multi_class='ovr', labels=dt.classes_)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
mcc = matthews_corrcoef(y_test, y_pred)

# Report all six metrics with four decimal places, in a fixed order.
metrics = {
    "Accuracy": accuracy,
    "AUC Score": auc,
    "Precision": precision,
    "Recall": recall,
    "F1 Score": f1,
    "MCC": mcc,
}

print(f"\nModel: {model_name}")
for metric_label, metric_value in metrics.items():
    print(f"{metric_label}: {metric_value:.4f}")


Model: Decision Tree
Accuracy: 0.8920
AUC Score: 0.9436
Precision: 0.8917
Recall: 0.8920
F1 Score: 0.8918
MCC: 0.8694
