Data set: Dry Bean
Model: Random Forest (Ensemble)
Steps: 
1. Load data.
2. Create Model
3. Evaluate Model
4. Generate 6 metrics

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, matthews_corrcoef
)

from sklearn.ensemble import RandomForestClassifier

In [3]:
# Load the Dry Bean workbook into a DataFrame.
# NOTE(review): the path below is absolute and specific to one user's machine;
# the notebook will not run elsewhere until it is pointed at a local copy.
df = pd.read_excel(
    "/Users/stalukda/Documents/Automation_exercise/BITS-ML-AS-2/Dry_Bean_Dataset.xlsx"
)

# Positional split: every column except the last is a feature, the last
# column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [4]:
# Hold out 30% of the rows for testing. The split is stratified on the target
# and seeded (random_state=42) so that re-running the cell gives the same
# partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)

In [5]:
# Build the (not yet fitted) Random Forest classifier: 100 trees, with a fixed
# seed so repeated runs construct the same forest.
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)

In [6]:
# Fit the model on the training split, score it on the held-out test split,
# and print the six evaluation metrics.
model_name = "Random Forest"

# Train, then produce both hard class predictions and per-class probabilities
# for the test rows.
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
y_prob = rf.predict_proba(X_test)

# Metrics computed from the predicted labels. Precision, recall and F1 use
# average='weighted'; accuracy and MCC take no averaging argument.
accuracy = accuracy_score(y_test, y_pred)
mcc = matthews_corrcoef(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# AUC is the only metric computed from the probabilities, using the
# one-vs-rest ('ovr') multi-class scheme.
auc = roc_auc_score(y_test, y_prob, multi_class='ovr')

# Emit the report in one call; sep="\n" puts each entry on its own line.
print(
    f"\nModel: {model_name}",
    f"Accuracy: {accuracy:.4f}",
    f"AUC Score: {auc:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    f"MCC: {mcc:.4f}",
    sep="\n",
)


Model: Random Forest
Accuracy: 0.9209
AUC Score: 0.9918
Precision: 0.9209
Recall: 0.9209
F1 Score: 0.9209
MCC: 0.9044


In [7]:
# Persist the fitted classifier to "random_forest.pkl" (a relative path).
# NOTE(review): this import sits outside the notebook's main import cell;
# consider moving it there so all dependencies are declared in one place.
import joblib

# Left as the cell's final expression so the notebook displays the value
# returned by dump().
joblib.dump(value=rf, filename="random_forest.pkl")

['random_forest.pkl']