# 🧪 Random Forest Model Evaluation
Generated on 2025-04-18 07:37

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (confusion_matrix, ConfusionMatrixDisplay,
                             classification_report, accuracy_score,
                             precision_score, recall_score, f1_score,
                             roc_auc_score, roc_curve)

# Seed NumPy's global (legacy) random stream so every np.random.* draw that
# follows is reproducible from run to run.
np.random.seed(seed=42)

## 1️⃣ Generate Synthetic Dataset

In [None]:
# Build the synthetic table. The columns are drawn from the global NumPy
# random stream in the order Age, Income, Gender, Emergency_Admissions,
# Weekend; keep that order so a seeded run reproduces the same values.
n_rows = 500
df = pd.DataFrame({
    'Age': np.random.randint(18, 70, size=n_rows),
    'Income': np.random.normal(loc=50000, scale=15000, size=n_rows).astype(int),
    'Gender': np.random.choice([0, 1], size=n_rows),
    'Emergency_Admissions': np.random.randint(0, 50, size=n_rows),
    'Weekend': np.random.choice([0, 1], size=n_rows),
})

# The target is a deterministic rule on two of the features: it is 1 only for
# rows with Gender == 0 and more than 25 emergency admissions, otherwise 0.
gender_is_zero = df['Gender'].eq(0)
over_25_admissions = df['Emergency_Admissions'].gt(25)
df['Purchased'] = (gender_is_zero & over_25_admissions).astype(int)
df.head()

## 2️⃣ Train-Test Split

In [None]:
# Separate the feature columns from the binary target column.
X = df.drop(columns=['Purchased'])
y = df['Purchased']

# Hold out 20% of the rows for evaluation. The target is imbalanced (its rule
# requires Gender == 0 AND Emergency_Admissions > 25), so stratify on y to
# keep the class ratio the same in the train and test partitions; otherwise a
# small test set can over- or under-represent the positive class and skew
# the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## 3️⃣ Train Random Forest

In [None]:
# Random forest of 100 trees; random_state is fixed so repeated fits on the
# same training data build the same forest.
forest_params = {'n_estimators': 100, 'random_state': 42}
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)

## 4️⃣ Predictions & Probabilities

In [None]:
# Hard class labels for the held-out rows.
y_pred = clf.predict(X_test)

# predict_proba returns one column per class; column index 1 is kept as the
# continuous score used by the ROC/AUC step (presumably the probability of
# class 1, given the 0/1 target).
class_proba = clf.predict_proba(X_test)
y_proba = class_proba[:, 1]

## 5️⃣ Confusion Matrix

In [None]:
# Build the matrix from the predictions already stored in y_pred, so it
# describes exactly the labels the classification report uses and no second
# predict pass over X_test is needed.
ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
# Render this figure now so that later pyplot calls start on a fresh figure
# instead of drawing on top of the confusion matrix.
plt.show()

## 6️⃣ Classification Report

In [None]:
# Text summary of the test-set predictions (y_pred) against the true labels.
report_text = classification_report(y_test, y_pred)
print(report_text)

## 7️⃣ ROC Curve & AUC Score

In [None]:
# ROC points computed from the true test labels and the scores in y_proba.
fpr, tpr, thresholds = roc_curve(y_test, y_proba)

# Draw on a dedicated figure/axes rather than pyplot's implicit "current"
# axes, so the curve can never land on a figure left open by earlier code.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='ROC Curve')
# Diagonal reference line: the expected curve of a random classifier.
ax.plot([0, 1], [0, 1], 'k--', label='Random Guess')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend()
ax.grid(True)
plt.show()

# Area under the ROC curve, computed from the same labels and scores.
print("AUC Score:", roc_auc_score(y_test, y_proba))