In [None]:
# Random Forest vs Traditional Algorithms (Logistic Regression, Decision Tree)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# --- Load and preprocess data ---
# Read the raw table; 'purchased' is the target, every other column is a feature.
df = pd.read_csv('dataset.csv')
y = df['purchased']
features = df.drop(columns=['purchased'])
# Dummy-encode the feature columns, dropping the first level of each encoded column.
X = pd.get_dummies(features, drop_first=True)

# Hold out 20% of the rows for testing; stratifying on y keeps the class ratio
# the same in both parts, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- Train models ---
# Each estimator is seeded with the same random_state so reruns are reproducible.
# fit() returns the fitted estimator, so construction and training are chained.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
lr = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# --- Evaluate and compare ---
# Display name -> fitted estimator; the loop below reports them in this order.
models = {
    'Random Forest': rf,
    'Decision Tree': dt,
    'Logistic Regression': lr,
}

for name, model in models.items():
    # Score every model against the same held-out test split.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\n{name} Accuracy: {accuracy:.4f}")
    # zero_division=0 reports 0 (instead of warning) for an undefined metric.
    report = classification_report(y_test, y_pred, zero_division=0)
    print(report)



Random Forest Accuracy: 0.4000
              precision    recall  f1-score   support

           0       0.38      0.30      0.33        10
           1       0.42      0.50      0.45        10

    accuracy                           0.40        20
   macro avg       0.40      0.40      0.39        20
weighted avg       0.40      0.40      0.39        20


Decision Tree Accuracy: 0.5000
              precision    recall  f1-score   support

           0       0.50      0.50      0.50        10
           1       0.50      0.50      0.50        10

    accuracy                           0.50        20
   macro avg       0.50      0.50      0.50        20
weighted avg       0.50      0.50      0.50        20


Logistic Regression Accuracy: 0.3500
              precision    recall  f1-score   support

           0       0.33      0.30      0.32        10
           1       0.36      0.40      0.38        10

    accuracy                           0.35        20
   macro avg       0.35   