In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix
import xgboost as xgb

np.random.seed(42)
n_samples = 1000
data = pd.DataFrame({
    'CustomerID': range(1, n_samples + 1),
    'Age': np.random.randint(18, 70, n_samples),
    'Income': np.random.randint(30000, 100000, n_samples),
    'Gender': np.random.choice(['Male', 'Female'], n_samples),
    'Made_Purchase': np.random.choice([0, 1], n_samples, p=[0.4, 0.6])
})
data['Gender'] = data['Gender'].map({'Male': 1, 'Female': 0})
X = data[['Age', 'Income', 'Gender']]
y = data['Made_Purchase']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
roc_auc_rf = roc_auc_score(y_test, y_pred_rf)

print("Random Forest Results:")
print(f"Accuracy: {accuracy_rf:.4f}")
print(f"Precision: {precision_rf:.4f}")
print(f"Recall: {recall_rf:.4f}")
print(f"AUC-ROC: {roc_auc_rf:.4f}")

xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
precision_xgb = precision_score(y_test, y_pred_xgb)
recall_xgb = recall_score(y_test, y_pred_xgb)
roc_auc_xgb = roc_auc_score(y_test, y_pred_xgb)

print("\nXGBoost Results:")
print(f"Accuracy: {accuracy_xgb:.4f}")
print(f"Precision: {precision_xgb:.4f}")
print(f"Recall: {recall_xgb:.4f}")
print(f"AUC-ROC: {roc_auc_xgb:.4f}")

cm_rf = confusion_matrix(y_test, y_pred_rf)
cm_xgb = confusion_matrix(y_test, y_pred_xgb)

print("\nConfusion Matrix for Random Forest:")
print(cm_rf)

print("\nConfusion Matrix for XGBoost:")
print(cm_xgb)

Random Forest Results:
Accuracy: 0.5300
Precision: 0.6032
Recall: 0.6333
AUC-ROC: 0.5042


Parameters: { "use_label_encoder" } are not used.




XGBoost Results:
Accuracy: 0.5200
Precision: 0.6000
Recall: 0.6000
AUC-ROC: 0.5000

Confusion Matrix for Random Forest:
[[30 50]
 [44 76]]

Confusion Matrix for XGBoost:
[[32 48]
 [48 72]]
