In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
import matplotlib.pyplot as plt

# 1. Load the dataset
df = pd.read_csv(r'Data\bank-additional-full.csv', sep=';')

# 2. Preprocessing
# Drop Duration, Day, and Month to avoid data leakage and seasonal noise
cols_to_drop = ['duration', 'day', 'month']
existing_cols_to_drop = [c for c in cols_to_drop if c in df.columns]
df = df.drop(existing_cols_to_drop, axis=1)

# Map target variable to 0/1
df['y'] = df['y'].map({'yes': 1, 'no': 0})

# One-Hot Encode categorical variables
categorical_cols = df.select_dtypes(include=['object']).columns
df_encoded = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# 3. Select ONLY the Top 7 Features identified previously
top_7_features = [
    'poutcome_success',  # Success in previous campaign
    'age',               # Client age
    'pdays',             # Days since last contact
    'contact_unknown',   # Missing contact info
    'housing_yes',       # Has housing loan
    'balance',           # Average yearly balance
    'campaign'           # Number of contacts this campaign
]

X_top7 = df_encoded[top_7_features]
y = df_encoded['y']

# 4. Split Data (Train/Test)
X_train, X_test, y_train, y_test = train_test_split(X_top7, y, test_size=0.3, random_state=42)

# 5. Train Model
gb_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb_clf.fit(X_train, y_train)

# 6. Assess Performance
y_pred = gb_clf.predict(X_test)
y_pred_proba = gb_clf.predict_proba(X_test)[:, 1]

print("Performance Metrics with Top 7 Features:")
print(f"Accuracy:  {metrics.accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {metrics.precision_score(y_test, y_pred):.4f}")
print(f"Recall:    {metrics.recall_score(y_test, y_pred):.4f}")
print(f"ROC AUC:   {metrics.roc_auc_score(y_test, y_pred_proba):.4f}")

KeyError: "['contact_unknown', 'balance'] not in index"