In [5]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset into a pandas DataFrame
dataset_path = r'C:\Users\tahsi\OneDrive\Desktop\python_ws\bankloan.csv'
df = pd.read_csv(dataset_path)

# Feature Engineering: Create new features
df['Income_per_Family'] = df['Income'] / df['Family']  # Calculate income per family member
df['Experience_per_Age'] = df['Experience'] / df['Age']  # Calculate experience to age ratio
df['CCAvg_per_Income'] = df['CCAvg'] / df['Income']  # Calculate credit card average spending relative to income

# Save the dataset with the new features to a CSV file
output_dataset_path = r'C:\Users\tahsi\Documents\bankloan_with_features.csv'
df.to_csv(output_dataset_path, index=False)

# Define features and target
X = df.drop(columns=['ID', 'ZIP.Code', 'Personal.Loan'])  # Drop unnecessary columns and target variable
y = df['Personal.Loan']  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Gradient Boosting model with hyperparameters
gbm = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)

# Train the model
gbm.fit(X_train, y_train)

# Predict on the test set
y_pred_gbm = gbm.predict(X_test)

# Evaluate the model
accuracy_gbm = accuracy_score(y_test, y_pred_gbm)
print(f'Gradient Boosting Accuracy: {accuracy_gbm}')
print('Gradient Boosting Classification Report:')
print(classification_report(y_test, y_pred_gbm))
print('Gradient Boosting Confusion Matrix:')
print(confusion_matrix(y_test, y_pred_gbm))


Gradient Boosting Accuracy: 0.991
Gradient Boosting Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       895
           1       0.99      0.92      0.96       105

    accuracy                           0.99      1000
   macro avg       0.99      0.96      0.98      1000
weighted avg       0.99      0.99      0.99      1000

Gradient Boosting Confusion Matrix:
[[894   1]
 [  8  97]]
