<a href="https://colab.research.google.com/github/DeshanaShah2004/Afame_internship/blob/main/afame_churn_modelling_deshana_vikas_shah.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.base import clone
import numpy as np

# Load the dataset
df = pd.read_csv('/content/Churn_Modelling (1).csv')  # Update the path to your CSV file

# Preprocess the data
# One-hot encode the categorical columns; drop_first=True drops one dummy per
# column so the encoded columns are not perfectly collinear.
df = pd.get_dummies(df, columns=['Geography', 'Gender'], drop_first=True)

# Define features and target variable.
# RowNumber / CustomerId / Surname are excluded from the feature matrix.
X = df.drop(['RowNumber', 'CustomerId', 'Surname', 'Exited'], axis=1)
y = df['Exited']

# Split the data into training (80%) and a held-out remainder (20%).
# stratify keeps the class ratio of the target the same in every split, which
# matters because the positive class is the minority.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=21, stratify=y
)

# Split the held-out remainder in half: 10% test and 10% validation overall.
X_test, X_val, y_test, y_val = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=21, stratify=y_temp
)

# Initialize the base learners.
# NOTE(review): learning_rate=0.0001 with 500 trees looks very small and
# presumably keeps the boosted model close to the class prior -- confirm this
# is intentional before relying on the GB column of the meta-features.
gb = GradientBoostingClassifier(n_estimators=500, learning_rate=0.0001, max_depth=3, min_samples_split=2, min_samples_leaf=1, random_state=21)
rf = RandomForestClassifier(n_estimators=500, max_depth=None, min_samples_split=2, min_samples_leaf=1, random_state=21)

# Initialize the meta-learner
meta_learner = LogisticRegression(C=0.01, random_state=42)

# Meta-feature matrices: one row per sample, one column per base learner
# (column 0 = gradient boosting, column 1 = random forest).
# train_preds holds out-of-fold predictions; val_preds / test_preds hold the
# average prediction of the per-fold models.
train_preds = np.zeros((X_train.shape[0], 2))
val_preds = np.zeros((X_val.shape[0], 2))
test_preds = np.zeros((X_test.shape[0], 2))

# Cross-validation to create out-of-fold predictions for training the meta-learner
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for train_index, val_index in skf.split(X_train, y_train):
    X_fold_train, X_fold_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_fold_train = y_train.iloc[train_index]

    for column, base_model in enumerate((gb, rf)):
        # Fit a fresh, unfitted copy so no state leaks between folds.
        fold_model = clone(base_model)
        fold_model.fit(X_fold_train, y_fold_train)

        # Use the positive-class probability rather than the hard label:
        # it keeps the train meta-features (out-of-fold) on the same
        # continuous scale as the averaged val/test meta-features, and it
        # preserves the base learner's confidence for the meta-learner.
        # Column 1 of predict_proba is the probability of class 1 because
        # the target takes the values 0 and 1.
        train_preds[val_index, column] = fold_model.predict_proba(X_fold_val)[:, 1]

        # Held-out sets: average the predictions of the per-fold models.
        val_preds[:, column] += fold_model.predict_proba(X_val)[:, 1] / skf.n_splits
        test_preds[:, column] += fold_model.predict_proba(X_test)[:, 1] / skf.n_splits

# Train the meta-learner on the out-of-fold meta-features
meta_learner.fit(train_preds, y_train)


def _print_report(split_name, y_true, y_pred):
    """Print confusion matrix, classification report and accuracy for one split."""
    print(f"{split_name} Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print(f"\n{split_name} Classification Report:")
    print(classification_report(y_true, y_pred))
    print(f"\n{split_name} Accuracy Score:")
    print(accuracy_score(y_true, y_pred))


# Predict on the validation set using the meta-learner
final_val_preds = meta_learner.predict(val_preds)
_print_report("Validation", y_val, final_val_preds)

# Predict on the test set using the meta-learner
final_test_preds = meta_learner.predict(test_preds)
_print_report("Test", y_test, final_test_preds)


Validation Confusion Matrix:
[[793  13]
 [116  78]]

Validation Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.98      0.92       806
           1       0.86      0.40      0.55       194

    accuracy                           0.87      1000
   macro avg       0.86      0.69      0.74      1000
weighted avg       0.87      0.87      0.85      1000


Validation Accuracy Score:
0.871
Test Confusion Matrix:
[[779  15]
 [126  80]]

Test Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.98      0.92       794
           1       0.84      0.39      0.53       206

    accuracy                           0.86      1000
   macro avg       0.85      0.68      0.72      1000
weighted avg       0.86      0.86      0.84      1000


Test Accuracy Score:
0.859
