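# Customer Churn Prediction

This notebook trains a gradient boosting classifier to predict the `Exited` flag in the `Churn_Modelling.csv` customer data, tunes it with a grid search, and compares test-set accuracy before and after tuning.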

## Import necessary libraries


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

## Load dataset


In [None]:
# Location of the Churn Modelling CSV (presumably a Colab-mounted Google Drive path;
# adjust if running elsewhere).
churn_csv_path = "/content/drive/MyDrive/Codsoft dataset/Customer Churn/Churn_Modelling.csv"
churn_data = pd.read_csv(churn_csv_path)

## Display initial data structure


In [None]:
# Grab the first five rows once, then print them under a heading to check the
# column layout before any preprocessing.
preview_rows = churn_data.head()
print("Initial Dataset Preview:")
print(preview_rows)

Initial Dataset Preview:
   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   
1          2    15647311      Hill          608     Spain  Female   41   
2          3    15619304      Onio          502    France  Female   42   
3          4    15701354      Boni          699    France  Female   39   
4          5    15737888  Mitchell          850     Spain  Female   43   

   Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMember  \
0       2       0.00              1          1               1   
1       1   83807.86              1          0               1   
2       8  159660.80              3          1               0   
3       1       0.00              2          0               0   
4       2  125510.82              1          1               1   

   EstimatedSalary  Exited  
0        101348.88       1  
1        112542.58       0  
2        113931.57       1  
3         93826.6

## Dropping irrelevant columns


In [None]:
# These identifier-style columns are treated as irrelevant to the prediction task.
# Reassigning the result (rather than using inplace=True) and passing
# errors='ignore' keeps this cell safe to re-run: a second execution no longer
# raises KeyError because the labels are already gone.
columns_to_remove = ['RowNumber', 'CustomerId', 'Surname']
churn_data = churn_data.drop(columns=columns_to_remove, errors='ignore')

## Encoding categorical variables


In [None]:
# Fit one encoder per categorical column; the fitted encoders are kept so the
# integer codes can be mapped back to the original labels later.
encoder_geo = LabelEncoder().fit(churn_data['Geography'])
encoder_gender = LabelEncoder().fit(churn_data['Gender'])

# Replace the string categories with their integer codes.
churn_data['Geography'] = encoder_geo.transform(churn_data['Geography'])
churn_data['Gender'] = encoder_gender.transform(churn_data['Gender'])

## Splitting features and target variable


In [None]:
# 'Exited' is the label; every remaining column is used as a predictor.
label_column = 'Exited'
target = churn_data[label_column]
features = churn_data.drop(columns=[label_column])

## Dividing data into training and testing sets


In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

## Print dataset shapes


In [None]:
# Sanity-check the split sizes.
train_shape, test_shape = X_train.shape, X_test.shape
print("Training Data Shape:", train_shape)
print("Testing Data Shape:", test_shape)

Training Data Shape: (8000, 10)
Testing Data Shape: (2000, 10)


## Feature Scaling


In [None]:
# Learn the scaling statistics from the training split only, then apply the same
# transform to both splits so no test-set information reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Training Gradient Boosting Classifier


In [None]:
# Baseline model with default hyperparameters. random_state is pinned to the same
# seed as the estimator used in the grid search below, so the baseline fit (and
# the metrics derived from it) is reproducible from run to run.
gb_classifier = GradientBoostingClassifier(random_state=42)
gb_classifier.fit(X_train_scaled, y_train)

## Making Predictions


In [None]:
# Predict labels for the scaled test features with the baseline model.
y_pred_gb = gb_classifier.predict(X_test_scaled)

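## Evaluating Model Performance

The test set is imbalanced (1,607 retained vs. 393 exited customers), so read the confusion matrix alongside accuracy: a model can score well overall while still missing many of the customers who actually churned.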

In [None]:
# Score the baseline predictions against the held-out labels.
conf_matrix_gb = confusion_matrix(y_true=y_test, y_pred=y_pred_gb)
accuracy_gb = accuracy_score(y_true=y_test, y_pred=y_pred_gb)

print("Gradient Boosting Accuracy:", accuracy_gb)
print("Confusion Matrix:\n", conf_matrix_gb)

Gradient Boosting Accuracy: 0.866
Confusion Matrix:
 [[1547   60]
 [ 208  185]]


## Hyperparameter Tuning using GridSearchCV

In [None]:
# Search space for the grid search: 3 values for each of 5 hyperparameters,
# i.e. 3**5 = 243 combinations.
param_grid_gb = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[3, 4, 5],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 5],
)

In [None]:
# Seeded base estimator so every candidate in the search is fit reproducibly.
base_gb_estimator = GradientBoostingClassifier(random_state=42)

# 5-fold cross-validated search over param_grid_gb, scored by accuracy and run
# on all available cores; verbose=2 logs progress for each fit.
grid_search_gb = GridSearchCV(
    estimator=base_gb_estimator,
    param_grid=param_grid_gb,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=2,
)

In [None]:
# Run the exhaustive search on the scaled training data. This is the expensive
# step: the logged run fits 243 candidates x 5 folds = 1215 models.
grid_search_gb.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 243 candidates, totalling 1215 fits


## Extracting the best model


In [None]:
# Pull the winning configuration and its fitted estimator out of the finished search.
best_params_gb = grid_search_gb.best_params_
best_gb_model = grid_search_gb.best_estimator_
print("Best Hyperparameters:", best_params_gb)

Best Hyperparameters: {'learning_rate': 0.05, 'max_depth': 4, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 300}


## Making Predictions with the Optimized Model


In [None]:
# Predict labels for the scaled test features with the best estimator selected
# by the grid search.
y_pred_tuned_gb = best_gb_model.predict(X_test_scaled)

## Evaluating the Tuned Model


In [None]:
# Score the tuned model's predictions against the same held-out labels used for
# the baseline, so the two sets of metrics are directly comparable.
conf_matrix_tuned_gb = confusion_matrix(y_true=y_test, y_pred=y_pred_tuned_gb)
accuracy_tuned_gb = accuracy_score(y_true=y_test, y_pred=y_pred_tuned_gb)

print("Optimized Model Accuracy:", accuracy_tuned_gb)
print("Confusion Matrix After Tuning:\n", conf_matrix_tuned_gb)

Optimized Model Accuracy: 0.869
Confusion Matrix After Tuning:
 [[1541   66]
 [ 196  197]]
