In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV

In [2]:
dataset = pd.read_csv(r'C:\Users\guna laakshmi\Downloads\Datasets\Churn.csv')

In [3]:
dataset = pd.get_dummies(dataset, columns=['Geography', 'Gender'], drop_first=True)

In [4]:
X = dataset.drop(['Exited', 'CustomerId', 'Surname'], axis=1)
y = dataset['Exited']

In [5]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
model = RandomForestClassifier(n_estimators=100, random_state=42)

In [8]:
model.fit(X_train, y_train)

In [9]:
y_pred = model.predict(X_test)

In [14]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 86.50%


In [16]:
print('\nConfusion Matrix:')
confusion_matrix(y_test, y_pred)


Confusion Matrix:


array([[1540,   67],
       [ 203,  190]], dtype=int64)

In [18]:
print('\nClassification Report:')
print(classification_report(y_test, y_pred))



Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.74      0.48      0.58       393

    accuracy                           0.86      2000
   macro avg       0.81      0.72      0.75      2000
weighted avg       0.86      0.86      0.85      2000



In [23]:
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 24 candidates, totalling 72 fits


In [24]:
print("\nBest parameters found by Grid Search:")
print(grid_search.best_params_)


Best parameters found by Grid Search:
{'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}


In [25]:
best_model = grid_search.best_estimator_
best_model.fit(X_train, y_train)

In [26]:
y_pred_best = best_model.predict(X_test)
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f'\nAccuracy with best model: {accuracy_best * 100:.2f}%')


Accuracy with best model: 86.15%
