In [1]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


In [2]:
# Read the customer table and, in the same expression, discard the columns
# that are not used as model inputs further down. errors='ignore' means a
# file that already lacks some of these columns still loads without raising.
df = (
    pd.read_csv("Churn_Modelling.csv")  # replace path
    .drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')
)
# Preview the first rows as the cell output.
df.head()


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Categorical columns that need encoding before they can be fed to the model.
cat_features = ['Geography','Gender']
# Every other column except the target. Not used in this cell; kept so that
# other cells relying on the name keep working.
num_features = [c for c in df.columns if c not in cat_features + ['Exited']]

# One-hot encode exactly the columns listed in cat_features, rather than
# whatever pandas happens to infer as string-typed. drop_first=True removes
# the first level of each category, giving k-1 indicator columns per feature.
X = pd.get_dummies(df.drop(columns='Exited'), columns=cat_features, drop_first=True)
# Target column.
y = df['Exited']


In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
# NOTE(review): the split is not stratified. If the target classes are
# imbalanced, consider passing stratify=y — this will change the numbers below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest on the training rows, then predict the
# held-out rows.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
# In scikit-learn's confusion_matrix, rows are the true classes and columns
# are the predicted classes.
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Feature importances, labelled with the columns of the frame the model was
# fitted on and sorted from most to least important.
feat_imp = pd.Series(rf.feature_importances_, index=X_train.columns).sort_values(ascending=False)
print("Top features:\n", feat_imp.head(10))


Accuracy: 0.8665
Confusion Matrix:
 [[1548   59]
 [ 208  185]]
Top features:
 Age                  0.236922
EstimatedSalary      0.147558
CreditScore          0.143338
Balance              0.141612
NumOfProducts        0.131486
Tenure               0.082080
IsActiveMember       0.040725
Geography_Germany    0.026190
HasCrCard            0.018454
Gender_Male          0.018421
dtype: float64
