In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder, FunctionTransformer
import pickle

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [3]:
data = pd.read_csv('Churn_Modelling.csv') 
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
data  =data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

In [5]:
data.shape

(10000, 11)

In [6]:
data.describe()


Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int64  
 1   Geography        10000 non-null  object 
 2   Gender           10000 non-null  object 
 3   Age              10000 non-null  int64  
 4   Tenure           10000 non-null  int64  
 5   Balance          10000 non-null  float64
 6   NumOfProducts    10000 non-null  int64  
 7   HasCrCard        10000 non-null  int64  
 8   IsActiveMember   10000 non-null  int64  
 9   EstimatedSalary  10000 non-null  float64
 10  Exited           10000 non-null  int64  
dtypes: float64(2), int64(7), object(2)
memory usage: 859.5+ KB


In [8]:
data.isnull().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [9]:
data['IsActiveMember'].unique()

array([1, 0], dtype=int64)

In [10]:
numerical_features = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary']
categorical_features = ['Geography', 'Gender']  
binary_features = ['HasCrCard', 'IsActiveMember']

def identity_func(x):
    return x

Pipeline = ColumnTransformer([
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(), categorical_features),
    ('bin', FunctionTransformer(identity_func), binary_features)
])

data_prepared = Pipeline.fit_transform(data)
data_prepared

array([[-0.32622142,  0.29351742, -1.04175968, ...,  0.        ,
         1.        ,  1.        ],
       [-0.44003595,  0.19816383, -1.38753759, ...,  0.        ,
         0.        ,  1.        ],
       [-1.53679418,  0.29351742,  1.03290776, ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [ 0.60498839, -0.27860412,  0.68712986, ...,  0.        ,
         0.        ,  1.        ],
       [ 1.25683526,  0.29351742, -0.69598177, ...,  1.        ,
         1.        ,  0.        ],
       [ 1.46377078, -1.04143285, -0.35020386, ...,  0.        ,
         1.        ,  0.        ]])

In [11]:
y = data['Exited']

In [12]:
with open("preprocessor.pkl", "wb") as f:
    pickle.dump(Pipeline, f)


In [13]:
X_train, X_test, y_train, y_test = train_test_split(data_prepared, y, test_size=0.2, random_state=42)

# ANN

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from imblearn.over_sampling import SMOTE

In [15]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [16]:
X_train_tensor = torch.tensor(X_train_resampled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_resampled, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [17]:
# Count of how many of each class
num_churn = (y_train_tensor == 1).sum().item()
num_not_churn = (y_train_tensor == 0).sum().item()
total = num_churn + num_not_churn

# calculate weight for class weight : Output = (1644,6356)
# num_churn, num_not_churn

wieght_for_0 = total/(2*num_not_churn)
weight_for_1 = total/(2*num_churn)

weights = torch.tensor([wieght_for_0, weight_for_1])

In [18]:
class ChurnNet(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 1)
        # self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out
        #return self.sigmoid(out)


In [25]:
X_train_tensor.shape[1]

13

In [19]:
input_size = X_train_tensor.shape[1]
model = ChurnNet(input_size)

In [20]:
criterion = nn.BCEWithLogitsLoss(pos_weight= torch.tensor([weight_for_1]))
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [21]:
epochs = 100
for epoch in range(epochs):
    model.train()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch +1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.6765
Epoch [20/100], Loss: 0.6525
Epoch [30/100], Loss: 0.6238
Epoch [40/100], Loss: 0.5921
Epoch [50/100], Loss: 0.5638
Epoch [60/100], Loss: 0.5434
Epoch [70/100], Loss: 0.5280
Epoch [80/100], Loss: 0.5137
Epoch [90/100], Loss: 0.4997
Epoch [100/100], Loss: 0.4859


In [22]:
torch.save(model.state_dict(), 'churn_model.pth')

In [23]:
model = ChurnNet(input_size)
model.load_state_dict(torch.load('churn_model.pth'))

model.eval()  # evaluation mode (disables dropout, etc.)
with torch.no_grad():  # no gradient calculation
    predicted = model(X_test_tensor)
    predicted_classes = (predicted >= 0.5).float()  # convert probs to 0 or 1
    accuracy = (predicted_classes.eq(y_test_tensor).sum() / y_test_tensor.shape[0]).item()
    print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 81.40%


In [24]:
from sklearn.metrics import classification_report, confusion_matrix

# Convert PyTorch tensors to NumPy arrays
y_true = y_test_tensor.cpu().numpy()
y_pred = predicted_classes.cpu().numpy()

# Classification report (precision, recall, f1, accuracy)
print(classification_report(y_true, y_pred, target_names=["No Churn", "Churn"]))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(cm)

              precision    recall  f1-score   support

    No Churn       0.91      0.86      0.88      1607
       Churn       0.52      0.64      0.57       393

    accuracy                           0.81      2000
   macro avg       0.71      0.75      0.73      2000
weighted avg       0.83      0.81      0.82      2000

Confusion Matrix:
[[1377  230]
 [ 142  251]]
