In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [40]:
import torch 
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder,OneHotEncoder

In [6]:
df = pd.read_csv("Churn_Modelling.csv")

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [30]:
new_df = df.iloc[:,3:]

In [32]:
new_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [34]:
encoder_gender = LabelEncoder()
new_df['Gender']= encoder_gender.fit_transform(new_df['Gender'])

In [38]:
df['Geography'].value_counts()

Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

In [46]:
ohe = OneHotEncoder(sparse_output=False)

In [58]:
geo_encoder = ohe.fit_transform(new_df[['Geography']])
geo_encoder

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [56]:
ohe.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [64]:
temp = pd.DataFrame(geo_encoder,columns=ohe.get_feature_names_out(['Geography']))

In [72]:
newDf = pd.concat([new_df.drop("Geography",axis=1),temp],axis=1)

In [76]:
newDf

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


In [362]:
import pickle
with open('encoder_gender.pkl','wb') as file:
    pickle.dump(encoder_gender,file)

In [364]:
with open('encoder_geo.pkl','wb') as file:
    pickle.dump(ohe,file)

In [94]:
from torch.utils.data import Dataset,DataLoader

In [216]:
X = newDf.drop('Exited',axis=1)
y = newDf[['Exited']]
X = X.to_numpy()  # or X.values
y = y.to_numpy().reshape(-1, 1)

In [218]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [220]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)

In [222]:
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


In [224]:
train_dataset = CustomDataset(X_train_scaled,y_train)

In [226]:
test_dataset = CustomDataset(X_test_scaled,y_test)

In [228]:
train_loader= DataLoader(train_dataset,shuffle=True,batch_size=32)
test_loader= DataLoader(test_dataset,shuffle=False,batch_size=32)

In [315]:
class ChurnModel(nn.Module):
    def __init__(self,input_dim):
        super().__init__()
        self.fc1 = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32,1)
        )

    def forward(self, x):
        x = self.fc1(x)
        return torch.sigmoid(x)

            
        

In [344]:
epochs = 200
model = ChurnModel(X.shape[1])
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)

In [346]:
for epoch in range(epochs):
    model.train()
    train_loss = 0.0

    for X_batch, y_batch in train_loader:

        optimizer.zero_grad()
        outputs = model(X_batch)  
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * X_batch.size(0)  

    avg_train_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}")

Epoch 1/200, Train Loss: 0.4198
Epoch 2/200, Train Loss: 0.3728
Epoch 3/200, Train Loss: 0.3679
Epoch 4/200, Train Loss: 0.3609
Epoch 5/200, Train Loss: 0.3578
Epoch 6/200, Train Loss: 0.3552
Epoch 7/200, Train Loss: 0.3496
Epoch 8/200, Train Loss: 0.3524
Epoch 9/200, Train Loss: 0.3485
Epoch 10/200, Train Loss: 0.3468
Epoch 11/200, Train Loss: 0.3448
Epoch 12/200, Train Loss: 0.3468
Epoch 13/200, Train Loss: 0.3435
Epoch 14/200, Train Loss: 0.3422
Epoch 15/200, Train Loss: 0.3432
Epoch 16/200, Train Loss: 0.3404
Epoch 17/200, Train Loss: 0.3408
Epoch 18/200, Train Loss: 0.3374
Epoch 19/200, Train Loss: 0.3425
Epoch 20/200, Train Loss: 0.3396
Epoch 21/200, Train Loss: 0.3412
Epoch 22/200, Train Loss: 0.3407
Epoch 23/200, Train Loss: 0.3402
Epoch 24/200, Train Loss: 0.3383
Epoch 25/200, Train Loss: 0.3352
Epoch 26/200, Train Loss: 0.3364
Epoch 27/200, Train Loss: 0.3350
Epoch 28/200, Train Loss: 0.3371
Epoch 29/200, Train Loss: 0.3340
Epoch 30/200, Train Loss: 0.3358
Epoch 31/200, Train

In [350]:
model.eval()  # set model to evaluation mode

total_correct = 0
total_samples = 0

with torch.no_grad():
    for x, y in test_loader:
        outputs = model(x)                         # raw logits          # convert logits to probabilities
        preds = (outputs >= 0.5).float()             # threshold at 0.5

        correct = (preds == y.unsqueeze(1)).float().sum()
        total_correct += correct.item()
        total_samples += y.size(0)

accuracy = total_correct / total_samples
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 23.6130


In [356]:
torch.save(model.state_dict(), 'model.pth')