In [9]:
import torch
import pandas as pd

In [12]:
df = pd.read_csv("customer_churn_dataset-testing-master.csv")
df

Unnamed: 0,CustomerID,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn
0,1,22,Female,25,14,4,27,Basic,Monthly,598,9,1
1,2,41,Female,28,28,7,13,Standard,Monthly,584,20,0
2,3,47,Male,27,10,2,29,Premium,Annual,757,21,0
3,4,35,Male,9,12,5,17,Premium,Quarterly,232,18,0
4,5,53,Female,58,24,9,2,Standard,Annual,533,18,0
...,...,...,...,...,...,...,...,...,...,...,...,...
64369,64370,45,Female,33,12,6,21,Basic,Quarterly,947,14,1
64370,64371,37,Male,6,1,5,22,Standard,Annual,923,9,1
64371,64372,25,Male,39,14,8,30,Premium,Monthly,327,20,1
64372,64373,50,Female,18,19,7,22,Standard,Monthly,540,13,1


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64374 entries, 0 to 64373
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   CustomerID         64374 non-null  int64 
 1   Age                64374 non-null  int64 
 2   Gender             64374 non-null  object
 3   Tenure             64374 non-null  int64 
 4   Usage Frequency    64374 non-null  int64 
 5   Support Calls      64374 non-null  int64 
 6   Payment Delay      64374 non-null  int64 
 7   Subscription Type  64374 non-null  object
 8   Contract Length    64374 non-null  object
 9   Total Spend        64374 non-null  int64 
 10  Last Interaction   64374 non-null  int64 
 11  Churn              64374 non-null  int64 
dtypes: int64(9), object(3)
memory usage: 5.9+ MB


In [14]:
cat_features=[]
for col in df.columns:
  if df[col].dtype==object:
    cat_features.append(col)
cat_features

['Gender', 'Subscription Type', 'Contract Length']

In [15]:
df.drop('CustomerID',axis=1,inplace=True)
df

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn
0,22,Female,25,14,4,27,Basic,Monthly,598,9,1
1,41,Female,28,28,7,13,Standard,Monthly,584,20,0
2,47,Male,27,10,2,29,Premium,Annual,757,21,0
3,35,Male,9,12,5,17,Premium,Quarterly,232,18,0
4,53,Female,58,24,9,2,Standard,Annual,533,18,0
...,...,...,...,...,...,...,...,...,...,...,...
64369,45,Female,33,12,6,21,Basic,Quarterly,947,14,1
64370,37,Male,6,1,5,22,Standard,Annual,923,9,1
64371,25,Male,39,14,8,30,Premium,Monthly,327,20,1
64372,50,Female,18,19,7,22,Standard,Monthly,540,13,1


In [16]:
from sklearn.preprocessing import LabelEncoder
label={}
for col in cat_features:
  label[col]=LabelEncoder()
  df[col]=label[col].fit_transform(df[col])
df

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn
0,22,0,25,14,4,27,0,1,598,9,1
1,41,0,28,28,7,13,2,1,584,20,0
2,47,1,27,10,2,29,1,0,757,21,0
3,35,1,9,12,5,17,1,2,232,18,0
4,53,0,58,24,9,2,2,0,533,18,0
...,...,...,...,...,...,...,...,...,...,...,...
64369,45,0,33,12,6,21,0,2,947,14,1
64370,37,1,6,1,5,22,2,0,923,9,1
64371,25,1,39,14,8,30,1,1,327,20,1
64372,50,0,18,19,7,22,2,1,540,13,1


In [20]:
import numpy as np
import torch
cat_value=np.stack([df[col] for col in cat_features],1)
cat_value=torch.tensor(cat_value,dtype=torch.int64)
cat_value

tensor([[0, 0, 1],
        [0, 2, 1],
        [1, 1, 0],
        ...,
        [1, 1, 1],
        [0, 2, 1],
        [0, 2, 1]])

In [21]:
cont_features=[]
for col in df.columns:
  if col in ['Gender', 'Subscription Type', 'Contract Length','Churn']:
    pass
  else:
    cont_features.append(col)
cont_features

['Age',
 'Tenure',
 'Usage Frequency',
 'Support Calls',
 'Payment Delay',
 'Total Spend',
 'Last Interaction']

In [22]:
cont_value=np.stack([df[cont] for cont in cont_features],1)
cont_value=torch.tensor(cont_value,dtype=torch.float)
cont_value

tensor([[ 22.,  25.,  14.,  ...,  27., 598.,   9.],
        [ 41.,  28.,  28.,  ...,  13., 584.,  20.],
        [ 47.,  27.,  10.,  ...,  29., 757.,  21.],
        ...,
        [ 25.,  39.,  14.,  ...,  30., 327.,  20.],
        [ 50.,  18.,  19.,  ...,  22., 540.,  13.],
        [ 52.,  45.,  15.,  ...,  25., 696.,  22.]])

In [23]:
y=torch.tensor(df["Churn"].values,dtype=torch.int64).reshape(-1,1)
y

tensor([[1],
        [0],
        [0],
        ...,
        [1],
        [1],
        [1]])

In [24]:
cat_dim=[len(df[cat].unique()) for cat in cat_features]
cat_dim

[2, 3, 3]

In [25]:
embed_dim=[(x,min(50,x+1//2)) for x in cat_dim]
embed_dim

[(2, 2), (3, 3), (3, 3)]

In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F
embed_representation=nn.ModuleList([nn.Embedding(inp,out) for inp,out in embed_dim])
embed_representation

ModuleList(
  (0): Embedding(2, 2)
  (1): Embedding(3, 3)
  (2): Embedding(3, 3)
)

In [27]:
pd.set_option('display.max_rows', 500)
embed_val=[]
for i,e in enumerate(embed_representation):
  embed_val.append(e(cat_value[:,i]))
z=torch.cat(embed_val,1)
z

tensor([[ 2.3536, -0.3435, -0.0185,  ..., -0.6563, -0.0812,  0.5083],
        [ 2.3536, -0.3435, -0.5470,  ..., -0.6563, -0.0812,  0.5083],
        [-1.0776,  0.3573, -0.4580,  ...,  0.5763, -1.2193,  0.2740],
        ...,
        [-1.0776,  0.3573, -0.4580,  ..., -0.6563, -0.0812,  0.5083],
        [ 2.3536, -0.3435, -0.5470,  ..., -0.6563, -0.0812,  0.5083],
        [ 2.3536, -0.3435, -0.5470,  ..., -0.6563, -0.0812,  0.5083]],
       grad_fn=<CatBackward0>)

In [28]:
droput=nn.Dropout(.4)
final_embed=droput(z)
final_embed

tensor([[ 0.0000, -0.0000, -0.0000,  ..., -0.0000, -0.1353,  0.8471],
        [ 0.0000, -0.0000, -0.0000,  ..., -0.0000, -0.1353,  0.8471],
        [-0.0000,  0.5954, -0.0000,  ...,  0.9605, -0.0000,  0.4566],
        ...,
        [-1.7961,  0.5954, -0.0000,  ..., -1.0938, -0.0000,  0.8471],
        [ 0.0000, -0.5725, -0.9116,  ..., -1.0938, -0.1353,  0.0000],
        [ 3.9227, -0.0000, -0.9116,  ..., -0.0000, -0.0000,  0.0000]],
       grad_fn=<MulBackward0>)

In [29]:
import torch
import torch.nn as nn

class FNN(nn.Module):
    def __init__(self, embed_dim, n_cont, layers, p):
        super().__init__()
        self.embeds = nn.ModuleList([nn.Embedding(inp, out) for inp, out in embed_dim])
        self.emb_drop = nn.Dropout(p)
        self.bn_cont = nn.BatchNorm1d(n_cont)

        layers_list = []
        n_embed = sum((out for inp, out in embed_dim))
        n_in = n_embed + n_cont

        for i in layers:
            layers_list.append(nn.Linear(n_in, i))
            layers_list.append(nn.ReLU(inplace=True))
            layers_list.append(nn.BatchNorm1d(i))
            layers_list.append(nn.Dropout(p))
            n_in = i


        layers_list.append(nn.Linear(layers[-1], 1))


        layers_list.append(nn.Sigmoid())

        self.layers = nn.Sequential(*layers_list)

    def forward(self, x_cat, x_cont):
        embeddings = []
        for i, e in enumerate(self.embeds):
            embeddings.append(e(x_cat[:, i]))

        x = torch.cat(embeddings, 1)
        x = self.emb_drop(x)
        x_cont = self.bn_cont(x_cont)
        x = torch.cat([x, x_cont], 1)

        # Passer par les couches
        x = self.layers(x)

        # Optionnel : Si vous voulez une sortie binaire directement
        # x = torch.round(x)

        return x


In [38]:
import torch.nn as nn
torch.manual_seed(100)
model = FNN(embed_dim, len(cont_features), [500,300], p=0.4)

In [39]:
model

FNN(
  (embeds): ModuleList(
    (0): Embedding(2, 2)
    (1): Embedding(3, 3)
    (2): Embedding(3, 3)
  )
  (emb_drop): Dropout(p=0.4, inplace=False)
  (bn_cont): BatchNorm1d(7, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): Linear(in_features=15, out_features=500, bias=True)
    (1): ReLU(inplace=True)
    (2): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.4, inplace=False)
    (4): Linear(in_features=500, out_features=300, bias=True)
    (5): ReLU(inplace=True)
    (6): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Dropout(p=0.4, inplace=False)
    (8): Linear(in_features=300, out_features=1, bias=True)
    (9): Sigmoid()
  )
)

In [40]:
size=64374
test_size=int(size*0.2)
cat_train=cat_value[:size-test_size]
cat_test=cat_value[size-test_size:size]
cont_train=cont_value[:size-test_size]
cont_test=cont_value[size-test_size:size]
y_train=y[:size-test_size]
y_test=y[size-test_size:size]

In [41]:
import torch
import torch.nn as nn

# Remplacez la fonction de perte par nn.BCELoss()
loss_function = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 2000
final_losses = []

for i in range(epochs):
    i = i + 1
    y_pred = model(cat_train, cont_train)

    # Calcul de la perte
    loss = loss_function(y_pred, y_train.float())  # BCE Loss, convert y_train to float

    final_losses.append(loss.item())

    if i % 10 == 1:
        print(f"Epoch number: {i} and the loss: {loss.item()}")
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


Epoch number: 1 and the loss: 0.7076825499534607
Epoch number: 11 and the loss: 0.4038510322570801
Epoch number: 21 and the loss: 0.3473988175392151
Epoch number: 31 and the loss: 0.31677794456481934
Epoch number: 41 and the loss: 0.29673027992248535
Epoch number: 51 and the loss: 0.2757944166660309
Epoch number: 61 and the loss: 0.2603594958782196
Epoch number: 71 and the loss: 0.24162615835666656
Epoch number: 81 and the loss: 0.23014703392982483
Epoch number: 91 and the loss: 0.21371515095233917
Epoch number: 101 and the loss: 0.20207907259464264
Epoch number: 111 and the loss: 0.19187451899051666
Epoch number: 121 and the loss: 0.18526604771614075
Epoch number: 131 and the loss: 0.17724207043647766
Epoch number: 141 and the loss: 0.16948096454143524
Epoch number: 151 and the loss: 0.1632024198770523
Epoch number: 161 and the loss: 0.15699288249015808
Epoch number: 171 and the loss: 0.15175606310367584
Epoch number: 181 and the loss: 0.14700524508953094
Epoch number: 191 and the los

KeyboardInterrupt: 

In [50]:
#### Validate the Test Data
y_pred=""
with torch.no_grad():
    y_pred=model(cat_test,cont_test)
    loss=torch.sqrt(loss_function(y_pred,y_test.float()))
# y_pred is already a 2D tensor, no need to reshape
y_pred = y_pred.numpy()  # Convertir le tensor en tableau NumPy
df = pd.DataFrame(y_pred, columns=['Prediction'])
df['Prediction'] = df['Prediction'].round(4)
df['Prediction'] = df['Prediction'].apply(lambda x: 1 if x > 0.00001 else 0)
df


Unnamed: 0,Prediction
0,1
1,0
2,1
3,1
4,1
...,...
12869,1
12870,1
12871,1
12872,1


In [51]:
data_verify=pd.DataFrame(y_test.tolist(),columns=["Test"])
data_verify

Unnamed: 0,Test
0,1
1,1
2,1
3,1
4,1
...,...
12869,1
12870,1
12871,1
12872,1


In [53]:
df1=pd.concat([data_verify,df],axis=1)
df1.shape
k=len(df1[df1['Test']!=df1['Prediction']])
cor=12874-k
cor=cor/12874
cor

0.9140127388535032

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=4d966c84-d823-41d5-86bf-78bde6a05141' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>