In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw churn table; the path is resolved relative to the notebook's
# working directory.
CHURN_CSV = "customer_churn.csv"
df = pd.read_csv(CHURN_CSV)

In [3]:
# Print the column names, non-null counts and dtypes of the freshly loaded
# frame so we can see which columns still need encoding.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [4]:
# --- Encode categorical features and clean TotalCharges ----------------------
# NOTE: this cell rebinds `df` and is not idempotent -- re-run the load cell
# before executing it a second time.

# The customer identifier is not used as a model feature.
df = df.drop(columns="customerID")

# One-hot encode the multi-class categorical columns.
columns_to_encode = ['InternetService', 'Contract', 'PaymentMethod']
df = pd.get_dummies(df, columns=columns_to_encode)

# Opt in to the future pandas behaviour so `replace` does not silently
# downcast object columns (the explicit integer cast happens in a later cell).
pd.set_option('future.no_silent_downcasting', True)

# Collapse the "no service" variants into a plain "No", then map every binary
# label (and the boolean dummy columns) to 0/1.
df = (
    df
    .replace("No phone service", "No")
    .replace("No internet service", "No")
    .replace({"No": 0, "Yes": 1})
    .replace({"Female": 0, "Male": 1})
    .replace({False: 0, True: 1})
)

# TotalCharges may contain blank strings. Treat any empty / whitespace-only
# entry as missing before converting the column to float.
total_charges = (
    df['TotalCharges']
    .replace(r'^\s*$', np.nan, regex=True)
    .astype(float)
)

# Impute missing totals instead of leaving NaN in the feature matrix (NaN
# would otherwise propagate through scaling into model training).
# tenure * MonthlyCharges approximates the amount billed so far and evaluates
# to 0 for customers with tenure 0.
# NOTE(review): presumably the blank rows are exactly the tenure-0 customers
# -- TODO confirm against the raw CSV.
df['TotalCharges'] = total_charges.fillna(df['tenure'] * df['MonthlyCharges'])

assert df['TotalCharges'].notna().all(), "TotalCharges still contains NaN"

In [5]:
# Print the value distribution of every column to sanity-check the encoding.
# `.items()` yields each column Series directly, which avoids looking the
# column up again through a string-formatted copy of its label (that lookup
# fails for non-string column labels).
for column_name, column in df.items():
    print(column.value_counts())
    print('********************************')

gender
1    3555
0    3488
Name: count, dtype: int64
********************************
SeniorCitizen
0    5901
1    1142
Name: count, dtype: int64
********************************
Partner
0    3641
1    3402
Name: count, dtype: int64
********************************
Dependents
0    4933
1    2110
Name: count, dtype: int64
********************************
tenure
1     613
72    362
2     238
3     200
4     176
     ... 
28     57
39     56
44     51
36     50
0      11
Name: count, Length: 73, dtype: int64
********************************
PhoneService
1    6361
0     682
Name: count, dtype: int64
********************************
MultipleLines
0    4072
1    2971
Name: count, dtype: int64
********************************
OnlineSecurity
0    5024
1    2019
Name: count, dtype: int64
********************************
OnlineBackup
0    4614
1    2429
Name: count, dtype: int64
********************************
DeviceProtection
0    4621
1    2422
Name: count, dtype: int64
**********************

In [6]:
# Re-inspect the dtypes after encoding: columns whose labels were replaced with
# 0/1 are still reported as `object` here and need an explicit integer cast.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 27 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   gender                                   7043 non-null   object 
 1   SeniorCitizen                            7043 non-null   int64  
 2   Partner                                  7043 non-null   object 
 3   Dependents                               7043 non-null   object 
 4   tenure                                   7043 non-null   int64  
 5   PhoneService                             7043 non-null   object 
 6   MultipleLines                            7043 non-null   object 
 7   OnlineSecurity                           7043 non-null   object 
 8   OnlineBackup                             7043 non-null   object 
 9   DeviceProtection                         7043 non-null   object 
 10  TechSupport                              7043 no

In [7]:
# Every column that is neither int64 nor float64 at this point holds encoded
# 0/1 values stored under another dtype; cast them all to plain integers.
non_numeric = df.select_dtypes(exclude=['int64', 'float64'])
columns_to_convert = non_numeric.columns

df[columns_to_convert] = non_numeric.astype(int)

In [8]:
# Confirm that the cast worked: the formerly non-numeric columns should now be
# reported with an integer dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 27 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   gender                                   7043 non-null   int32  
 1   SeniorCitizen                            7043 non-null   int64  
 2   Partner                                  7043 non-null   int32  
 3   Dependents                               7043 non-null   int32  
 4   tenure                                   7043 non-null   int64  
 5   PhoneService                             7043 non-null   int32  
 6   MultipleLines                            7043 non-null   int32  
 7   OnlineSecurity                           7043 non-null   int32  
 8   OnlineBackup                             7043 non-null   int32  
 9   DeviceProtection                         7043 non-null   int32  
 10  TechSupport                              7043 no

In [9]:
# Raise the display limit so that all 27 columns of the encoded frame are
# rendered instead of being elided, then preview the first three rows.
pd.options.display.max_columns = 28
df.head(n=3)

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,InternetService_DSL,InternetService_Fiber optic,InternetService_No,Contract_Month-to-month,Contract_One year,Contract_Two year,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,29.85,29.85,0,1,0,0,1,0,0,0,0,1,0
1,1,0,0,0,34,1,0,1,0,1,0,0,0,0,56.95,1889.5,0,1,0,0,0,1,0,0,0,0,1
2,1,0,0,0,2,1,0,1,1,0,0,0,0,1,53.85,108.15,1,1,0,0,1,0,0,0,0,0,1


In [10]:
# Rescale the continuous features to the [0, 1] range so they are on the same
# scale as the 0/1 encoded columns.
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split, so test-set minima/maxima leak into the transform. Consider fitting
# on the training rows only.
columns_to_scale = ['tenure', 'MonthlyCharges', 'TotalCharges']

scaler = MinMaxScaler()
scaler.fit(df[columns_to_scale])
df[columns_to_scale] = scaler.transform(df[columns_to_scale])

In [11]:
# Spot-check one scaled column: the values should now be fractions, not the
# original charge amounts.
df['MonthlyCharges'].to_numpy()

array([0.11542289, 0.38507463, 0.35422886, ..., 0.11293532, 0.55870647,
       0.86965174])

In [12]:
# Final dtype check before modelling: the scaled columns are now float64 and
# every column should report a full non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 27 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   gender                                   7043 non-null   int32  
 1   SeniorCitizen                            7043 non-null   int64  
 2   Partner                                  7043 non-null   int32  
 3   Dependents                               7043 non-null   int32  
 4   tenure                                   7043 non-null   float64
 5   PhoneService                             7043 non-null   int32  
 6   MultipleLines                            7043 non-null   int32  
 7   OnlineSecurity                           7043 non-null   int32  
 8   OnlineBackup                             7043 non-null   int32  
 9   DeviceProtection                         7043 non-null   int32  
 10  TechSupport                              7043 no

In [13]:
# Separate the target (Churn) from the feature matrix and show the matrix
# shape as (rows, features).
y = df['Churn']
X = df.drop('Churn', axis=1)
X.shape

(7043, 26)

In [14]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=10,
)

In [15]:
# Number of input features, i.e. the width the network's input must match.
X_train.shape[1]

26

In [18]:
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling -- and therefore the reported metrics -- are repeatable
# across "Restart & Run All". The value mirrors the split's random_state.
tf.keras.utils.set_random_seed(10)

# Small fully connected binary classifier: one hidden ReLU layer followed by a
# sigmoid unit that outputs the churn probability. The input width is derived
# from the training data rather than hard-coded, and is declared with an
# explicit Input object (passing `input_shape` to a layer is deprecated in
# Keras 3).
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Keep the History object so the per-epoch loss/accuracy can be inspected or
# plotted afterwards instead of being discarded as a bare cell output.
history = model.fit(X_train, y_train, epochs=10)

Epoch 1/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7270 - loss: 0.6387
Epoch 2/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7411 - loss: 0.6530
Epoch 3/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7307 - loss: 0.6334
Epoch 4/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7351 - loss: 0.6153
Epoch 5/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7326 - loss: 0.6047
Epoch 6/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7330 - loss: 0.5962
Epoch 7/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7404 - loss: 0.5855
Epoch 8/10
[1m155/155[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7389 - loss: 0.5827
Epoch 9/10
[1m155/155[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x138994ac910>

In [19]:
# Score the trained model on the held-out split; with the loss and metric
# configured at compile time this returns [loss, accuracy].
model.evaluate(x=X_test, y=y_test)

[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7652 - loss: 0.5534


[0.5755395293235779, 0.7363937497138977]