In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score , classification_report

In [8]:
df = pd.read_csv('cleaned_telco_customer_churn.csv')

In [9]:
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,Contract,PaperlessBilling,MonthlyCharges,TotalCharges,...,DeviceProtection_Yes,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No internet service,StreamingMovies_Yes,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,1,0,-1.277445,0,0,1,-1.160323,-0.994194,...,0,0,0,0,0,0,0,0,1,0
1,1,0,0,0,0.066327,1,1,0,-0.259629,-0.17374,...,1,0,0,0,0,0,0,0,0,1
2,1,0,0,0,-1.236724,1,0,1,-0.36266,-0.959649,...,0,0,0,0,0,0,0,0,0,1
3,1,0,0,0,0.514251,0,1,0,-0.746535,-0.195248,...,1,0,1,0,0,0,0,0,0,0
4,0,0,0,0,-1.236724,1,0,1,0.197365,-0.940457,...,0,0,0,0,0,0,0,0,1,0


In [10]:
df.shape

(7043, 30)

In [11]:
df.isnull().sum()

gender                                    0
SeniorCitizen                             0
Partner                                   0
Dependents                                0
tenure                                    0
PhoneService                              0
Contract                                  0
PaperlessBilling                          0
MonthlyCharges                            0
TotalCharges                             11
Churn                                     0
MultipleLines_No phone service            0
MultipleLines_Yes                         0
InternetService_Fiber optic               0
InternetService_No                        0
OnlineSecurity_No internet service        0
OnlineSecurity_Yes                        0
OnlineBackup_No internet service          0
OnlineBackup_Yes                          0
DeviceProtection_No internet service      0
DeviceProtection_Yes                      0
TechSupport_No internet service           0
TechSupport_Yes                 

In [12]:
df['TotalCharges'].replace(' ', np.nan, inplace=True)
df['TotalCharges'] = df['TotalCharges'].astype(float)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].replace(' ', np.nan, inplace=True)


In [13]:
df['TotalCharges'].isnull().sum()


np.int64(11)

In [14]:
df['TotalCharges'].fillna(df['TotalCharges'].median(), inplace=True)
df['TotalCharges'].isnull().sum()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].fillna(df['TotalCharges'].median(), inplace=True)


np.int64(0)

In [15]:
X = df.drop('Churn', axis=1)
y = df['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

In [17]:
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.82


In [18]:
print(f'classification_report:\n{classification_report(y_test, y_pred)}')

classification_report:
              precision    recall  f1-score   support

           0       0.86      0.90      0.88      1036
           1       0.69      0.60      0.64       373

    accuracy                           0.82      1409
   macro avg       0.77      0.75      0.76      1409
weighted avg       0.82      0.82      0.82      1409



In [19]:
from sklearn.ensemble import RandomForestClassifier


In [20]:
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [21]:
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.794889992902768
              precision    recall  f1-score   support

           0       0.83      0.91      0.87      1036
           1       0.66      0.47      0.55       373

    accuracy                           0.79      1409
   macro avg       0.74      0.69      0.71      1409
weighted avg       0.78      0.79      0.78      1409



In [22]:
from xgboost import XGBClassifier


In [23]:
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)

In [24]:
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.7835344215755855
              precision    recall  f1-score   support

           0       0.83      0.89      0.86      1036
           1       0.61      0.50      0.55       373

    accuracy                           0.78      1409
   macro avg       0.72      0.69      0.70      1409
weighted avg       0.77      0.78      0.78      1409



In [25]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [26]:
model = Sequential()

model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [27]:
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']

)

In [28]:
history = model.fit(
    X_train , y_train,
    epochs = 10,
    batch_size = 32,
    validation_data = (X_test, y_test),
    verbose = 1
)

Epoch 1/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.7425 - loss: 0.5119 - val_accuracy: 0.8133 - val_loss: 0.4092
Epoch 2/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.7864 - loss: 0.4392 - val_accuracy: 0.8162 - val_loss: 0.4007
Epoch 3/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7984 - loss: 0.4227 - val_accuracy: 0.8126 - val_loss: 0.3996
Epoch 4/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7975 - loss: 0.4261 - val_accuracy: 0.8204 - val_loss: 0.3993
Epoch 5/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.8032 - loss: 0.4107 - val_accuracy: 0.8119 - val_loss: 0.4006
Epoch 6/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8002 - loss: 0.4202 - val_accuracy: 0.8176 - val_loss: 0.3990
Epoch 7/10
[1m177/177[0m