1. INSTALLING REQUIRED LIBRARIES

In [34]:
!pip install scikit-learn joblib




2. IMPORTING LIBRARIES

In [35]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

import joblib


3. LOADING DATASET

In [36]:
data = pd.read_csv("/content/Telco-Customer-Churn.csv")
data.head()


Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


4. BASIC DATA UNDERSTANDING

In [37]:
data.shape


(7043, 21)

In [38]:
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [39]:
data.isnull().sum()


Unnamed: 0,0
customerID,0
gender,0
SeniorCitizen,0
Partner,0
Dependents,0
tenure,0
PhoneService,0
MultipleLines,0
InternetService,0
OnlineSecurity,0


5. DATA CLEANING

In [40]:
data.drop("customerID", axis=1, inplace=True)


In [41]:
data["TotalCharges"] = pd.to_numeric(data["TotalCharges"], errors="coerce")
data["TotalCharges"].fillna(data["TotalCharges"].mean(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data["TotalCharges"].fillna(data["TotalCharges"].mean(), inplace=True)


6. ENCODE CATEGORICAL COLUMNS

In [42]:
label_encoder = LabelEncoder()

for col in data.select_dtypes(include="object"):
    data[col] = label_encoder.fit_transform(data[col])


7. SPLIT INPUT & OUTPUT

In [43]:
X = data.drop("Churn", axis=1)
y = data["Churn"]


8. TRAIN-TEST SPLIT

In [44]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


9. TRAIN DECISION TREE MODEL

In [45]:
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)


10. MODEL EVALUATION

In [46]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy


0.7253371185237757

In [47]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy


0.7253371185237757

In [48]:
cm = confusion_matrix(y_test, y_pred)
cm


array([[833, 203],
       [184, 189]])

11. SAVE THE TRAINED MODEL

In [49]:
joblib.dump(model, "customer_churn_decision_tree.pkl")


['customer_churn_decision_tree.pkl']

12. DOWNLOAD MODEL FILE

In [50]:
from google.colab import files
files.download("customer_churn_decision_tree.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

13. TEST PREDICTION

In [53]:
sample_input = X_test.iloc[0:1]
prediction=model.predict(sample_input)
if prediction[0] == 1:
    print("Prediction: Customer WILL churn")
else:
    print("Prediction: Customer will NOT churn")



Prediction: Customer WILL churn
