In [480]:
import pandas as pd
import numpy as np
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [481]:
data = pd.read_csv('Telco-Customer-Churn.csv')

data

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [482]:
data.describe()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
count,7043.0,7043.0,7043.0
mean,0.162147,32.371149,64.761692
std,0.368612,24.559481,30.090047
min,0.0,0.0,18.25
25%,0.0,9.0,35.5
50%,0.0,29.0,70.35
75%,0.0,55.0,89.85
max,1.0,72.0,118.75


In [483]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [484]:
data.drop(['customerID', 'gender', 'SeniorCitizen', 'MultipleLines','PhoneService', 'InternetService', 'StreamingMovies'], axis=1, inplace=True)

data['TotalCharges'] = data['TotalCharges'].replace(' ', np.nan)

data.dropna(axis=0)

data

Unnamed: 0,Partner,Dependents,tenure,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Yes,No,1,No,Yes,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,No,No,34,Yes,No,Yes,No,No,One year,No,Mailed check,56.95,1889.5,No
2,No,No,2,Yes,Yes,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,No,No,45,Yes,No,Yes,Yes,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,No,No,2,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,Yes,Yes,24,Yes,No,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,Yes,Yes,72,No,Yes,Yes,No,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,Yes,Yes,11,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,Yes,No,4,No,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [485]:
import pickle

label_encoder = {}

for col in ['Partner', 'Dependents', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'Contract', 'PaperlessBilling', 'PaymentMethod']:
    labelencoder = LabelEncoder()
    data[col] = labelencoder.fit_transform(data[col])
    label_encoder[col] = labelencoder

with open('label_encoder_churn_data.pkl', 'wb') as file:
    pickle.dump(label_encoder, file)

In [486]:
X = data.drop(columns='Churn')
y = data['Churn'].map({'Yes':1, 'No':0})

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [487]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)

X_test_scaled = scaler.transform(X_test)

In [488]:
model = RandomForestRegressor(n_estimators=10)

model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

In [489]:
accuracy = model.score(X_train_scaled, y_train)

print(f'Accuracy of model: {accuracy*100:.4f} %')

Accuracy of model: 85.4056 %


In [490]:
new_data = {
    'Partner': 'No', 
    'Dependents': 'No', 
    'tenure': 35,
    'OnlineSecurity': 'Yes', 
    'OnlineBackup': 'Yes', 
    'DeviceProtection': 'Yes', 
    'TechSupport': 'Yes', 
    'StreamingTV': 'Yes', 
    'Contract': 'One year', 
    'PaperlessBilling': 'Yes', 
    'PaymentMethod': 'Electronic check', 
    'MonthlyCharges': 100,
    'TotalCharges': 2000 
}

test_data = pd.DataFrame([new_data])

with open('label_encoder_churn_data.pkl', 'rb') as file:
    lencoder = pickle.load(file)

for col in ['Partner', 'Dependents', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'Contract', 'PaperlessBilling', 'PaymentMethod']:
    encoder = lencoder[col]
    test_data[col] = encoder.transform(test_data[col])

prediction = model.predict(test_data)

print(test_data)

prediction


   Partner  Dependents  tenure  OnlineSecurity  OnlineBackup  \
0        0           0      35               2             2   

   DeviceProtection  TechSupport  StreamingTV  Contract  PaperlessBilling  \
0                 2            2            2         1                 1   

   PaymentMethod  MonthlyCharges  TotalCharges  
0              2             100          2000  




array([0.2])