## ML Model usage

### Importing ML Libraries

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier, XGBRFClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
# Read the churn dataset (CSV expected in the working directory) into the modelling frame `ml`.
csv_path = 'Customer_churn_Data_Cleaning_Analysis.csv'
ml = pd.read_csv(csv_path)

In [4]:
# Preview the first five rows of the freshly loaded frame.
ml.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tengrp,churn,mcgrp,tcgrp
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,Month-to-month,Yes,Electronic check,29.85,29.85,No,1 year,0,18.26 to 35.5,18.8 to 402.225
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,One year,No,Mailed check,56.95,1889.5,No,3 year,0,35.5 to 70.35,1397.475 to 3786.6
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1 year,1,35.5 to 70.35,18.8 to 402.225
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,One year,No,Bank transfer (automatic),42.3,1840.75,No,4 year,0,35.5 to 70.35,1397.475 to 3786.6
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1 year,1,70.35 to 89.85,18.8 to 402.225


In [5]:
# Drop the customer identifier and the helper columns that must not reach the model:
#   - 'customerID' is a per-row identifier;
#   - 'churn' mirrors the 'Churn' target in the preview rows (No -> 0, Yes -> 1), so keeping
#     it as a feature would leak the label;
#   - 'mcgrp' / 'tcgrp' look like binned ranges of MonthlyCharges / TotalCharges -- TODO confirm.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this cell
# re-runnable: a second execution no longer raises KeyError for already-removed columns.
# `axis=1` was redundant because `columns=` already selects the axis.
cols_to_drop = ['customerID', 'churn', 'mcgrp', 'tcgrp']
ml = ml.drop(columns=cols_to_drop, errors='ignore')

In [6]:
# Preview again to confirm 'customerID', 'churn', 'mcgrp' and 'tcgrp' are no longer present.
ml.head(n=5)

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tengrp
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1 year
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,3 year
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1 year
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,...,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,4 year
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1 year


## Preprocessing

## Binary categorical cols
#### gender --> Male/Female --> Label Encoding
#### SeniorCitizen --> 0/1 --> already encoded
#### Partner --> Yes/No --> Label Encoding
#### Dependents --> Yes/No --> Label Encoding
#### PhoneService --> Yes/No --> Label Encoding
#### PaperlessBilling --> Yes/No --> Label Encoding
#### Churn --> Yes/No --> Label Encoding

In [7]:
# Two-level columns are mapped to 0/1. LabelEncoder assigns codes in sorted class order,
# so Female -> 0 / Male -> 1 and No -> 0 / Yes -> 1.
cat_cols = ['gender','Partner','Dependents','PhoneService','PaperlessBilling','Churn']

# Keep one fitted encoder per column. Refitting a single shared encoder would discard
# every mapping except the last one, making it impossible to inspect `.classes_`,
# call `inverse_transform`, or encode new data consistently for the other columns.
label_encoders = {}
for col in cat_cols:
    label_encoders[col] = LabelEncoder()
    ml[col] = label_encoders[col].fit_transform(ml[col])

# Backward-compatible alias: `le` ends up fitted on the last column ('Churn'),
# exactly as it did when a single encoder was reused in the loop.
le = label_encoders[cat_cols[-1]]

In [8]:
# Preview to confirm the two-level columns (gender, Partner, ..., Churn) now hold 0/1 codes.
ml.head(n=5)

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tengrp
0,0,0,1,0,1,0,No phone service,DSL,No,Yes,...,No,No,No,Month-to-month,1,Electronic check,29.85,29.85,0,1 year
1,1,0,0,0,34,1,No,DSL,Yes,No,...,No,No,No,One year,0,Mailed check,56.95,1889.5,0,3 year
2,1,0,0,0,2,1,No,DSL,Yes,Yes,...,No,No,No,Month-to-month,1,Mailed check,53.85,108.15,1,1 year
3,1,0,0,0,45,0,No phone service,DSL,Yes,No,...,Yes,No,No,One year,0,Bank transfer (automatic),42.3,1840.75,0,4 year
4,0,0,0,0,2,1,No,Fiber optic,No,No,...,No,No,No,Month-to-month,1,Electronic check,70.7,151.65,1,1 year


## Multi-category cols -- unordered (nominal)
#### MultipleLines: ['No phone service' 'No' 'Yes'] --> one-hot encoding
#### InternetService: ['DSL' 'Fiber optic' 'No']
#### OnlineSecurity: ['No' 'Yes' 'No internet service']
#### OnlineBackup: ['Yes' 'No' 'No internet service']
#### DeviceProtection: ['No' 'Yes' 'No internet service']
#### TechSupport: ['No' 'Yes' 'No internet service']
#### StreamingTV: ['No' 'Yes' 'No internet service']
#### StreamingMovies: ['No' 'Yes' 'No internet service']
#### Contract: ['Month-to-month' 'One year' 'Two year'] 
#### PaymentMethod: ['Electronic check' 'Mailed check' 'Bank transfer (automatic)' 'Credit card (automatic)']