## **Feature Engineering**

In [308]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler,OneHotEncoder,OrdinalEncoder,LabelEncoder
  

In [309]:
# Load the raw Telco customer-churn CSV; the path is relative to the notebook's working directory.
dataset = pd.read_csv('../datasets/raw/Telco-customer-churn-init-dataset.csv') 

In [310]:
dataset.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [311]:
dataset['tenure'].dtype

dtype('int64')

In [312]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [313]:
# Remove every row whose tenure is 0 before any encoding/scaling is done.
# NOTE(review): presumably these are brand-new customers with a blank
# TotalCharges (a later cell looks for ' ' in TotalCharges and finds none
# once these rows are gone) -- confirm the reason and record it here.
indices_to_drop = dataset.index[dataset['tenure'] == 0]
print(indices_to_drop)
# Rebind instead of using inplace=True, matching how customerID is dropped later.
dataset = dataset.drop(index=indices_to_drop)

Index([488, 753, 936, 1082, 1340, 3331, 3826, 4380, 5218, 6670, 6754], dtype='int64')


In [314]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7032 non-null   object 
 1   gender            7032 non-null   object 
 2   SeniorCitizen     7032 non-null   int64  
 3   Partner           7032 non-null   object 
 4   Dependents        7032 non-null   object 
 5   tenure            7032 non-null   int64  
 6   PhoneService      7032 non-null   object 
 7   MultipleLines     7032 non-null   object 
 8   InternetService   7032 non-null   object 
 9   OnlineSecurity    7032 non-null   object 
 10  OnlineBackup      7032 non-null   object 
 11  DeviceProtection  7032 non-null   object 
 12  TechSupport       7032 non-null   object 
 13  StreamingTV       7032 non-null   object 
 14  StreamingMovies   7032 non-null   object 
 15  Contract          7032 non-null   object 
 16  PaperlessBilling  7032 non-null   object 
 17  

In [315]:
dataset[dataset['TotalCharges'] == ' '].index

Index([], dtype='int64')

In [316]:
dataset.shape

(7032, 21)

In [317]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7032 non-null   object 
 1   gender            7032 non-null   object 
 2   SeniorCitizen     7032 non-null   int64  
 3   Partner           7032 non-null   object 
 4   Dependents        7032 non-null   object 
 5   tenure            7032 non-null   int64  
 6   PhoneService      7032 non-null   object 
 7   MultipleLines     7032 non-null   object 
 8   InternetService   7032 non-null   object 
 9   OnlineSecurity    7032 non-null   object 
 10  OnlineBackup      7032 non-null   object 
 11  DeviceProtection  7032 non-null   object 
 12  TechSupport       7032 non-null   object 
 13  StreamingTV       7032 non-null   object 
 14  StreamingMovies   7032 non-null   object 
 15  Contract          7032 non-null   object 
 16  PaperlessBilling  7032 non-null   object 
 17  

#### **Column - `customerID`**

In [318]:
dataset.shape

(7032, 21)

In [319]:
# Drop the customerID column and rebind the result to `dataset`.
dataset = dataset.drop(columns=['customerID'])

In [320]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [321]:
dataset.shape

(7032, 20)

#### **Column Report - `customerID`**
- Dropped the feature
- It was not relevant to the target variable
---

In [322]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [323]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   object 
 1   SeniorCitizen     7032 non-null   int64  
 2   Partner           7032 non-null   object 
 3   Dependents        7032 non-null   object 
 4   tenure            7032 non-null   int64  
 5   PhoneService      7032 non-null   object 
 6   MultipleLines     7032 non-null   object 
 7   InternetService   7032 non-null   object 
 8   OnlineSecurity    7032 non-null   object 
 9   OnlineBackup      7032 non-null   object 
 10  DeviceProtection  7032 non-null   object 
 11  TechSupport       7032 non-null   object 
 12  StreamingTV       7032 non-null   object 
 13  StreamingMovies   7032 non-null   object 
 14  Contract          7032 non-null   object 
 15  PaperlessBilling  7032 non-null   object 
 16  PaymentMethod     7032 non-null   object 
 17  

#### **Column - `gender`**

In [324]:
encoder = LabelEncoder()

In [325]:
# LabelEncoder expects a 1-D array of labels, so pass the Series (single
# brackets). Passing the one-column DataFrame dataset[['gender']] works only
# by implicit flattening and emits a DataConversionWarning.
encoded_gender = pd.DataFrame(encoder.fit_transform(dataset['gender']), columns=['gender'])
# Reuse the frame's own index so the column assignment in the next cell
# aligns row for row.
encoded_gender.index = dataset.index
encoded_gender.head()

  y = column_or_1d(y, warn=True)


Unnamed: 0,gender
0,0
1,1
2,1
3,1
4,0


In [326]:
dataset['gender'] = encoded_gender
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [327]:
dataset['gender'].info()

<class 'pandas.core.series.Series'>
Index: 7032 entries, 0 to 7042
Series name: gender
Non-Null Count  Dtype
--------------  -----
7032 non-null   int64
dtypes: int64(1)
memory usage: 109.9 KB


In [328]:
dataset.shape

(7032, 20)

In [329]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   int64  
 1   SeniorCitizen     7032 non-null   int64  
 2   Partner           7032 non-null   object 
 3   Dependents        7032 non-null   object 
 4   tenure            7032 non-null   int64  
 5   PhoneService      7032 non-null   object 
 6   MultipleLines     7032 non-null   object 
 7   InternetService   7032 non-null   object 
 8   OnlineSecurity    7032 non-null   object 
 9   OnlineBackup      7032 non-null   object 
 10  DeviceProtection  7032 non-null   object 
 11  TechSupport       7032 non-null   object 
 12  StreamingTV       7032 non-null   object 
 13  StreamingMovies   7032 non-null   object 
 14  Contract          7032 non-null   object 
 15  PaperlessBilling  7032 non-null   object 
 16  PaymentMethod     7032 non-null   object 
 17  

#### **Column Report - `gender`**
- Performed Label Encoding
- `Female: 0`, `Male: 1`
--- 

In [330]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [331]:
dataset.SeniorCitizen.info()

<class 'pandas.core.series.Series'>
Index: 7032 entries, 0 to 7042
Series name: SeniorCitizen
Non-Null Count  Dtype
--------------  -----
7032 non-null   int64
dtypes: int64(1)
memory usage: 109.9 KB


#### **Column - `Partner`**

In [332]:
dataset['Partner'].head()

0    Yes
1     No
2     No
3     No
4     No
Name: Partner, dtype: object

In [333]:
encoder = LabelEncoder() 

In [334]:
# Encode Partner as integer codes and give the result the frame's own index,
# so the column assignment in the next cell aligns row for row.
partner_codes = encoder.fit_transform(dataset['Partner'])
encoded_partner = pd.DataFrame({'Partner': partner_codes}, index=dataset.index)
encoded_partner.head()

Unnamed: 0,Partner
0,1
1,0
2,0
3,0
4,0


In [335]:
dataset['Partner'] = encoded_partner
dataset.head() 

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [336]:
dataset.shape

(7032, 20)

In [337]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   int64  
 1   SeniorCitizen     7032 non-null   int64  
 2   Partner           7032 non-null   int64  
 3   Dependents        7032 non-null   object 
 4   tenure            7032 non-null   int64  
 5   PhoneService      7032 non-null   object 
 6   MultipleLines     7032 non-null   object 
 7   InternetService   7032 non-null   object 
 8   OnlineSecurity    7032 non-null   object 
 9   OnlineBackup      7032 non-null   object 
 10  DeviceProtection  7032 non-null   object 
 11  TechSupport       7032 non-null   object 
 12  StreamingTV       7032 non-null   object 
 13  StreamingMovies   7032 non-null   object 
 14  Contract          7032 non-null   object 
 15  PaperlessBilling  7032 non-null   object 
 16  PaymentMethod     7032 non-null   object 
 17  

#### **Column Report - `Partner`**
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

In [338]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [340]:
def label_encode(dataset,feature):
    """Overwrite a categorical column with its integer label codes.

    A fresh ``LabelEncoder`` is fitted on ``dataset[feature]`` and the column
    is replaced by the resulting codes.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame holding the column. It is modified in place.
    feature : str
        Name of the column to encode.

    Returns
    -------
    pandas.DataFrame
        The same ``dataset`` object, with ``feature`` replaced by its codes.
    """
    codes = LabelEncoder().fit_transform(dataset[feature])
    # Build the replacement column on the frame's own index so the assignment
    # aligns row for row even when the index is not a default 0..n-1 range.
    dataset[feature] = pd.DataFrame({feature: codes}, index=dataset.index)
    return dataset

#### **Column - `Dependents`**

In [341]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [342]:
dataset = label_encode(dataset,'Dependents')

In [343]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,0,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,0,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,0,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,0,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [344]:
dataset.shape

(7032, 20)

In [345]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   int64  
 1   SeniorCitizen     7032 non-null   int64  
 2   Partner           7032 non-null   int64  
 3   Dependents        7032 non-null   int64  
 4   tenure            7032 non-null   int64  
 5   PhoneService      7032 non-null   object 
 6   MultipleLines     7032 non-null   object 
 7   InternetService   7032 non-null   object 
 8   OnlineSecurity    7032 non-null   object 
 9   OnlineBackup      7032 non-null   object 
 10  DeviceProtection  7032 non-null   object 
 11  TechSupport       7032 non-null   object 
 12  StreamingTV       7032 non-null   object 
 13  StreamingMovies   7032 non-null   object 
 14  Contract          7032 non-null   object 
 15  PaperlessBilling  7032 non-null   object 
 16  PaymentMethod     7032 non-null   object 
 17  

#### **Column Report - `Dependents`**
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

In [346]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [347]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,0,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,0,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,0,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,0,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [348]:
def standardize(dataset,feature):
    """Overwrite a numeric column with its standardized (z-score) values.

    A fresh ``StandardScaler`` is fitted on ``dataset[feature]`` and the
    column is replaced by the scaled values.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame holding the column. It is modified in place.
    feature : str
        Name of the column to scale.

    Returns
    -------
    pandas.DataFrame
        The same ``dataset`` object, with ``feature`` replaced by scaled values.
    """
    # NOTE(review): the scaler is fitted on every row passed in; if a
    # train/test split happens after this notebook, test-set statistics leak
    # into the scaling -- confirm where the split is done.
    column_2d = dataset[[feature]]  # double brackets: the scaler needs 2-D input
    z_scores = StandardScaler().fit_transform(column_2d)
    # Rebuild on the frame's own index so the assignment aligns row for row.
    dataset[feature] = pd.DataFrame(z_scores, index=dataset.index, columns=[feature])
    return dataset

#### **Column - `tenure`**

In [349]:
dataset = standardize(dataset,'tenure')

In [350]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,-1.280248,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,0,0.064303,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,0,-1.239504,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,0,0.512486,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,0,-1.239504,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [351]:
dataset.tenure.describe()

count    7.032000e+03
mean    -1.126643e-16
std      1.000071e+00
min     -1.280248e+00
25%     -9.542963e-01
50%     -1.394171e-01
75%      9.199259e-01
max      1.612573e+00
Name: tenure, dtype: float64

In [352]:
dataset.shape

(7032, 20)

In [353]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   int64  
 1   SeniorCitizen     7032 non-null   int64  
 2   Partner           7032 non-null   int64  
 3   Dependents        7032 non-null   int64  
 4   tenure            7032 non-null   float64
 5   PhoneService      7032 non-null   object 
 6   MultipleLines     7032 non-null   object 
 7   InternetService   7032 non-null   object 
 8   OnlineSecurity    7032 non-null   object 
 9   OnlineBackup      7032 non-null   object 
 10  DeviceProtection  7032 non-null   object 
 11  TechSupport       7032 non-null   object 
 12  StreamingTV       7032 non-null   object 
 13  StreamingMovies   7032 non-null   object 
 14  Contract          7032 non-null   object 
 15  PaperlessBilling  7032 non-null   object 
 16  PaymentMethod     7032 non-null   object 
 17  

#### **Column Report - `tenure`**
- Performed Standardization
---

In [354]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [355]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,-1.280248,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,0,0.064303,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,0,-1.239504,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,0,0.512486,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,0,-1.239504,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


#### **Column - `PhoneService`**

In [356]:
dataset.PhoneService.unique()

array(['No', 'Yes'], dtype=object)

In [357]:
# Rebind the returned frame, as the other label_encode calls in this notebook
# do, instead of relying on the helper's in-place side effect.
dataset = label_encode(dataset,'PhoneService')
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,-1.280248,0,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,0,0.064303,1,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,0,-1.239504,1,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,0,0.512486,0,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,0,-1.239504,1,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [358]:
dataset.shape

(7032, 20)

In [359]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   int64  
 1   SeniorCitizen     7032 non-null   int64  
 2   Partner           7032 non-null   int64  
 3   Dependents        7032 non-null   int64  
 4   tenure            7032 non-null   float64
 5   PhoneService      7032 non-null   int64  
 6   MultipleLines     7032 non-null   object 
 7   InternetService   7032 non-null   object 
 8   OnlineSecurity    7032 non-null   object 
 9   OnlineBackup      7032 non-null   object 
 10  DeviceProtection  7032 non-null   object 
 11  TechSupport       7032 non-null   object 
 12  StreamingTV       7032 non-null   object 
 13  StreamingMovies   7032 non-null   object 
 14  Contract          7032 non-null   object 
 15  PaperlessBilling  7032 non-null   object 
 16  PaymentMethod     7032 non-null   object 
 17  

#### **Column Report - `PhoneService`**
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---  

In [360]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [361]:
def remove_category(dataset, feature, category, replacement='No'):
    """Fold one category of a column into another category.

    Every occurrence of ``category`` in ``dataset[feature]`` is replaced by
    ``replacement``. No rows are removed; the category simply stops existing
    as a distinct value.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame holding the column. It is modified in place.
    feature : str
        Name of the column to edit.
    category : object
        The value to replace (e.g. ``'No phone service'``).
    replacement : object, default ``'No'``
        The value written in its place. The default keeps the original
        behaviour of merging the category into ``'No'``.

    Returns
    -------
    pandas.DataFrame
        The same ``dataset`` object, with the column updated.
    """
    dataset[feature] = dataset[feature].replace(category, replacement)
    return dataset

#### **Column - `MultipleLines`**

In [362]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,-1.280248,0,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,0,0.064303,1,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,0,-1.239504,1,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,0,0.512486,0,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,0,-1.239504,1,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [363]:
dataset.MultipleLines.unique()

array(['No phone service', 'No', 'Yes'], dtype=object)

In [364]:
dataset = remove_category(dataset,'MultipleLines','No phone service')
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,-1.280248,0,No,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,0,0.064303,1,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,0,-1.239504,1,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,0,0.512486,0,No,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,0,-1.239504,1,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [365]:
dataset.MultipleLines.unique()

array(['No', 'Yes'], dtype=object)

In [366]:
dataset['MultipleLines'].value_counts()

MultipleLines
No     4065
Yes    2967
Name: count, dtype: int64

In [367]:
dataset = label_encode(dataset,'MultipleLines')
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,-1.280248,0,0,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,1,0,0,0,0.064303,1,0,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,1,0,0,0,-1.239504,1,0,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,1,0,0,0,0.512486,0,0,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,0,0,0,0,-1.239504,1,0,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [368]:
dataset.shape

(7032, 20)

In [369]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   int64  
 1   SeniorCitizen     7032 non-null   int64  
 2   Partner           7032 non-null   int64  
 3   Dependents        7032 non-null   int64  
 4   tenure            7032 non-null   float64
 5   PhoneService      7032 non-null   int64  
 6   MultipleLines     7032 non-null   int64  
 7   InternetService   7032 non-null   object 
 8   OnlineSecurity    7032 non-null   object 
 9   OnlineBackup      7032 non-null   object 
 10  DeviceProtection  7032 non-null   object 
 11  TechSupport       7032 non-null   object 
 12  StreamingTV       7032 non-null   object 
 13  StreamingMovies   7032 non-null   object 
 14  Contract          7032 non-null   object 
 15  PaperlessBilling  7032 non-null   object 
 16  PaymentMethod     7032 non-null   object 
 17  

#### **Column Report - `MultipleLines`**
- Merged `No phone service` into `No` (no rows were removed)
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

In [370]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

#### **Column - `InternetService`**

In [371]:
dataset.InternetService.value_counts()

InternetService
Fiber optic    3096
DSL            2416
No             1520
Name: count, dtype: int64

In [372]:
# 1 when the customer has any internet service (DSL or fiber optic), 0 when
# InternetService is "No". A vectorised comparison replaces the per-row lambda
# and produces the same int64 column.
dataset['HasInternet'] = (dataset['InternetService'] != "No").astype('int64')
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet
0,0,0,1,0,-1.280248,0,0,DSL,No,Yes,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1
1,1,0,0,0,0.064303,1,0,DSL,Yes,No,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,1
2,1,0,0,0,-1.239504,1,0,DSL,Yes,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1
3,1,0,0,0,0.512486,0,0,DSL,Yes,No,...,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1
4,0,0,0,0,-1.239504,1,0,Fiber optic,No,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1


In [373]:
dataset.shape

(7032, 21)

In [374]:
dataset.HasInternet.value_counts()

HasInternet
1    5512
0    1520
Name: count, dtype: int64

In [375]:
dataset.shape

(7032, 21)

In [376]:
encoder = OneHotEncoder(sparse_output=False)

In [377]:
# One-hot encode InternetService. Column names are derived from the categories
# the encoder actually learned (spaces -> underscores) instead of a hard-coded
# list, so a name can never be attached to the wrong column if the category
# set or its order changes. For the current data this yields
# InternetService_DSL, InternetService_Fiber_optic, InternetService_No.
encoded_values = encoder.fit_transform(dataset[['InternetService']])
onehot_columns = [
    f"InternetService_{str(category).replace(' ', '_')}"
    for category in encoder.categories_[0]
]
# Keep the frame's own index so the later concat aligns row for row.
encoded_df = pd.DataFrame(encoded_values, columns=onehot_columns, index=dataset.index)
encoded_df.head()

Unnamed: 0,InternetService_DSL,InternetService_Fiber_optic,InternetService_No
0,1.0,0.0,0.0
1,1.0,0.0,0.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,1.0,0.0


In [378]:
# Drop the 'No' dummy: it is fully determined by the other two dummy columns
# (and mirrors HasInternet == 0), so keeping it would only add redundancy.
encoded_df.drop('InternetService_No',inplace=True,axis=1)

In [379]:
encoded_df.head()

Unnamed: 0,InternetService_DSL,InternetService_Fiber_optic
0,1.0,0.0
1,1.0,0.0
2,1.0,0.0
3,1.0,0.0
4,0.0,1.0


In [380]:
encoded_df.shape

(7032, 2)

In [381]:
dataset.shape

(7032, 21)

In [382]:
# Append the dummy columns (cast from float 0.0/1.0 to int) to the feature
# frame; rows align on the index that encoded_df shares with dataset.
# Re-running this cell would append the dummy columns a second time.
dataset = pd.concat([dataset,encoded_df.astype(int)],axis=1)

In [383]:
dataset.shape

(7032, 23)

In [384]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,DSL,No,Yes,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,DSL,Yes,No,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,DSL,Yes,Yes,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,DSL,Yes,No,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,Fiber optic,No,No,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


In [385]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn', 'HasInternet',
       'InternetService_DSL', 'InternetService_Fiber_optic'],
      dtype='object')

In [386]:
# The original string column is now represented by HasInternet and the two
# dummy columns, so remove it.
dataset.drop('InternetService',axis=1,inplace=True)

In [387]:
dataset.shape

(7032, 22)

In [388]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,No,Yes,No,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,Yes,No,Yes,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,Yes,Yes,No,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,Yes,No,Yes,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,No,No,No,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


In [389]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges',
       'TotalCharges', 'Churn', 'HasInternet', 'InternetService_DSL',
       'InternetService_Fiber_optic'],
      dtype='object')

In [390]:
dataset.shape

(7032, 22)

In [391]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   gender                       7032 non-null   int64  
 1   SeniorCitizen                7032 non-null   int64  
 2   Partner                      7032 non-null   int64  
 3   Dependents                   7032 non-null   int64  
 4   tenure                       7032 non-null   float64
 5   PhoneService                 7032 non-null   int64  
 6   MultipleLines                7032 non-null   int64  
 7   OnlineSecurity               7032 non-null   object 
 8   OnlineBackup                 7032 non-null   object 
 9   DeviceProtection             7032 non-null   object 
 10  TechSupport                  7032 non-null   object 
 11  StreamingTV                  7032 non-null   object 
 12  StreamingMovies              7032 non-null   object 
 13  Contract               

#### **Column Report - `InternetService`**
- Added `HasInternet` feature
- Performed One-Hot Encoding
---

In [392]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,No,Yes,No,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,Yes,No,Yes,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,Yes,Yes,No,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,Yes,No,Yes,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,No,No,No,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


#### **Column - `OnlineSecurity`**

In [393]:
dataset.OnlineSecurity.value_counts()

OnlineSecurity
No                     3497
Yes                    2015
No internet service    1520
Name: count, dtype: int64

In [394]:
dataset = remove_category(dataset,'OnlineSecurity','No internet service')
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,No,Yes,No,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,Yes,No,Yes,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,Yes,Yes,No,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,Yes,No,Yes,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,No,No,No,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


In [395]:
dataset.shape

(7032, 22)

In [396]:
dataset.OnlineSecurity.value_counts()

OnlineSecurity
No     5017
Yes    2015
Name: count, dtype: int64

In [397]:
# Rebind the frame returned by label_encode instead of discarding it, so the
# cell does not depend on the helper mutating its argument in place.
dataset = label_encode(dataset,'OnlineSecurity')
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,Yes,No,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,No,Yes,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,Yes,No,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,No,Yes,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,No,No,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


In [398]:
dataset.shape

(7032, 22)

In [399]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   gender                       7032 non-null   int64  
 1   SeniorCitizen                7032 non-null   int64  
 2   Partner                      7032 non-null   int64  
 3   Dependents                   7032 non-null   int64  
 4   tenure                       7032 non-null   float64
 5   PhoneService                 7032 non-null   int64  
 6   MultipleLines                7032 non-null   int64  
 7   OnlineSecurity               7032 non-null   int64  
 8   OnlineBackup                 7032 non-null   object 
 9   DeviceProtection             7032 non-null   object 
 10  TechSupport                  7032 non-null   object 
 11  StreamingTV                  7032 non-null   object 
 12  StreamingMovies              7032 non-null   object 
 13  Contract               

#### **Column Report - `OnlineSecurity`**
- Removed redundant category (`No internet service` merged into `No`)
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

#### **Column - `OnlineBackup`**

In [400]:
dataset['OnlineBackup'].value_counts()

OnlineBackup
No                     3087
Yes                    2425
No internet service    1520
Name: count, dtype: int64

In [401]:
# Keep the frame returned by remove_category rather than relying on the helper
# mutating `dataset` in place.
dataset = remove_category(dataset,'OnlineBackup','No internet service')
dataset['OnlineBackup'].value_counts()

OnlineBackup
No     4607
Yes    2425
Name: count, dtype: int64

In [402]:
dataset = label_encode(dataset,'OnlineBackup')
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,1,No,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,0,Yes,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,1,No,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,0,Yes,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,0,No,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


In [403]:
dataset.shape

(7032, 22)

In [404]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   gender                       7032 non-null   int64  
 1   SeniorCitizen                7032 non-null   int64  
 2   Partner                      7032 non-null   int64  
 3   Dependents                   7032 non-null   int64  
 4   tenure                       7032 non-null   float64
 5   PhoneService                 7032 non-null   int64  
 6   MultipleLines                7032 non-null   int64  
 7   OnlineSecurity               7032 non-null   int64  
 8   OnlineBackup                 7032 non-null   int64  
 9   DeviceProtection             7032 non-null   object 
 10  TechSupport                  7032 non-null   object 
 11  StreamingTV                  7032 non-null   object 
 12  StreamingMovies              7032 non-null   object 
 13  Contract               

#### **Column Report - `OnlineBackup`**
- Removed redundant category
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

#### **Column - `DeviceProtection`**

In [405]:
dataset['DeviceProtection'].value_counts()

DeviceProtection
No                     3094
Yes                    2418
No internet service    1520
Name: count, dtype: int64

In [406]:
dataset= remove_category(dataset,'DeviceProtection','No internet service')

In [407]:
# Rebind the encoded frame instead of only displaying the helper's return
# value; the trailing expression keeps the same frame display as before.
dataset = label_encode(dataset,'DeviceProtection')
dataset

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,1,0,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,1,0,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,0,1,...,No,One year,No,Bank transfer (automatic),42.30,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,1,0,1,1,-0.343137,1,1,1,0,1,...,Yes,One year,Yes,Mailed check,84.80,1990.5,No,1,1,0
7039,0,0,1,1,1.612573,1,1,0,1,1,...,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No,1,0,1
7040,0,0,1,1,-0.872808,0,0,1,0,0,...,No,Month-to-month,Yes,Electronic check,29.60,346.45,No,1,1,0
7041,1,1,1,0,-1.158016,1,1,0,0,0,...,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes,1,0,1


In [408]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,1,0,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,1,0,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,0,1,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


In [409]:
dataset.shape

(7032, 22)

In [410]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   gender                       7032 non-null   int64  
 1   SeniorCitizen                7032 non-null   int64  
 2   Partner                      7032 non-null   int64  
 3   Dependents                   7032 non-null   int64  
 4   tenure                       7032 non-null   float64
 5   PhoneService                 7032 non-null   int64  
 6   MultipleLines                7032 non-null   int64  
 7   OnlineSecurity               7032 non-null   int64  
 8   OnlineBackup                 7032 non-null   int64  
 9   DeviceProtection             7032 non-null   int64  
 10  TechSupport                  7032 non-null   object 
 11  StreamingTV                  7032 non-null   object 
 12  StreamingMovies              7032 non-null   object 
 13  Contract               

#### **Column Report - `DeviceProtection`**
- Removed redundant category
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

In [411]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges',
       'TotalCharges', 'Churn', 'HasInternet', 'InternetService_DSL',
       'InternetService_Fiber_optic'],
      dtype='object')

In [412]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,1,0,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,No,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,1,0,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,0,1,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


#### **Column - `TechSupport`**

In [413]:
dataset['TechSupport'].value_counts()

TechSupport
No                     3472
Yes                    2040
No internet service    1520
Name: count, dtype: int64

In [414]:
dataset= remove_category(dataset,'TechSupport','No internet service')

In [415]:
dataset['TechSupport'].value_counts()

TechSupport
No     4992
Yes    2040
Name: count, dtype: int64

In [416]:
dataset = label_encode(dataset,'TechSupport')

In [417]:
dataset['TechSupport'].value_counts()

TechSupport
0    4992
1    2040
Name: count, dtype: int64

In [418]:
dataset.shape

(7032, 22)

In [419]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   gender                       7032 non-null   int64  
 1   SeniorCitizen                7032 non-null   int64  
 2   Partner                      7032 non-null   int64  
 3   Dependents                   7032 non-null   int64  
 4   tenure                       7032 non-null   float64
 5   PhoneService                 7032 non-null   int64  
 6   MultipleLines                7032 non-null   int64  
 7   OnlineSecurity               7032 non-null   int64  
 8   OnlineBackup                 7032 non-null   int64  
 9   DeviceProtection             7032 non-null   int64  
 10  TechSupport                  7032 non-null   int64  
 11  StreamingTV                  7032 non-null   object 
 12  StreamingMovies              7032 non-null   object 
 13  Contract               

#### **Column Report - `TechSupport`**
- Removed redundant category
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

In [420]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges',
       'TotalCharges', 'Churn', 'HasInternet', 'InternetService_DSL',
       'InternetService_Fiber_optic'],
      dtype='object')

#### **Columns - `[StreamingTV,StreamingMovies]`**

In [421]:
dataset['StreamingMovies'].value_counts()

StreamingMovies
No                     2781
Yes                    2731
No internet service    1520
Name: count, dtype: int64

In [422]:
dataset['StreamingTV'].value_counts()

StreamingTV
No                     2809
Yes                    2703
No internet service    1520
Name: count, dtype: int64

In [423]:
# Apply the same 'No internet service' clean-up to both streaming columns.
for streaming_col in ('StreamingTV', 'StreamingMovies'):
    dataset = remove_category(dataset, streaming_col, 'No internet service')

In [424]:
# Label-encode both streaming columns, in the same order as before.
for streaming_col in ('StreamingTV', 'StreamingMovies'):
    dataset = label_encode(dataset, streaming_col)

In [425]:
dataset['StreamingMovies'].value_counts()

StreamingMovies
0    4301
1    2731
Name: count, dtype: int64

In [426]:
dataset['StreamingTV'].value_counts()

StreamingTV
0    4329
1    2703
Name: count, dtype: int64

In [427]:
dataset.shape

(7032, 22)

In [428]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   gender                       7032 non-null   int64  
 1   SeniorCitizen                7032 non-null   int64  
 2   Partner                      7032 non-null   int64  
 3   Dependents                   7032 non-null   int64  
 4   tenure                       7032 non-null   float64
 5   PhoneService                 7032 non-null   int64  
 6   MultipleLines                7032 non-null   int64  
 7   OnlineSecurity               7032 non-null   int64  
 8   OnlineBackup                 7032 non-null   int64  
 9   DeviceProtection             7032 non-null   int64  
 10  TechSupport                  7032 non-null   int64  
 11  StreamingTV                  7032 non-null   int64  
 12  StreamingMovies              7032 non-null   int64  
 13  Contract               

#### **Column Report - `[StreamingTV,StreamingMovies]`**
- Removed redundant category
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

In [429]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges',
       'TotalCharges', 'Churn', 'HasInternet', 'InternetService_DSL',
       'InternetService_Fiber_optic'],
      dtype='object')

#### **Column - `Contract`**

In [430]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,1,0,...,0,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,0,One year,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,1,0,...,0,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,0,1,...,0,One year,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,0,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,0,1


In [431]:
dataset.Contract.value_counts()

Contract
Month-to-month    3875
Two year          1685
One year          1472
Name: count, dtype: int64

In [432]:
# Contract labels listed from shortest to longest commitment; the position in
# this list is the ordinal code assigned to the label.
contract_mapping = {
    label: rank
    for rank, label in enumerate(['Month-to-month', 'One year', 'Two year'])
}


In [433]:
# Encode Contract ordinally. Series.map turns every label that is missing from
# contract_mapping into NaN (this also happens if the cell is re-run on the
# already-encoded column), so validate the result before overwriting the column.
contract_codes = dataset['Contract'].map(contract_mapping)
if contract_codes.isna().any():
    raise ValueError(
        "Contract contains values missing from contract_mapping: "
        f"{dataset.loc[contract_codes.isna(), 'Contract'].unique().tolist()}"
    )
dataset['Contract'] = contract_codes

In [434]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,1,0,...,0,0,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,0,1,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,1,0,...,0,0,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,0,1,...,0,1,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,0,0,Yes,Electronic check,70.7,151.65,Yes,1,0,1


In [435]:
dataset['Contract'].value_counts()

Contract
0    3875
2    1685
1    1472
Name: count, dtype: int64

In [436]:
dataset.shape

(7032, 22)

In [437]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   gender                       7032 non-null   int64  
 1   SeniorCitizen                7032 non-null   int64  
 2   Partner                      7032 non-null   int64  
 3   Dependents                   7032 non-null   int64  
 4   tenure                       7032 non-null   float64
 5   PhoneService                 7032 non-null   int64  
 6   MultipleLines                7032 non-null   int64  
 7   OnlineSecurity               7032 non-null   int64  
 8   OnlineBackup                 7032 non-null   int64  
 9   DeviceProtection             7032 non-null   int64  
 10  TechSupport                  7032 non-null   int64  
 11  StreamingTV                  7032 non-null   int64  
 12  StreamingMovies              7032 non-null   int64  
 13  Contract               

#### **Column Report - `Contract`**
- Performed Ordinal Encoding
- `Month-to-month: 0`, `One year: 1`, `Two year: 2`
--- 

In [438]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,1,0,...,0,0,Yes,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,0,1,No,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,1,0,...,0,0,Yes,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,0,1,...,0,1,No,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,0,0,Yes,Electronic check,70.7,151.65,Yes,1,0,1


#### **Column - `PaperlessBilling`**

In [439]:
dataset['PaperlessBilling'].value_counts()

PaperlessBilling
Yes    4168
No     2864
Name: count, dtype: int64

In [440]:
dataset = label_encode(dataset,'PaperlessBilling')

In [441]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic
0,0,0,1,0,-1.280248,0,0,0,1,0,...,0,0,1,Electronic check,29.85,29.85,No,1,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,0,1,0,Mailed check,56.95,1889.5,No,1,1,0
2,1,0,0,0,-1.239504,1,0,1,1,0,...,0,0,1,Mailed check,53.85,108.15,Yes,1,1,0
3,1,0,0,0,0.512486,0,0,1,0,1,...,0,1,0,Bank transfer (automatic),42.3,1840.75,No,1,1,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,0,0,1,Electronic check,70.7,151.65,Yes,1,0,1


In [442]:
dataset.shape

(7032, 22)

In [443]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   gender                       7032 non-null   int64  
 1   SeniorCitizen                7032 non-null   int64  
 2   Partner                      7032 non-null   int64  
 3   Dependents                   7032 non-null   int64  
 4   tenure                       7032 non-null   float64
 5   PhoneService                 7032 non-null   int64  
 6   MultipleLines                7032 non-null   int64  
 7   OnlineSecurity               7032 non-null   int64  
 8   OnlineBackup                 7032 non-null   int64  
 9   DeviceProtection             7032 non-null   int64  
 10  TechSupport                  7032 non-null   int64  
 11  StreamingTV                  7032 non-null   int64  
 12  StreamingMovies              7032 non-null   int64  
 13  Contract               

#### **Column Report - `PaperlessBilling`**
- Performed Label Encoding
- `No: 0`, `Yes: 1`
---

In [444]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges',
       'TotalCharges', 'Churn', 'HasInternet', 'InternetService_DSL',
       'InternetService_Fiber_optic'],
      dtype='object')

#### **Column - `PaymentMethod`**

In [445]:
dataset['PaymentMethod'].value_counts()

PaymentMethod
Electronic check             2365
Mailed check                 1604
Bank transfer (automatic)    1542
Credit card (automatic)      1521
Name: count, dtype: int64

In [446]:
# NOTE(review): `encoder` is not referenced by any later cell in this section;
# the PaymentMethod dummies are built with pd.get_dummies instead, so this
# object looks like a leftover and is a candidate for removal.
encoder = OneHotEncoder(sparse_output=False)

In [447]:
# One indicator column per PaymentMethod category except the first, which
# drop_first=True leaves out; the indicators are cast to 0/1 integers.
payment_dummies = (
    pd.get_dummies(dataset['PaymentMethod'], prefix='PaymentMethod', drop_first=True)
    .astype(int)
)
payment_dummies.head()

Unnamed: 0,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,1,0
1,0,0,1
2,0,0,1
3,0,0,0
4,0,1,0


In [448]:
# Swap the original PaymentMethod column for its indicator columns, which are
# appended on the right-hand side of the frame.
dataset = pd.concat(
    [dataset.drop(columns=['PaymentMethod']), payment_dummies],
    axis='columns',
)
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,1,0,-1.280248,0,0,0,1,0,...,1,29.85,29.85,No,1,1,0,0,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,0,56.95,1889.5,No,1,1,0,0,0,1
2,1,0,0,0,-1.239504,1,0,1,1,0,...,1,53.85,108.15,Yes,1,1,0,0,0,1
3,1,0,0,0,0.512486,0,0,1,0,1,...,0,42.3,1840.75,No,1,1,0,0,0,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,1,70.7,151.65,Yes,1,0,1,0,1,0


In [449]:
dataset['PaymentMethod_Electronic check'].value_counts()

PaymentMethod_Electronic check
0    4667
1    2365
Name: count, dtype: int64

In [450]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'MonthlyCharges', 'TotalCharges',
       'Churn', 'HasInternet', 'InternetService_DSL',
       'InternetService_Fiber_optic', 'PaymentMethod_Credit card (automatic)',
       'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check'],
      dtype='object')

In [451]:
dataset.shape

(7032, 24)

In [453]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 24 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   gender                                 7032 non-null   int64  
 1   SeniorCitizen                          7032 non-null   int64  
 2   Partner                                7032 non-null   int64  
 3   Dependents                             7032 non-null   int64  
 4   tenure                                 7032 non-null   float64
 5   PhoneService                           7032 non-null   int64  
 6   MultipleLines                          7032 non-null   int64  
 7   OnlineSecurity                         7032 non-null   int64  
 8   OnlineBackup                           7032 non-null   int64  
 9   DeviceProtection                       7032 non-null   int64  
 10  TechSupport                            7032 non-null   int64  
 11  Streaming

#### **Column Report - `PaymentMethod`**
- Performed One-Hot Encoding (`drop_first=True`; `Bank transfer (automatic)` is the dropped reference category)
---

In [454]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,1,0,-1.280248,0,0,0,1,0,...,1,29.85,29.85,No,1,1,0,0,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,0,56.95,1889.5,No,1,1,0,0,0,1
2,1,0,0,0,-1.239504,1,0,1,1,0,...,1,53.85,108.15,Yes,1,1,0,0,0,1
3,1,0,0,0,0.512486,0,0,1,0,1,...,0,42.3,1840.75,No,1,1,0,0,0,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,1,70.7,151.65,Yes,1,0,1,0,1,0


#### **Column - `[MonthlyCharges,TotalCharges]`**

In [455]:
# Run the shared standardize helper over each charge column in turn.
for charge_col in ('MonthlyCharges', 'TotalCharges'):
    dataset = standardize(dataset, charge_col)

In [456]:
dataset.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,HasInternet,InternetService_DSL,InternetService_Fiber_optic,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,1,0,-1.280248,0,0,0,1,0,...,1,-1.161694,-0.994194,No,1,1,0,0,1,0
1,1,0,0,0,0.064303,1,0,1,0,1,...,0,-0.260878,-0.17374,No,1,1,0,0,0,1
2,1,0,0,0,-1.239504,1,0,1,1,0,...,1,-0.363923,-0.959649,Yes,1,1,0,0,0,1
3,1,0,0,0,0.512486,0,0,1,0,1,...,0,-0.74785,-0.195248,No,1,1,0,0,0,0
4,0,0,0,0,-1.239504,1,0,0,0,0,...,1,0.196178,-0.940457,Yes,1,0,1,0,1,0


In [458]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 24 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   gender                                 7032 non-null   int64  
 1   SeniorCitizen                          7032 non-null   int64  
 2   Partner                                7032 non-null   int64  
 3   Dependents                             7032 non-null   int64  
 4   tenure                                 7032 non-null   float64
 5   PhoneService                           7032 non-null   int64  
 6   MultipleLines                          7032 non-null   int64  
 7   OnlineSecurity                         7032 non-null   int64  
 8   OnlineBackup                           7032 non-null   int64  
 9   DeviceProtection                       7032 non-null   int64  
 10  TechSupport                            7032 non-null   int64  
 11  Streaming

#### **Column Report - `[MonthlyCharges,TotalCharges]`**
- Standardized both features
---

In [459]:
dataset.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'MonthlyCharges', 'TotalCharges',
       'Churn', 'HasInternet', 'InternetService_DSL',
       'InternetService_Fiber_optic', 'PaymentMethod_Credit card (automatic)',
       'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check'],
      dtype='object')

In [460]:
dataset.shape

(7032, 24)

#### **Column - `Churn`**

In [461]:
dataset['Churn'].value_counts()

Churn
No     5163
Yes    1869
Name: count, dtype: int64

In [462]:
dataset = label_encode(dataset,'Churn') 

In [463]:
dataset['Churn'].value_counts()

Churn
0    5163
1    1869
Name: count, dtype: int64

In [464]:
dataset.shape

(7032, 24)

In [466]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 24 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   gender                                 7032 non-null   int64  
 1   SeniorCitizen                          7032 non-null   int64  
 2   Partner                                7032 non-null   int64  
 3   Dependents                             7032 non-null   int64  
 4   tenure                                 7032 non-null   float64
 5   PhoneService                           7032 non-null   int64  
 6   MultipleLines                          7032 non-null   int64  
 7   OnlineSecurity                         7032 non-null   int64  
 8   OnlineBackup                           7032 non-null   int64  
 9   DeviceProtection                       7032 non-null   int64  
 10  TechSupport                            7032 non-null   int64  
 11  Streaming

#### **Column Report - `Churn`**
- Performed Label Encoding
- `No: 0`, `Yes: 1`
--- 

In [467]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 24 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   gender                                 7032 non-null   int64  
 1   SeniorCitizen                          7032 non-null   int64  
 2   Partner                                7032 non-null   int64  
 3   Dependents                             7032 non-null   int64  
 4   tenure                                 7032 non-null   float64
 5   PhoneService                           7032 non-null   int64  
 6   MultipleLines                          7032 non-null   int64  
 7   OnlineSecurity                         7032 non-null   int64  
 8   OnlineBackup                           7032 non-null   int64  
 9   DeviceProtection                       7032 non-null   int64  
 10  TechSupport                            7032 non-null   int64  
 11  Streaming

In [468]:
# Write the processed frame to the processed-datasets folder.
# index=False keeps the row labels out of the CSV; they are not contiguous
# (7032 entries labelled 0 to 7042).
dataset.to_csv('../datasets/processed/Telco-customer-churn-processed-dataset.csv',index=False)

## **Feature Engineering Complete**
---