In [119]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder 

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn_features.transformers import DataFrameSelector

from sklearn.metrics import  classification_report

from sklearn.ensemble import RandomForestClassifier

from xgboost import XGBClassifier

import joblib

In [120]:
# Location of the raw Telco customer-churn CSV.
# A raw string keeps the Windows backslashes literal: sequences such as "\K",
# "\d" and "\W" are not valid escapes and trigger a warning in a normal string.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative location so the notebook runs on other machines.
FILE_PATH = r"D:\Kasban_Churn_LLM\data\WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(FILE_PATH)

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [121]:
# (rows, columns) of the frame as loaded from the CSV.
df.shape


(7043, 21)

In [122]:
# Per-column non-null counts and dtypes of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [123]:
# Summary statistics; by default only numeric-dtype columns are described.
df.describe()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
count,7043.0,7043.0,7043.0
mean,0.162147,32.371149,64.761692
std,0.368612,24.559481,30.090047
min,0.0,0.0,18.25
25%,0.0,9.0,35.5
50%,0.0,29.0,70.35
75%,0.0,55.0,89.85
max,1.0,72.0,118.75


In [124]:
# Number of missing (NaN) entries in each column.
df.isna().sum(axis=0)

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [125]:
# Remove the customerID column from df in place.
# errors='ignore' makes the cell safe to re-run: once the column is gone a
# second execution is a no-op instead of raising KeyError. `columns=` already
# selects the axis, so the redundant `axis=1` is dropped.
df.drop(columns=['customerID'], inplace=True, errors='ignore')

In [126]:
# Column labels remaining in df at this point.
df.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [127]:
# Partition df's columns into two lists: those whose dtype is one of the four
# listed numeric dtypes, and every other column.
_numeric_dtypes = ['float32', 'float64', 'int32', 'int64']
num_cols, categ_cols = [], []
for column_name in df.columns:
    bucket = num_cols if df[column_name].dtype in _numeric_dtypes else categ_cols
    bucket.append(column_name)

print(f"Numerical columns: {num_cols}")
print(f"Categorical columns: {categ_cols}")

Numerical columns: ['SeniorCitizen', 'tenure', 'MonthlyCharges']
Categorical columns: ['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'TotalCharges', 'Churn']


In [128]:
# Distinct raw values stored in the TotalCharges column.
df["TotalCharges"].unique()

array(['29.85', '1889.5', '108.15', ..., '346.45', '306.6', '6844.5'],
      shape=(6531,), dtype=object)

In [129]:
# Parse TotalCharges as numbers; entries that cannot be parsed become NaN
# (errors='coerce').
total_charges_numeric = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges_numeric

# Per-column count of missing values after the conversion.
df.isnull().sum()

gender               0
SeniorCitizen        0
Partner              0
Dependents           0
tenure               0
PhoneService         0
MultipleLines        0
InternetService      0
OnlineSecurity       0
OnlineBackup         0
DeviceProtection     0
TechSupport          0
StreamingTV          0
StreamingMovies      0
Contract             0
PaperlessBilling     0
PaymentMethod        0
MonthlyCharges       0
TotalCharges        11
Churn                0
dtype: int64

In [130]:
# Show the rows whose TotalCharges is missing.
df[df['TotalCharges'].isnull()]

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
488,Female,0,Yes,Yes,0,No,No phone service,DSL,Yes,No,Yes,Yes,Yes,No,Two year,Yes,Bank transfer (automatic),52.55,,No
753,Male,0,No,Yes,0,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,20.25,,No
936,Female,0,Yes,Yes,0,Yes,No,DSL,Yes,Yes,Yes,No,Yes,Yes,Two year,No,Mailed check,80.85,,No
1082,Male,0,Yes,Yes,0,Yes,Yes,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,25.75,,No
1340,Female,0,Yes,Yes,0,No,No phone service,DSL,Yes,Yes,Yes,Yes,Yes,No,Two year,No,Credit card (automatic),56.05,,No
3331,Male,0,Yes,Yes,0,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,19.85,,No
3826,Male,0,Yes,Yes,0,Yes,Yes,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,25.35,,No
4380,Female,0,Yes,Yes,0,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,20.0,,No
5218,Male,0,Yes,Yes,0,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,One year,Yes,Mailed check,19.7,,No
6670,Female,0,Yes,Yes,0,Yes,Yes,DSL,No,Yes,Yes,Yes,Yes,No,Two year,No,Mailed check,73.35,,No


In [131]:
# Drop every row that contains at least one missing value, mutating df in place.
df.dropna(inplace=True)

In [132]:
# (rows, columns) of df at this point in the notebook.
df.shape

(7032, 20)

In [133]:
# Recompute the numeric / non-numeric column lists from df's current dtypes.
# A column counts as numeric only if its dtype is one of the four listed below.
numeric_flags = [
    df[name].dtype in ['float32', 'float64', 'int32', 'int64']
    for name in df.columns
]
num_cols = [name for name, is_numeric in zip(df.columns, numeric_flags) if is_numeric]
categ_cols = [name for name, is_numeric in zip(df.columns, numeric_flags) if not is_numeric]

print(f"Numerical columns: {num_cols}")
print(f"Categorical columns: {categ_cols}")

Numerical columns: ['SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges']
Categorical columns: ['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'Churn']


In [134]:
# Frequency of each distinct SeniorCitizen value.
df["SeniorCitizen"].value_counts()

SeniorCitizen
0    5890
1    1142
Name: count, dtype: int64

In [135]:
# dtype currently used to store SeniorCitizen.
df["SeniorCitizen"].dtype

dtype('int64')

In [136]:
# Replace the values 0 and 1 in SeniorCitizen with the labels "No" and "Yes".
senior_labels = {0: "No", 1: "Yes"}
df["SeniorCitizen"] = df["SeniorCitizen"].replace(senior_labels)

In [137]:
# Frequency of each distinct SeniorCitizen value.
df["SeniorCitizen"].value_counts()

SeniorCitizen
No     5890
Yes    1142
Name: count, dtype: int64

In [138]:
# Per-column non-null counts and dtypes of df at this point.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7032 non-null   object 
 1   SeniorCitizen     7032 non-null   object 
 2   Partner           7032 non-null   object 
 3   Dependents        7032 non-null   object 
 4   tenure            7032 non-null   int64  
 5   PhoneService      7032 non-null   object 
 6   MultipleLines     7032 non-null   object 
 7   InternetService   7032 non-null   object 
 8   OnlineSecurity    7032 non-null   object 
 9   OnlineBackup      7032 non-null   object 
 10  DeviceProtection  7032 non-null   object 
 11  TechSupport       7032 non-null   object 
 12  StreamingTV       7032 non-null   object 
 13  StreamingMovies   7032 non-null   object 
 14  Contract          7032 non-null   object 
 15  PaperlessBilling  7032 non-null   object 
 16  PaymentMethod     7032 non-null   object 
 17  

In [139]:
# Print the frequency table of every column listed in categ_cols,
# each followed by a dashed divider line.
for categ_name in categ_cols:
    print(df[categ_name].value_counts(), "--------------------------------------------------", sep="\n")

gender
Male      3549
Female    3483
Name: count, dtype: int64
--------------------------------------------------
Partner
No     3639
Yes    3393
Name: count, dtype: int64
--------------------------------------------------
Dependents
No     4933
Yes    2099
Name: count, dtype: int64
--------------------------------------------------
PhoneService
Yes    6352
No      680
Name: count, dtype: int64
--------------------------------------------------
MultipleLines
No                  3385
Yes                 2967
No phone service     680
Name: count, dtype: int64
--------------------------------------------------
InternetService
Fiber optic    3096
DSL            2416
No             1520
Name: count, dtype: int64
--------------------------------------------------
OnlineSecurity
No                     3497
Yes                    2015
No internet service    1520
Name: count, dtype: int64
--------------------------------------------------
OnlineBackup
No                     3087
Yes            

In [140]:
# Features: every column except the Churn label.
X = df.drop(columns='Churn')

# Target: 1 where Churn equals 'Yes', 0 for any other value.
y = df['Churn'].eq('Yes').astype('int64')

In [141]:
# Hold out 20% of the rows for testing, stratified on the label and with a
# fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [142]:
# Report the shape of each piece of the train/test split, one line per piece.
for split_name, split_part in [
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_test', X_test),
    ('y_test', y_test),
]:
    print(f'{split_name} shape -- ', split_part.shape)

X_train shape --  (5625, 19)
y_train shape --  (5625,)
X_test shape --  (1407, 19)
y_test shape --  (1407,)


In [159]:
# Preview the first rows of the held-out features.
X_test.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
974,Female,No,Yes,Yes,59,Yes,No,DSL,No,Yes,No,Yes,Yes,Yes,Two year,Yes,Credit card (automatic),75.95,4542.35
619,Female,No,No,No,7,Yes,Yes,Fiber optic,No,Yes,No,No,No,No,Month-to-month,Yes,Bank transfer (automatic),78.55,522.95
4289,Female,No,No,No,54,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,20.1,1079.45
3721,Female,No,No,No,2,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,No,Mailed check,20.65,38.7
4533,Female,No,Yes,No,71,Yes,Yes,Fiber optic,No,Yes,Yes,No,Yes,Yes,Two year,Yes,Bank transfer (automatic),105.15,7555.0


In [160]:
# Preview the first held-out labels.
y_test.head()

974     0
619     0
4289    0
3721    1
4533    0
Name: Churn, dtype: int64

In [143]:
# Decide which feature columns go to the scaler and which to the one-hot encoder.
# select_dtypes(include='number') recognises every numeric dtype; a hard-coded
# list of four dtype names would silently route e.g. int8/int16/float16 or
# nullable integer columns to the one-hot encoder.
num_cols = X_train.select_dtypes(include='number').columns.tolist()
# The categorical list is the exact complement, in original column order, so
# every feature ends up in exactly one of the two lists.
categ_cols = [col for col in X_train.columns if col not in num_cols]


print('Numerical Columns : \n', num_cols)
print('**'*40)
print('Categorical Columns : \n', categ_cols)

Numerical Columns : 
 ['tenure', 'MonthlyCharges', 'TotalCharges']
********************************************************************************
Categorical Columns : 
 ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod']


In [144]:
# Numeric branch: pick the columns in num_cols, then standardise them.
# DataFrameSelector comes from sklearn_features; presumably it returns only the
# listed columns of the input frame - confirm against that package.
num_pipeline = Pipeline(steps=[
    ('selector', DataFrameSelector(num_cols)),
    ('scaler', StandardScaler()),
])

# Categorical branch: pick the columns in categ_cols, then one-hot encode them
# into a dense array. handle_unknown='ignore' encodes a category that was not
# present during fit as all zeros instead of raising at transform time, so the
# fitted pipeline can be applied to new records safely.
categ_pipeline = Pipeline(steps=[
    ('selector', DataFrameSelector(categ_cols)),
    ('OHE', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
])

# Concatenate the outputs of both branches side by side.
total_pipeline = FeatureUnion(transformer_list=[
    ('num_pipe', num_pipeline),
    ('categ_pipe', categ_pipeline),
])

# Fit on the training split only; the test split is transformed with the
# statistics and categories learned from training data.
X_train_final = total_pipeline.fit_transform(X_train)
X_test_final = total_pipeline.transform(X_test)

In [145]:
# Shapes of the transformed train and test matrices.
X_train_final.shape, X_test_final.shape

((5625, 46), (1407, 46))

In [146]:
# Base estimator whose hyper-parameters are tuned below.
rf_model = RandomForestClassifier(random_state=42)

# Search space for the randomized search.
params_best_forest = {
    'n_estimators': np.arange(100, 500, 50),
    'max_depth': np.arange(4, 20, 2),
    # max_samples: a float is a fraction of the training rows, while an int is
    # an absolute row count. The integer 1 would therefore bootstrap a single
    # row per tree; 1.0 expresses the intended "use 100% of the rows".
    'max_samples': [0.7, 0.8, 0.9, 1.0],
}

# Evaluate 20 randomly drawn candidates with 5-fold CV, scored by weighted F1.
search_random_forest = RandomizedSearchCV(
    estimator=rf_model,
    param_distributions=params_best_forest,
    n_iter=20,
    scoring="f1_weighted",
    cv=5,
    verbose=6,
    random_state=42,
)
search_random_forest.fit(X_train_final, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END max_depth=18, max_samples=0.7, n_estimators=300;, score=0.776 total time=   1.1s
[CV 2/5] END max_depth=18, max_samples=0.7, n_estimators=300;, score=0.805 total time=   1.1s
[CV 3/5] END max_depth=18, max_samples=0.7, n_estimators=300;, score=0.780 total time=   1.3s
[CV 4/5] END max_depth=18, max_samples=0.7, n_estimators=300;, score=0.766 total time=   1.0s
[CV 5/5] END max_depth=18, max_samples=0.7, n_estimators=300;, score=0.780 total time=   1.1s
[CV 1/5] END max_depth=4, max_samples=0.7, n_estimators=400;, score=0.772 total time=   0.7s
[CV 2/5] END max_depth=4, max_samples=0.7, n_estimators=400;, score=0.802 total time=   0.6s
[CV 3/5] END max_depth=4, max_samples=0.7, n_estimators=400;, score=0.773 total time=   0.6s
[CV 4/5] END max_depth=4, max_samples=0.7, n_estimators=400;, score=0.758 total time=   0.6s
[CV 5/5] END max_depth=4, max_samples=0.7, n_estimators=400;, score=0.762 total time=   0.6s
[CV

0,1,2
,estimator,RandomForestC...ndom_state=42)
,param_distributions,"{'max_depth': array([ 4, 6..., 14, 16, 18]), 'max_samples': [0.7, 0.8, ...], 'n_estimators': array([100, 1...50, 400, 450])}"
,n_iter,20
,scoring,'f1_weighted'
,n_jobs,
,refit,True
,cv,5
,verbose,6
,pre_dispatch,'2*n_jobs'
,random_state,42

0,1,2
,n_estimators,np.int64(450)
,criterion,'gini'
,max_depth,np.int64(8)
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [147]:
# Pull the winning configuration and the refitted estimator out of the
# randomized search, then print both.
best_forest = search_random_forest.best_estimator_
best_foresr_params = search_random_forest.best_params_

print('best_foresr_params -- ', best_foresr_params)
print('best_forest -- ', best_forest)

best_foresr_params --  {'n_estimators': np.int64(450), 'max_samples': 0.8, 'max_depth': np.int64(8)}
best_forest --  RandomForestClassifier(max_depth=np.int64(8), max_samples=0.8,
                       n_estimators=np.int64(450), random_state=42)


In [148]:
# 5-fold cross-validated weighted F1 of the tuned random forest on the
# training matrix, using all available cores.
forest_cv_options = dict(cv=5, scoring='f1_weighted', n_jobs=-1)
f1_scores_tuned_forest = cross_val_score(best_forest, X_train_final, y_train, **forest_cv_options)

print(f'Scores Using Tuned RandomForest --- {np.round(f1_scores_tuned_forest, 4)}')
print(f'Mean Scores Using Tuned RandomForest --- {f1_scores_tuned_forest.mean():.4f}')


Scores Using Tuned RandomForest --- [0.7964 0.8223 0.7964 0.7729 0.7833]
Mean Scores Using Tuned RandomForest --- 0.7942


In [149]:
# Base XGBoost classifier with a fixed seed; tuned by the grid search below.
xgb = XGBClassifier(random_state=42)


In [150]:
# Hyper-parameter grid for XGBoost; every combination is evaluated.
params_best_xgb = {
    'n_estimators': np.arange(100, 200, 50),
    'max_depth': np.arange(4, 15, 2),
    'learning_rate': [0.1, 0.2],
    'subsample': [0.8, 0.9],
}

# Exhaustive 5-fold search scored by weighted F1, run on all cores.
grid_xgb = GridSearchCV(
    xgb,
    params_best_xgb,
    scoring='f1_weighted',
    cv=5,
    n_jobs=-1,
    verbose=6,
)
grid_xgb.fit(X_train_final, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


0,1,2
,estimator,"XGBClassifier...ree=None, ...)"
,param_grid,"{'learning_rate': [0.1, 0.2], 'max_depth': array([ 4, 6..., 10, 12, 14]), 'n_estimators': array([100, 150]), 'subsample': [0.8, 0.9]}"
,scoring,'f1_weighted'
,n_jobs,-1
,refit,True
,cv,5
,verbose,6
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [151]:
# Pull the winning configuration and the refitted estimator out of the grid
# search, then print both.
best_xgb = grid_xgb.best_estimator_
best_xgb_params = grid_xgb.best_params_

print('best_xgb_params -- ', best_xgb_params)
print('best_xgb -- ', best_xgb)

best_xgb_params --  {'learning_rate': 0.1, 'max_depth': np.int64(4), 'n_estimators': np.int64(100), 'subsample': 0.8}
best_xgb --  XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              feature_weights=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.1, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None,
              max_depth=np.int64(4), max_leaves=None, min_child_weight=None,
              missing=nan, monotone_constraints=None, multi_strategy=None,
              n_estimators=np.int64(100), n_jobs=None, num_parallel_tree=None, ...)


In [152]:
# 5-fold cross-validated weighted F1 of the tuned XGBoost model on the
# training matrix, using all available cores.
xgb_cv_options = dict(cv=5, scoring='f1_weighted', n_jobs=-1)
tuned_xgb = cross_val_score(best_xgb, X_train_final, y_train, **xgb_cv_options)

print(f'Scores Using Tuned Tuned XGBoost --- {np.round(tuned_xgb, 4)}')
print(f'Mean of Scores Using Tuned XGBoost --- {tuned_xgb.mean():.4f}')


Scores Using Tuned Tuned XGBoost --- [0.7887 0.813  0.784  0.7822 0.7975]
Mean of Scores Using Tuned XGBoost --- 0.7931


In [153]:
# Persist both tuned models to disk.
# Raw strings keep the Windows backslashes literal: "\K", "\s" and "\m" are not
# valid escape sequences and trigger a warning in a normal string.
# NOTE(review): in the visible cells only the estimators are saved; the fitted
# total_pipeline is not, so a fresh session cannot turn raw records into the
# matrix these models expect - consider persisting it as well.
joblib.dump(best_xgb, r'D:\Kasban_Churn_LLM\src\models\model_XGBoost.pkl')
joblib.dump(best_forest, r'D:\Kasban_Churn_LLM\src\models\model_RF.pkl')

['D:\\Kasban_Churn_LLM\\src\\models\\model_RF.pkl']

In [154]:
# Reload the saved XGBoost model from disk and display it.
# Raw string: the path contains "\K", "\s" and "\m", which are invalid escape
# sequences in a normal string literal.
# joblib.load unpickles the file, so only load files this project produced.
xgb_model = joblib.load(r'D:\Kasban_Churn_LLM\src\models\model_XGBoost.pkl')
xgb_model

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [155]:
# Reload the saved random-forest model from disk and display it. This rebinds
# rf_model, which earlier held the untuned base estimator.
# Raw string: the path contains "\K", "\s" and "\m", which are invalid escape
# sequences in a normal string literal.
# joblib.load unpickles the file, so only load files this project produced.
rf_model = joblib.load(r'D:\Kasban_Churn_LLM\src\models\model_RF.pkl')
rf_model

0,1,2
,n_estimators,np.int64(450)
,criterion,'gini'
,max_depth,np.int64(8)
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [156]:
# Predict with the reloaded XGBoost model on both splits.
# X_train_pred holds predicted labels for the training rows (despite the name).
y_pred_test = xgb_model.predict(X_test_final)
X_train_pred = xgb_model.predict(X_train_final)

# Build the per-class precision / recall / F1 reports for each split.
train_report = classification_report(y_true=y_train, y_pred=X_train_pred)
test_report = classification_report(y_true=y_test, y_pred=y_pred_test)

print(f"Train Report \n {train_report}")
print("*" * 80)
print(f"Test_report \n {test_report}")

Train Report 
               precision    recall  f1-score   support

           0       0.87      0.92      0.89      4130
           1       0.74      0.60      0.66      1495

    accuracy                           0.84      5625
   macro avg       0.80      0.76      0.78      5625
weighted avg       0.83      0.84      0.83      5625

********************************************************************************
Test_report 
               precision    recall  f1-score   support

           0       0.84      0.88      0.86      1033
           1       0.62      0.54      0.58       374

    accuracy                           0.79      1407
   macro avg       0.73      0.71      0.72      1407
weighted avg       0.78      0.79      0.79      1407



In [157]:
# Predict with the reloaded random-forest model on both splits.
# X_train_pred holds predicted labels for the training rows (despite the name).
y_pred_test = rf_model.predict(X_test_final)
X_train_pred = rf_model.predict(X_train_final)

# Build the per-class precision / recall / F1 reports for each split.
train_report = classification_report(y_true=y_train, y_pred=X_train_pred)
test_report = classification_report(y_true=y_test, y_pred=y_pred_test)

print(f"Train Report \n {train_report}")
print("*" * 80)
print(f"Test_report \n {test_report}")

Train Report 
               precision    recall  f1-score   support

           0       0.86      0.93      0.89      4130
           1       0.75      0.57      0.65      1495

    accuracy                           0.83      5625
   macro avg       0.80      0.75      0.77      5625
weighted avg       0.83      0.83      0.83      5625

********************************************************************************
Test_report 
               precision    recall  f1-score   support

           0       0.83      0.90      0.86      1033
           1       0.64      0.50      0.56       374

    accuracy                           0.79      1407
   macro avg       0.74      0.70      0.71      1407
weighted avg       0.78      0.79      0.78      1407



In [158]:
# Column labels for the hand-written record below.
sample_columns = [
    'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
    'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
    'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
    'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
    'MonthlyCharges', 'TotalCharges',
]

# A single customer record, one value per column label above.
sample_data = [[
    "Male", "No", "No", "No", 66,
    "Yes", "No", "Fiber optic", "Yes",
    "No", "Yes", "Yes", "Yes",
    "Yes", "Two year", "Yes", "Bank transfer (automatic)",
    105.65, 6844.5,
]]

# Print the record as a one-row DataFrame.
print(pd.DataFrame(sample_data, columns=sample_columns))




  gender SeniorCitizen Partner Dependents  tenure PhoneService MultipleLines  \
0   Male            No      No         No      66          Yes            No   

  InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport  \
0     Fiber optic            Yes           No              Yes         Yes   

  StreamingTV StreamingMovies  Contract PaperlessBilling  \
0         Yes             Yes  Two year              Yes   

               PaymentMethod  MonthlyCharges  TotalCharges  
0  Bank transfer (automatic)          105.65        6844.5  
