# Telco Customer Churn

In [1]:
# Task name passed to Transition.from_env() when the component is created below
task_name = 'telco_churn-v2'

# Execute the shared setup notebook inside this kernel. Its output prints the
# Cortex SDK and Discovery library versions; anything else it defines is not
# visible from this notebook — presumably common imports/environment setup
# (TODO confirm in common.ipynb).
%run common.ipynb

Cortex Python SDK v6.2.0a1
Cortex Discovery Libraries v3.5.14


## Data Selection
Data selection uses the `Transition` capability from `ds_discovery`.

In [2]:
from ds_discovery import Transition

In [3]:
# Create the Transition component for this task.
# NOTE(review): has_contract=False presumably allows starting without an
# existing contract — confirm against the ds_discovery documentation.
tr = Transition.from_env(task_name, has_contract=False)

# Source: the raw Telco customer churn CSV
source_uri = 's3://lk-datasets/kaggle/Telco_customer_churn.csv'
tr.set_source_uri(source_uri)

# Persist target: a parquet file pattern generated by tr.pm.file_pattern()
persist_uri = tr.pm.file_pattern(name='select', prefix='Telco-Customer-Churn', file_type='parquet')
tr.set_persist(persist_uri)

# Human-readable description of the component
tr.set_description("Telco Churn Dataset")

In [4]:
# Load the source configured above into `df`; later cells treat it as a
# pandas-style frame (df.columns, df[col].nunique(), df.head())
df = tr.load_source_canonical()

In [5]:
# Per-attribute summary of the loaded data: dType, %_Null, %_Dom, Count,
# Unique and Observations (see the table rendered below).
# NOTE(review): stylise=False presumably returns the plain, unstyled table — confirm.
tr.canonical_report(df, stylise=False)

Unnamed: 0,Attributes (52),dType,%_Null,%_Dom,Count,Unique,Observations
0,Age,int64,0.0,0.022,7043,62,max=80 | min=19 | mean=46.51 | dominant=42
1,Avg Monthly GB Download,int64,0.0,0.217,7043,50,max=85 | min=0 | mean=20.52 | dominant=0
2,Avg Monthly Long Distance Charges,float64,0.0,0.097,7043,3584,max=49.99 | min=0.0 | mean=22.96 | dominant=0.0
3,CLTV,int64,0.0,0.001,7043,3438,max=6500 | min=2003 | mean=4400.3 | dominant=5546
4,Churn,object,0.0,0.735,7043,2,Sample: No | Yes
5,Churn Category,object,0.0,0.735,7043,6,Sample: 0 | Competitor | Attitude | Dissatisfaction | Price
6,Churn Reason,object,0.735,0.735,7043,21,Sample: Attitude of support person | Competitor offered higher download speeds | Competitor offered ...
7,Churn Score,int64,0.0,0.021,7043,85,max=100 | min=5 | mean=58.7 | dominant=80
8,Churn Score.1,int64,0.0,0.029,7043,81,max=96 | min=5 | mean=58.51 | dominant=91
9,City,object,0.0,0.043,7043,1129,Sample: Los Angeles | San Diego | San Jose | Sacramento | San Francisco


In [6]:
# Clean the column headers (the preview below shows spaces replaced by
# underscores) and apply the explicit renames listed here.
header_renames = {
    'customerID': 'CustomerID',
    'gender': 'Gender',
    'tenure': 'Tenure',
    'churn_rate': 'ChurnRate',
}
df = tr.tools.auto_clean_header(df, rename_map=header_renames)

# Type TotalCharges as float, based on the observations in the canonical report
df = tr.tools.to_float_type(df, headers='TotalCharges')

In [8]:
# Preview the first rows after header cleaning and float typing
df.head()

Unnamed: 0,CustomerID,Gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,ChurnRate,Churn_Score,CLTV,Churn_Reason,Count,Country,State,City,Zip_Code,Lat_Long,Latitude,Longitude,Age,Under_30,Married,Referred_a_Friend,Number_of_Referrals,Offer,Avg_Monthly_Long_Distance_Charges,Avg_Monthly_GB_Download,Streaming_Music,Premium_Tech_Support,Unlimited_Data,Total_Refunds,Total_Extra_Data_Charges,Total_Long_Distance_Charges,Total_Revenue,Satisfaction_Score,Customer_Status,Churn_Score.1,Churn_Category
0,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,86,3239,Competitor made better offer,1,United States,California,Los Angeles,90003,"33.964131, -118.272783",33.964,-118.273,37,No,No,No,0,,10.47,21,No,No,Yes,0.0,0,20.94,129.09,1,Churned,86,Competitor
1,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,67,2701,Moved,1,United States,California,Los Angeles,90005,"34.059281, -118.30742",34.059,-118.307,19,Yes,No,No,0,,9.12,51,No,No,Yes,0.0,0,18.24,169.89,2,Churned,67,Other
2,9305-CDSKC,Female,0,No,No,8,Yes,Yes,Fiber optic,No,No,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,99.65,820.5,Yes,1,86,5372,Moved,1,United States,California,Los Angeles,90006,"34.048013, -118.293953",34.048,-118.294,31,No,No,No,0,,12.15,26,Yes,No,Yes,0.0,0,97.2,917.7,3,Churned,86,Other
3,7892-POOKP,Female,0,Yes,No,28,Yes,Yes,Fiber optic,No,No,Yes,Yes,Yes,Yes,Month-to-month,Yes,Electronic check,104.8,3046.05,Yes,1,84,5003,Moved,1,United States,California,Los Angeles,90010,"34.062125, -118.315709",34.062,-118.316,23,Yes,Yes,No,0,Offer C,4.89,47,Yes,Yes,Yes,0.0,0,136.92,3182.97,3,Churned,84,Other
4,0280-XJGEX,Male,0,No,No,49,Yes,Yes,Fiber optic,No,Yes,Yes,No,Yes,Yes,Month-to-month,Yes,Bank transfer (automatic),103.7,5036.3,Yes,1,89,5340,Competitor had better devices,1,United States,California,Los Angeles,90015,"34.039224, -118.266293",34.039,-118.266,38,No,No,No,0,,44.33,11,Yes,No,Yes,0.0,0,2172.17,7208.47,1,Churned,89,Competitor


In [9]:
# Convert yes/no columns to booleans so they carry a guaranteed 0/1 distribution.
# A column qualifies when it has exactly two distinct non-null values and one
# of them is 'Yes'; columns with a third category are left as they are.
yn = [
    name for name in df.columns
    if df[name].nunique() == 2 and 'Yes' in df[name].unique().tolist()
]

# 'Yes' maps to True, as given by bool_map
df = tr.tools.to_bool_type(df, headers=yn, bool_map={'Yes': True})

In [11]:
# Run the component pipeline. The call takes no arguments and its return value
# is not assigned, so `df` is not rebound by this cell.
# NOTE(review): presumably this replays the recorded transition steps against
# the source and writes to the persist target set earlier — confirm against
# the ds_discovery documentation.
tr.run_component_pipeline()

In [12]:
# Preview the in-memory frame after the yes/no -> bool conversion
# (this is `df` as built in this notebook, not a reload of the persisted output)
df.head()

Unnamed: 0,CustomerID,Gender,SeniorCitizen,Partner,Dependents,Tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,ChurnRate,Churn_Score,CLTV,Churn_Reason,Count,Country,State,City,Zip_Code,Lat_Long,Latitude,Longitude,Age,Under_30,Married,Referred_a_Friend,Number_of_Referrals,Offer,Avg_Monthly_Long_Distance_Charges,Avg_Monthly_GB_Download,Streaming_Music,Premium_Tech_Support,Unlimited_Data,Total_Refunds,Total_Extra_Data_Charges,Total_Long_Distance_Charges,Total_Revenue,Satisfaction_Score,Customer_Status,Churn_Score.1,Churn_Category
0,3668-QPYBK,Male,0,False,False,2,True,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,True,Mailed check,53.85,108.15,True,1,86,3239,Competitor made better offer,1,United States,California,Los Angeles,90003,"33.964131, -118.272783",33.964,-118.273,37,False,False,False,0,,10.47,21,False,False,True,0.0,0,20.94,129.09,1,Churned,86,Competitor
1,9237-HQITU,Female,0,False,False,2,True,No,Fiber optic,No,No,No,No,No,No,Month-to-month,True,Electronic check,70.7,151.65,True,1,67,2701,Moved,1,United States,California,Los Angeles,90005,"34.059281, -118.30742",34.059,-118.307,19,True,False,False,0,,9.12,51,False,False,True,0.0,0,18.24,169.89,2,Churned,67,Other
2,9305-CDSKC,Female,0,False,False,8,True,Yes,Fiber optic,No,No,Yes,No,Yes,Yes,Month-to-month,True,Electronic check,99.65,820.5,True,1,86,5372,Moved,1,United States,California,Los Angeles,90006,"34.048013, -118.293953",34.048,-118.294,31,False,False,False,0,,12.15,26,True,False,True,0.0,0,97.2,917.7,3,Churned,86,Other
3,7892-POOKP,Female,0,True,False,28,True,Yes,Fiber optic,No,No,Yes,Yes,Yes,Yes,Month-to-month,True,Electronic check,104.8,3046.05,True,1,84,5003,Moved,1,United States,California,Los Angeles,90010,"34.062125, -118.315709",34.062,-118.316,23,True,True,False,0,Offer C,4.89,47,True,True,True,0.0,0,136.92,3182.97,3,Churned,84,Other
4,0280-XJGEX,Male,0,False,False,49,True,Yes,Fiber optic,No,Yes,Yes,No,Yes,Yes,Month-to-month,True,Bank transfer (automatic),103.7,5036.3,True,1,89,5340,Competitor had better devices,1,United States,California,Los Angeles,90015,"34.039224, -118.266293",34.039,-118.266,38,False,False,False,0,,44.33,11,True,False,True,0.0,0,2172.17,7208.47,1,Churned,89,Competitor


In [13]:
# Check the frame dimensions as (rows, columns)
df.shape

(7043, 52)