# Telco Customer Churn

In [48]:
# set task name
task_name = 'telco_churn-v2'

# load common
%run common.ipynb

Cortex Python SDK v6.2.0
Cortex Discovery Libraries v3.5.14


## Feature Engineer
Feature Engineering uses the `Wrangle` capability

In [49]:
from ds_discovery import Wrangle, SyntheticBuilder, Commons
from sklearn.preprocessing import StandardScaler

In [50]:
wr = Wrangle.from_env(task_name, has_contract=False)

In [51]:
wr.set_source_uri(uri=SyntheticBuilder.from_env(task_name).get_persist_contract().uri)
wr.set_persist(wr.pm.file_pattern(name='wrangle', prefix='Telco-Customer-Churn', file_type='parquet'))

In [52]:
df = wr.load_source_canonical()

In [53]:
# predictors
X = df.drop(['Churn', 'CustomerID'], axis=1)
# target
y = df['Churn']

In [54]:
df.shape

(1000, 52)

In [55]:
# convert all categoricals to one-hot from k-1
# Warning: increase of dimentionality
cats = Commons.filter_headers(X, dtype='object')
df = wr.tools.model_encode_one_hot(df, headers=cats, drop_first=True, column_name='one_hot_k-1')

In [56]:
# make the variables floats
df = wr.tools.model_to_float(df, headers=X.columns.to_list(), column_name='to_float')

In [57]:
wr.run_component_pipeline()

In [44]:
num_features = df[["Tenure", "MonthlyCharges", "TotalCharges", 
                 "CLTV", "Total_Revenue"]]

In [45]:
# define data
x_num = num_features
# define standard scaler
scaler = StandardScaler()
# transform data
x_num_scaled = scaler.fit_transform(x_num)

df_num_features = pd.DataFrame(x_num_scaled)

df_num_features.set_axis(['Tenure', 'MonthlyCharges', 'TotalCharges',
                       "CLTV", "Total Revenue"], 
                      axis='columns')

Unnamed: 0,Tenure,MonthlyCharges,TotalCharges,CLTV,Total Revenue
0,0.000,0.000,0.000,0.000,0.000
1,0.000,0.000,0.000,0.000,0.000
2,0.000,0.000,0.000,0.000,0.000
3,0.000,0.000,0.000,0.000,0.000
4,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...
995,0.000,0.000,0.000,0.000,0.000
996,0.000,0.000,0.000,0.000,0.000
997,0.000,0.000,0.000,0.000,0.000
998,0.000,0.000,0.000,0.000,0.000


In [46]:
df_num_features.shape

(1000, 5)

In [59]:
df.head

<bound method NDFrame.head of      CustomerID  Churn  SeniorCitizen  Partner  Dependents  Tenure  \
0       4098999  False          0.032    1.000       0.000  18.511   
1       8076999  False          0.680    0.000       1.000  35.346   
2       7122999  False          0.305    1.000       0.000  68.002   
3       5253999   True          0.125    1.000       0.000  61.827   
4       6168999  False          0.743    0.000       1.000  27.371   
..          ...    ...            ...      ...         ...     ...   
995     6465999  False          0.058    0.000       0.000   5.691   
996     7917999  False          0.995    1.000       1.000  30.921   
997     7062999  False          0.970    1.000       1.000  65.125   
998     5520999   True          0.272    0.000       0.000  12.280   
999     3666999   True          0.136    0.000       0.000   7.775   

     PhoneService  PaperlessBilling  MonthlyCharges  TotalCharges  ChurnRate  \
0           1.000             1.000          57.3