In [3]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# Load the Telco customer-churn CSV; the path is relative to the notebook's working directory.
data = pd.read_csv('data/WA_Fn-UseC_-Telco-Customer-Churn.csv')
# Print (rows, columns), then show the first five rows as the cell's rich output.
print(data.shape)
data.head()

(7043, 21)


Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [24]:
# Registry of trained model bundles, keyed by the names the blending code uses.
# predict_single reads the 'model_' and 'dv_' entries of each bundle.
loaded_models = {}

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load 'churn-models.bin' when it comes from a trusted source.
with open('churn-models.bin', 'rb') as f_in:
    # The file holds exactly four bundles, in this order.
    xgb_bundle, lgb_bundle, logreg_bundle, bayes_bundle = pickle.load(f_in)
    loaded_models.update(
        xgb=xgb_bundle,
        Lgb=lgb_bundle,
        Logistic_reg=logreg_bundle,
        bayes=bayes_bundle,
    )

def preprocessing_single(single_dict):
    """Turn one raw customer record into a one-row, normalised DataFrame.

    Column names and string values are lower-cased and have spaces replaced
    by underscores. ``totalcharges`` is coerced to a number (values that
    cannot be parsed become 0). The ``churn`` label, when present, is encoded
    as 1 for ``'yes'`` and 0 otherwise.

    Parameters
    ----------
    single_dict : dict
        Mapping of column name to scalar value for a single customer, e.g.
        ``data.iloc[i].to_dict()``. A ``TotalCharges`` entry is required;
        ``Churn`` is optional so that unlabeled customers can be scored.

    Returns
    -------
    pandas.DataFrame
        A single-row frame (index ``[0]``) with normalised column names.

    Raises
    ------
    KeyError
        If the record has no ``TotalCharges`` entry.
    """
    df = pd.DataFrame(single_dict, index=[0])

    # regex=False: the space is a literal, independent of the pandas default.
    df.columns = df.columns.str.lower().str.replace(' ', '_', regex=False)

    # Text columns may be plain object dtype or pandas' dedicated string dtype.
    string_columns = [
        col for col in df.columns
        if df[col].dtype == 'object' or isinstance(df[col].dtype, pd.StringDtype)
    ]
    for col in string_columns:
        df[col] = df[col].str.lower().str.replace(' ', '_', regex=False)

    # The label is unknown when scoring a new customer, so only encode it
    # when the caller actually supplied it.
    if 'churn' in df.columns:
        df['churn'] = (df['churn'] == 'yes').astype(int)

    # Blank / non-numeric totals become NaN and are then replaced by 0.
    df['totalcharges'] = pd.to_numeric(df['totalcharges'], errors='coerce').fillna(0)
    return df

# Feature columns handed to the vectorizer, using the lower-cased names
# produced by preprocessing_single. Order matters only for readability here;
# predict() selects `categorical + numerical` from the frame.
categorical = [
    # customer profile
    'gender',
    'seniorcitizen',
    'partner',
    'dependents',
    # subscribed services
    'phoneservice',
    'multiplelines',
    'internetservice',
    'onlinesecurity',
    'onlinebackup',
    'deviceprotection',
    'techsupport',
    'streamingtv',
    'streamingmovies',
    # contract and billing
    'contract',
    'paperlessbilling',
    'paymentmethod',
]

numerical = [
    'tenure',
    'monthlycharges',
    'totalcharges',
]

def predict(df, dv, model):
    """Score every row of ``df`` with one vectorizer/model pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the ``categorical`` and ``numerical``
        feature columns defined at file level.
    dv : object
        Fitted vectorizer exposing ``transform(list_of_dicts)``
        (presumably a scikit-learn ``DictVectorizer`` -- confirm against
        the training code).
    model : object
        Fitted classifier exposing ``predict_proba``.

    Returns
    -------
    array-like
        Column index 1 of ``predict_proba`` for each row (presumably the
        churn-class probability -- confirm against the training labels).
    """
    feature_columns = categorical + numerical
    # One dict per row, which is the input format the vectorizer is given.
    row_records = df[feature_columns].to_dict(orient='records')
    feature_matrix = dv.transform(row_records)
    return model.predict_proba(feature_matrix)[:, 1]

def predict_single(trained_models, df_single, weights=None, threshold=0.5):
    """Blend the per-model scores for one customer into a score and a label.

    Parameters
    ----------
    trained_models : dict
        Maps model name to a bundle with keys ``'model_'`` (classifier with
        ``predict_proba``) and ``'dv_'`` (its fitted vectorizer).
    df_single : pandas.DataFrame
        Preprocessed frame; only the first row's result is returned.
    weights : dict, optional
        Maps model name to its blend weight. Defaults to
        ``0.4*Logistic_reg + 0.4*bayes + 0.1*xgb + 0.1*Lgb``.
        Only models named here are evaluated.
    threshold : float, optional
        Blended scores greater than or equal to this value yield label 1.
        Defaults to 0.5.

    Returns
    -------
    tuple
        ``(blended_score, label)`` for the first row, where ``label`` is 0 or 1.

    Raises
    ------
    ValueError
        If ``weights`` is empty.
    KeyError
        If a weighted model name is absent from ``trained_models``.
    """
    if weights is None:
        # Default blend. The dict order fixes the floating-point summation
        # order, so keep it stable to reproduce earlier scores exactly.
        weights = {'Logistic_reg': 0.4, 'bayes': 0.4, 'xgb': 0.1, 'Lgb': 0.1}
    if not weights:
        raise ValueError("weights must name at least one model")

    missing = [name for name in weights if name not in trained_models]
    if missing:
        raise KeyError(f"trained_models is missing weighted model(s): {missing}")

    blended = None
    for model_name, weight in weights.items():
        bundle = trained_models[model_name]
        weighted = weight * predict(df_single, bundle['dv_'], bundle['model_'])
        # Accumulate left to right, without a synthetic zero start value.
        blended = weighted if blended is None else blended + weighted

    labels = (blended >= threshold) * 1

    return blended[0], labels[0]

In [18]:
# Take the row at position 1 as a plain {column: value} dict -- one raw customer record to score.
d = data.iloc[1].to_dict()

In [19]:
# Display the raw record before any preprocessing.
d

{'customerID': '5575-GNVDE',
 'gender': 'Male',
 'SeniorCitizen': 0,
 'Partner': 'No',
 'Dependents': 'No',
 'tenure': 34,
 'PhoneService': 'Yes',
 'MultipleLines': 'No',
 'InternetService': 'DSL',
 'OnlineSecurity': 'Yes',
 'OnlineBackup': 'No',
 'DeviceProtection': 'Yes',
 'TechSupport': 'No',
 'StreamingTV': 'No',
 'StreamingMovies': 'No',
 'Contract': 'One year',
 'PaperlessBilling': 'No',
 'PaymentMethod': 'Mailed check',
 'MonthlyCharges': 56.95,
 'TotalCharges': '1889.5',
 'Churn': 'No'}

In [21]:
# Inspect the first row of the raw data as a Series.
data.iloc[0]

customerID                7590-VHVEG
gender                        Female
SeniorCitizen                      0
Partner                          Yes
Dependents                        No
tenure                             1
PhoneService                      No
MultipleLines       No phone service
InternetService                  DSL
OnlineSecurity                    No
OnlineBackup                     Yes
DeviceProtection                  No
TechSupport                       No
StreamingTV                       No
StreamingMovies                   No
Contract              Month-to-month
PaperlessBilling                 Yes
PaymentMethod       Electronic check
MonthlyCharges                 29.85
TotalCharges                   29.85
Churn                             No
Name: 0, dtype: object

In [25]:
# Normalise the raw record into a one-row frame, then score it with the loaded models.
ps = preprocessing_single(single_dict=d)
# Cell output is (blended score, 0/1 label at the 0.5 threshold).
predict_single(trained_models=loaded_models,df_single=ps)

(0.03211045187961432, 0)