# Predicting Customer Churn - Telco 

In [None]:
%%bash
# Reuse the dependency specification of the alibi-explain-server component;
# the relative paths expect a seldon-core checkout two directories above this notebook.
cp ../../seldon-core/components/alibi-explain-server/pyproject.toml .
cp ../../seldon-core/components/alibi-explain-server/poetry.lock .

# Create a conda environment pinned to Python 3.7.10 under ./venv.
conda create --yes --prefix ./venv python=3.7.10

In [None]:
%%bash 

# Load the conda shell functions so `conda activate` works in this subshell
# (the path expects Anaconda under ~/anaconda3).
source ~/anaconda3/etc/profile.d/conda.sh
conda activate ./venv
# Install the dependencies described by the copied pyproject.toml / poetry.lock.
# NOTE(review): the activation only lasts for this bash cell; confirm the notebook
# kernel itself runs from ./venv.
poetry install

In [None]:
# Show which Python the notebook's shell commands resolve to.
!python --version
!which python

In [None]:
!pip install imblearn
!pip install seaborn
!pip install seldon_deploy_sdk

In [None]:
import os
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from imblearn.over_sampling import RandomOverSampler

import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, average_precision_score

from xgboost.sklearn import XGBClassifier
from xgboost import plot_importance, to_graphviz

from seldon_deploy_sdk import Configuration, ApiClient, SeldonDeploymentsApi, ModelMetadataServiceApi, DriftDetectorApi, BatchJobsApi, BatchJobDefinition
from seldon_deploy_sdk.auth import OIDCAuthenticator

from alibi.explainers import AnchorTabular

In [None]:
# Download the Telco churn CSV from the GCS bucket into the working directory
# (needs gsutil on the PATH and read access to the bucket).
!gsutil cp gs://kelly-seldon/customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv WA_Fn-UseC_-Telco-Customer-Churn.csv

In [None]:
# Load the downloaded CSV and preview the first rows.
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")
df.head()

The dataset consists of the following columns:

* customerID

Services that a customer has signed up for:

* PhoneService
* MultipleLines
* InternetService
* OnlineSecurity
* OnlineBackup
* DeviceProtection
* TechSupport
* StreamingTV
* StreamingMovies

Customer account information:

* tenure - how long they've been a customer
* Contract 
* PaymentMethod
* PaperlessBilling
* MonthlyCharges
* TotalCharges

Demographic information about a customer:

* gender
* SeniorCitizen
* Partner
* Dependents

Label:

* Churn - whether the customer left within the last month

In [None]:
# Class balance of the label column.
df.value_counts("Churn")

In [None]:
# Flag missing values cell by cell, then keep only the rows with at least one hit.
# Only real NaN/None values are detected; blank-string entries (such as the " "
# values filtered out of TotalCharges further down) are not reported here.
is_NaN = df.isna()
row_has_NaN = is_NaN.any(axis="columns")
rows_with_NaN = df.loc[row_has_NaN]
print(rows_with_NaN.head(), "\n\n", "Number of rows with missing values:", rows_with_NaN.shape[0])

In [None]:
# Inspect the column types as parsed by read_csv.
df.dtypes

In [None]:
# customerID is an identifier, not a feature. Rebinding (instead of inplace=True)
# together with errors="ignore" keeps this cell safe to re-run: the in-place drop
# raised KeyError on a second execution because the column was already gone.
df = df.drop(columns="customerID", errors="ignore")

In [None]:
# Rows with a blank TotalCharges (" ") cannot be parsed as numbers, so drop them.
# .copy() makes the filtered frame independent, so the column assignment below
# writes to a real frame rather than a slice of the original (SettingWithCopyWarning).
df = df[df["TotalCharges"] != " "].copy()
df["TotalCharges"] = df["TotalCharges"].astype(float)

In [None]:
# Categorical features to plot against the churn label.
cols = ["gender", "SeniorCitizen", "Partner", "Dependents", 'PhoneService', 'MultipleLines', 'InternetService',
        'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
        'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
        'PaymentMethod']

for col in cols:
    # Keep the FacetGrid in its own name. Binding it to `plt` replaced the
    # matplotlib.pyplot module imported at the top, which broke every later
    # `plt.figure()` / `plt.cm` / `plt.show()` call in the notebook.
    grid = sns.catplot(x=col, hue='Churn', data=df, kind="count")
    grid.set_xticklabels(rotation=40, ha="right")

In [None]:
# Label distribution after the blank-TotalCharges rows were removed.
df["Churn"].value_counts()

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
# Encode the label as 1 ('Yes') / 0 (anything else).
# The dtype guard makes the cell idempotent: the unconditional mapping compared the
# already-encoded 0/1 values with 'Yes' on a re-run and silently set every label to 0.
# assign() builds a new frame, so nothing is written into a slice of `df`.
if train["Churn"].dtype == object:
    train = train.assign(Churn=np.where(train["Churn"].values == 'Yes', 1, 0))

In [None]:
# Separate the feature matrix (every column except the label) from the label vector.
train_X = train.drop(columns="Churn")
train_y = train["Churn"]

In [None]:
# Ratio of negative (0) to positive (1) training labels.
n_negative = (train_y == 0).sum()
n_positive = (train_y == 1).sum()
weights = n_negative / (1.0 * n_positive)

In [None]:
# List the feature columns before encoding.
train_X.columns

In [None]:
# Yes/No columns that are mapped to 1/0 by encode_binary.
bin_cols = ["Partner", "Dependents", "PaperlessBilling", "PhoneService"]

def encode_binary(df):
    """Return a copy of ``df`` with the two-valued text columns encoded as integers.

    Every column listed in ``bin_cols`` becomes 1 where the value is 'Yes' and 0
    otherwise; ``gender`` becomes 1 where the value is 'Male' and 0 otherwise.

    The input frame is left untouched. The previous version wrote into the
    caller's frame, which for the train/test feature frames meant assigning into
    a slice of another DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``bin_cols`` columns and a ``gender`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns, with the listed columns encoded.
    """
    encoded = df.copy()

    for col in bin_cols:
        encoded[col] = np.where(encoded[col].values == 'Yes', 1, 0)

    encoded["gender"] = np.where(encoded["gender"].values == 'Male', 1, 0)

    return encoded

In [None]:
# Run once per fresh train_X: applying the encoding to already-encoded columns
# compares 0/1 with 'Yes'/'Male' and therefore maps every value to 0.
train_X = encode_binary(train_X)

In [None]:
# One-hot encode the multi-valued categorical columns of the training features.
multi_valued_columns = [
    'MultipleLines', 'InternetService', 'OnlineSecurity',
    'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
    'StreamingMovies', 'Contract', 'PaymentMethod',
]
train_X = pd.get_dummies(train_X, columns=multi_valued_columns)

In [None]:
# Encode the test label as 1 ('Yes') / 0 (anything else).
# The dtype guard keeps the cell idempotent: without it a re-run compares the
# already-encoded 0/1 values with 'Yes' and sets every label to 0.
if test["Churn"].dtype == object:
    test = test.assign(Churn=np.where(test["Churn"].values == 'Yes', 1, 0))

# Separate the features (every column except the label) from the label.
test_X = test.loc[:, test.columns != "Churn"]
test_y = test["Churn"]

In [None]:
# Same binary encoding as applied to the training features; run once per fresh test_X.
test_X = encode_binary(test_X)

In [None]:
# One-hot encode the same categorical columns as for the training features.
test_X = pd.get_dummies(test_X, columns=['MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaymentMethod'])

# get_dummies only creates columns for categories present in the frame it is given,
# so a category missing from the test split would yield fewer (or shifted) columns
# than the model was trained on. Align explicitly to the training layout: missing
# indicator columns are added as all-zero, and the column order is made identical.
test_X = test_X.reindex(columns=train_X.columns, fill_value=0)

In [None]:
# scale_pos_weight is set to the negative/positive label ratio computed from train_y.
model = XGBClassifier(max_depth=9, scale_pos_weight=weights)

In [None]:
# Train on the training split, then score the held-out split.
model.fit(train_X, train_y)
probabilities = model.predict_proba(test_X)

In [None]:
# Per-row class probabilities; column 1 is used as the churn score in the AUPRC cell.
probabilities

In [None]:
# Area under the precision-recall curve, using the class-1 probability as the score.
auprc = average_precision_score(test_y, probabilities[:, 1])
print(f'AUPRC = {auprc}')

In [None]:
# Sanity check: class probabilities for the first test row only.
y_pred = model.predict_proba(test_X.iloc[:1])

In [None]:
# Display the probabilities predicted for the first test row.
y_pred

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Turn each row of class probabilities into the index of its most likely class.
predictions = list(np.argmax(probabilities, axis=1))

In [None]:
# Share of test rows whose predicted class matches the true label.
accuracy = accuracy_score(test_y, predictions)
print(f"Accuracy: {accuracy * 100.0:.2f}%")

In [None]:
# Confusion matrix of the hard predictions: rows are true classes, columns predicted.
conf_mat = confusion_matrix(y_true=test_y, y_pred=predictions)
print('Confusion matrix:\n', conf_mat)

labels = ['Class 0', 'Class 1']
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(conf_mat, cmap=plt.cm.Blues)
fig.colorbar(cax)

# Pin the tick positions before labelling them. The previous `[''] + labels` trick
# depended on the automatic locator placing an extra, invisible tick first, and
# silently mislabels the cells whenever the locator picks different positions.
tick_positions = list(range(len(labels)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(labels)
ax.set_yticks(tick_positions)
ax.set_yticklabels(labels)

# Label through the Axes object so the text always lands on the matrix axes.
ax.set_xlabel('Predicted')
ax.set_ylabel('Expected')
plt.show()

In [None]:
# Horizontal bar chart of the model's feature importances, measured by 'cover'.
fig, ax = plt.subplots(figsize=(14, 9))

# Nine colours sampled evenly from the Set1 colormap.
colours = plt.cm.Set1(np.linspace(0, 1, 9))

ax = plot_importance(
    model,
    ax=ax,
    height=1,
    color=colours,
    grid=False,
    show_values=False,
    importance_type='cover',
)

# Thicken the frame around the plot.
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_linewidth(2)

ax.set_xlabel('importance score', size=16)
ax.set_ylabel('features', size=16)
ax.set_yticklabels(ax.get_yticklabels(), size=12)
# Trailing semicolon keeps the returned Text object out of the cell output.
ax.set_title('Plotting the models most important features', size=16);

In [None]:
# Persist the trained model to model.bst in the working directory.
model.save_model('model.bst')

In [None]:
# Identifier used in the GCS model path and as the prefix of the deployment name.
YOUR_NAME = "ks-poetry-2"

In [None]:
# Upload the saved model to the per-user folder in the GCS bucket.
!gsutil cp model.bst gs://kelly-seldon/customer-churn/models/{YOUR_NAME}/model.bst

## Deploy model using Seldon Deploy SDK

In [None]:
# Address of the Seldon Deploy installation used by the API and the OIDC server URLs.
SD_IP = "34.141.146.222"

config = Configuration()
config.host = f"http://{SD_IP}/seldon-deploy/api/v1alpha1"
config.oidc_client_id = "sd-api"
config.oidc_server = f"http://{SD_IP}/auth/realms/deploy-realm"
# Do not rely on a secret committed with the notebook: read it from the environment.
# The literal fallback only preserves the previous behaviour when
# SD_OIDC_CLIENT_SECRET is not set.
config.oidc_client_secret = os.environ.get("SD_OIDC_CLIENT_SECRET", "sd-api-secret")
config.auth_method = "client_credentials"
# NOTE(review): both URLs use plain http, so the client credentials are sent
# unencrypted - confirm this is acceptable for the target cluster.

def auth():
    """Authenticate with the module-level ``config`` and return an API client.

    An OIDCAuthenticator is built from ``config``, the token it returns is stored
    on ``config.id_token``, and an ApiClient bound to both is returned. Every call
    performs a fresh authentication.
    """
    authenticator = OIDCAuthenticator(config)
    config.id_token = authenticator.authenticate()
    return ApiClient(configuration=config, authenticator=authenticator)

In [None]:
# MUST BE ALL LOWERCASE WITH NO UNDERSCORES
DEPLOYMENT_NAME = f"{YOUR_NAME}-churn"
# GCS folder the model artifact was uploaded to.
MODEL_URI = f"gs://kelly-seldon/customer-churn/models/{YOUR_NAME}"

# Namespace the deployment is created in.
NAMESPACE = "seldon-gitops"

# Resource requests for the model container.
CPU_REQUESTS = "0.1"
MEMORY_REQUESTS = "1Gi"

# Resource limits for the model container (set equal to the requests).
CPU_LIMITS = "0.1"
MEMORY_LIMITS = "1Gi"

In [None]:
# The container spec and the graph node below carry the same name.
model_container_name = f"{DEPLOYMENT_NAME}-container"

# CPU / memory requests and limits for the model container.
container_resources = {
    "requests": {"cpu": CPU_REQUESTS, "memory": MEMORY_REQUESTS},
    "limits": {"cpu": CPU_LIMITS, "memory": MEMORY_LIMITS},
}

# Single-node inference graph: the XGBoost server loading the model from MODEL_URI.
predictor_graph = {
    "implementation": "XGBOOST_SERVER",
    "modelUri": MODEL_URI,
    "name": model_container_name,
    "endpoint": {"type": "REST"},
    "parameters": [],
    "children": [],
    "logger": {"mode": "all"},
}

# The only predictor: one replica receiving all of the traffic.
default_predictor = {
    "componentSpecs": [
        {
            "spec": {
                "containers": [
                    {
                        "name": model_container_name,
                        "resources": container_resources,
                    }
                ]
            }
        }
    ],
    "name": "default",
    "replicas": 1,
    "traffic": 100,
    "graph": predictor_graph,
}

# Complete SeldonDeployment resource passed to the deployments API.
mldeployment = {
    "kind": "SeldonDeployment",
    "metadata": {
        "name": DEPLOYMENT_NAME,
        "namespace": NAMESPACE,
        "labels": {"fluentd": "true"},
    },
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "spec": {
        "name": DEPLOYMENT_NAME,
        "annotations": {
            "seldon.io/engine-seldon-log-messages-externally": "true"
        },
        "protocol": "seldon",
        "transport": "rest",
        "predictors": [default_predictor],
    },
    "status": {},
}

In [None]:
# Authenticate, then submit the SeldonDeployment resource to the target namespace.
api_client = auth()
deployment_api = SeldonDeploymentsApi(api_client)
deployment_api.create_seldon_deployment(namespace=NAMESPACE, mldeployment=mldeployment)

Example request:

```
{
    "data": {
        "ndarray": [
            [0, 0, 0, 0, 19, 0, 24.7, 465.85,
                1, 0, 0, 1, 0, 0, 1,
                0, 1, 0, 0, 1, 0, 0, 1,
                0, 0, 1, 0, 0, 1, 0, 0,
                1, 0, 1, 0, 0, 1, 0, 0,
                0
            ]
        ]
    }
}
```

In [None]:
# Third row of the encoded test features, kept as a one-row DataFrame.
test_X[2:3]

In [None]:
# Display the encoded training features.
train_X

In [None]:
# Column order of the encoded test features.
test_X.columns

In [None]:
# The same row as a plain array - presumably used to build request payloads like the examples here.
test_X[2:3].values

```
{
    "data": {
        "ndarray": [
            [1, 0, 1, 0, 13, 1, 1, 102.25, 
            1359.0, 0, 0, 1, 0, 1, 0,
            1, 0, 0, 1, 0, 0, 0, 0, 1,
            1, 0, 0, 0, 0, 1, 0, 0, 1,
            1, 0, 0, 0, 1, 0, 0]
        ]
    }
}
```

```
{
    "data": {
        "ndarray": [
            [0, 0, 0, 0, 18, 1, 1, 95.05, 1679.4,
            0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 
            0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 
            0, 1, 0, 0, 0]
        ]
    }
}
```

In [None]:
# True labels of the test split, for comparison with the responses to the example requests.
test_y

In [None]:
def _real_feature(name, data_type="INT"):
    """Build the schema entry of a feature typed REAL with the given data type."""
    return {"name": name, "type": "REAL", "dataType": data_type}


# gender is the only feature described as categorical, with its 0/1 mapping.
_gender_feature = {
    "name": "gender",
    "type": "CATEGORICAL",
    "dataType": "INT",
    "nCategories": "2",
    "categoryMap": {
        "0": "Female",
        "1": "Male"
    }
}

# Integer-valued and float-valued plain features, in schema order.
_int_features = ["SeniorCitizen", "Partner", "Dependents", "tenure",
                 "PhoneService", "PaperlessBilling"]
_float_features = ["MonthlyCharges", "TotalCharges"]

# One-hot encoded features: source column -> category levels, in schema order.
# NOTE(review): the order presumably mirrors train_X.columns - verify before deploying.
_internet_addon_levels = ["No", "No internet service", "Yes"]
_one_hot_levels = {
    "MultipleLines": ["No", "No phone service", "Yes"],
    "InternetService": ["DSL", "Fiber optic", "No"],
    "OnlineSecurity": _internet_addon_levels,
    "OnlineBackup": _internet_addon_levels,
    "DeviceProtection": _internet_addon_levels,
    "TechSupport": _internet_addon_levels,
    "StreamingTV": _internet_addon_levels,
    "StreamingMovies": _internet_addon_levels,
    "Contract": ["Month-to-month", "One year", "Two year"],
    "PaymentMethod": ["Bank transfer (automatic)", "Credit card (automatic)",
                      "Electronic check", "Mailed check"],
}

# Request schema (one entry per model input) and response schema (the churn score).
prediction_schema = {
    "requests": (
        [_gender_feature]
        + [_real_feature(name) for name in _int_features]
        + [_real_feature(name, "FLOAT") for name in _float_features]
        + [
            _real_feature(f"{column}_{level}")
            for column, levels in _one_hot_levels.items()
            for level in levels
        ]
    ),
    "responses": [_real_feature("Likelihood of Churn", "FLOAT")],
}

In [None]:
# Metadata record for the uploaded model artifact; it is sent to the model
# metadata API in the next cell and embeds the prediction schema defined above.
model_catalog_metadata = {
      "URI": MODEL_URI,
      "name": f"{DEPLOYMENT_NAME}-model",
      "version": "v1.0",
      "artifactType": "XGBOOST",
      "taskType": "Customer churn classification",
      "tags": {
        "auto_created": "true",
        "author": f"{YOUR_NAME}"
      },
      "metrics": {},
      "project": "default",
      "prediction_schema": prediction_schema
    }

# Display the assembled record for inspection.
model_catalog_metadata

In [None]:
# Authenticate again and push the metadata record to the model metadata service.
metadata_api = ModelMetadataServiceApi(auth())
metadata_api.model_metadata_service_update_model_metadata(model_catalog_metadata)

In [None]:
# Read back the metadata stored for this model URI to check the update.
metadata_response = metadata_api.model_metadata_service_list_model_metadata(uri=MODEL_URI)
metadata_response

### Create an Explainer

In [None]:
def predict_fn(x):
    """Return the trained model's class probabilities for the rows in ``x``.

    Thin wrapper around ``model.predict_proba``; it is the prediction function
    handed to the AnchorTabular explainer.
    """
    class_probabilities = model.predict_proba(x)
    return class_probabilities

In [None]:
# Feature names, in training-column order, given to the explainer together with the predictor.
columns = list(train_X.columns)
explainer = AnchorTabular(predict_fn, columns)

In [None]:
# Split the training features by label: churned (1) and not churned (0).
Xchurn = train_X.loc[train_y == 1]
XnonChurn = train_X.loc[train_y == 0]

In [None]:
# Balance the classes by keeping all churn rows plus the first len(Xchurn) non-churn rows.
balanced_set = pd.concat([Xchurn, XnonChurn.iloc[:len(Xchurn)]]).to_numpy()

In [None]:
# Fit the explainer on the balanced sample, discretising at the 25th/50th/75th percentiles.
explainer.fit(balanced_set, disc_perc=(25, 50, 75)) 

In [None]:
# Position (in the test split) of the instance to explain.
idx = 36

testX_array = test_X.to_numpy()
instance = testX_array[idx]

class_names = ["Non-Churn", "Churn"]
# The explainer's predictor expects a 2-D batch, hence the reshape to a single row.
predicted_class = explainer.predictor(instance.reshape(1, -1))[0]
print('Prediction: ', class_names[predicted_class])

# Anchor explanation for the instance, computed with a threshold of 0.95.
explanation = explainer.explain(instance, threshold=0.95)
anchor_rule = ' AND '.join(explanation.anchor)
print('Anchor: %s' % anchor_rule)
print('Precision: %.2f' % explanation.precision)
print('Coverage: %.2f' % explanation.coverage)

In [None]:
# Persist the fitted explainer under the local path "poetry-churn-explainer".
explainer.save("poetry-churn-explainer")

In [None]:
!gsutil cp -r churn-explainer gs://kelly-seldon/customer-churn/models/{YOUR_NAME}/poetry-churn-explainer

### Deploying the Explainer

In [None]:
# Explainer kind and the GCS location the saved explainer was uploaded to.
EXPLAINER_TYPE = "AnchorTabular"
EXPLAINER_URI = f"gs://kelly-seldon/customer-churn/models/{YOUR_NAME}/poetry-churn-explainer"

# Explainer section to attach to the predictor; the container name and
# resources are left empty here.
explainer_spec = {
    "type": EXPLAINER_TYPE,
    "modelUri": EXPLAINER_URI,
    "containerSpec": {
        "name": "",
        "resources": {}
    }
}

In [None]:
# Attach the explainer to the first (and only) predictor of the deployment spec.
mldeployment['spec']['predictors'][0]['explainer'] = explainer_spec

In [None]:

# Re-authenticate and submit the deployment spec, which now includes the explainer.
# NOTE(review): a deployment with this name was already created earlier in the
# notebook; confirm whether this should be an update call rather than a second create.
api_client = auth()
deployment_api = SeldonDeploymentsApi(api_client)
deployment_api.create_seldon_deployment(namespace=NAMESPACE, mldeployment=mldeployment)