**Initialization**
- I put these three lines of code at the top of each of my notebooks. The first two reload imported modules automatically, which helps prevent problems when re-running the same project, and the third renders visualizations within the notebook.

In [5]:
#@ INITIALIZATION:
# Comments are kept on their own lines: text after a line magic is passed to the magic as arguments.
# (Re)load IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: reload all imported modules before executing each cell.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

**Importing Libraries and Dependencies**
- I have imported all the libraries and dependencies required for the project in a single cell.

In [6]:
#@ IMPORTING MODULES: 
import pickle
import requests
import time
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

**Getting the Dataset**

In [7]:
#@ GETTING THE DATASET: UNCOMMENT BELOW:
# data = "https://raw.githubusercontent.com/alexeygrigorev/mlbookcamp-code/master/chapter-03-churn-prediction/WA_Fn-UseC_-Telco-Customer-Churn.csv"
# !wget $data -O data.csv

**Processing the Dataset**

In [8]:
#@ PROCESSING THE DATASET:
# Load the raw CSV and normalise column names to lower_snake_case.
df = pd.read_csv('data.csv')
normalised_names = df.columns.str.lower().str.replace(' ', '_')
df.columns = normalised_names

# Apply the same lower-case / underscore normalisation to every object-typed (string) column.
is_object = df.dtypes == 'object'
categorical_columns = list(df.dtypes[is_object].index)
for col in categorical_columns:
    lowered = df[col].str.lower()
    df[col] = lowered.str.replace(' ', '_')

# Values of totalcharges that cannot be parsed as numbers become NaN and are then replaced by 0.
df['totalcharges'] = pd.to_numeric(df['totalcharges'], errors='coerce').fillna(0)

# Encode the target as an integer: 1 where churn is 'yes', 0 otherwise.
df['churn'] = df['churn'].eq('yes').astype(int)

# Inspecting dataframe.
df.head()

Unnamed: 0,customerid,gender,seniorcitizen,partner,dependents,tenure,phoneservice,multiplelines,internetservice,onlinesecurity,...,deviceprotection,techsupport,streamingtv,streamingmovies,contract,paperlessbilling,paymentmethod,monthlycharges,totalcharges,churn
0,7590-vhveg,female,0,yes,no,1,no,no_phone_service,dsl,no,...,no,no,no,no,month-to-month,yes,electronic_check,29.85,29.85,0
1,5575-gnvde,male,0,no,no,34,yes,no,dsl,yes,...,yes,no,no,no,one_year,no,mailed_check,56.95,1889.5,0
2,3668-qpybk,male,0,no,no,2,yes,no,dsl,yes,...,no,no,no,no,month-to-month,yes,mailed_check,53.85,108.15,1
3,7795-cfocw,male,0,no,no,45,no,no_phone_service,dsl,yes,...,yes,yes,no,no,one_year,no,bank_transfer_(automatic),42.3,1840.75,0
4,9237-hqitu,female,0,no,no,2,yes,no,fiber_optic,no,...,no,no,no,no,month-to-month,yes,electronic_check,70.7,151.65,1


In [9]:
#@ PREPARING THE DATASET:
# Feature groups; train() and predict() select `categorical + numerical` from the frame.
numerical = ['tenure', 'monthlycharges', 'totalcharges']
categorical = [
    'gender', 'seniorcitizen', 'partner', 'dependents',
    'phoneservice', 'multiplelines', 'internetservice',
    'onlinesecurity', 'onlinebackup', 'deviceprotection', 'techsupport',
    'streamingtv', 'streamingmovies',
    'contract', 'paperlessbilling', 'paymentmethod',
]

# Hold out 20% of the rows as the test split; the seed is fixed so the split is reproducible.
df_full_train, df_test = train_test_split(df, test_size=0.2, random_state=1)

**Training Model**

In [10]:
#@ FUNCTION TO TRAIN THE MODEL:
def train(df_train, y_train, C=1.0):
    """Fit a DictVectorizer and a logistic regression on the training frame.

    Args:
        df_train: DataFrame containing the columns listed in the module-level
            `categorical` and `numerical` lists.
        y_train: Target values aligned with the rows of `df_train`.
        C: Value forwarded to `LogisticRegression(C=...)`.

    Returns:
        Tuple `(dv, model)`: the fitted vectorizer and the fitted classifier.
    """
    feature_columns = categorical + numerical
    records = df_train[feature_columns].to_dict(orient="records")   # One dict per row.

    dv = DictVectorizer(sparse=False)
    model = LogisticRegression(C=C, max_iter=1000)
    model.fit(dv.fit_transform(records), y_train)                   # Vectorize, then train.
    return dv, model

#@ FUNCTION FOR PREDICTION:
def predict(df, dv, model):
    """Return the second column of `model.predict_proba` for every row of `df`.

    Args:
        df: DataFrame containing the columns listed in the module-level
            `categorical` and `numerical` lists.
        dv: Vectorizer exposing `transform`; it is applied, not re-fitted.
        model: Classifier exposing `predict_proba`.

    Returns:
        Column 1 of the `predict_proba` output, one value per row of `df`.
    """
    records = df[categorical + numerical].to_dict(orient="records")   # One dict per row.
    probabilities = model.predict_proba(dv.transform(records))
    return probabilities[:, 1]

#@ INITIALIZING PARAMETERS:
C: float = 1.0          # Passed to train() as the LogisticRegression C value.
n_splits: int = 5       # Number of folds handed to KFold.

In [11]:
#@ INITIALIZING KFOLD CROSS VALIDATION:
# Shuffled K-fold over the full training split; each fold trains a fresh model and
# records its validation ROC AUC in `scores`.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=2022)
scores = []
for train_idx, val_idx in kfold.split(df_full_train):
    # Positional row selection for this fold.
    df_train, df_val = df_full_train.iloc[train_idx], df_full_train.iloc[val_idx]
    y_train, y_val = df_train.churn.values, df_val.churn.values

    dv, model = train(df_train, y_train, C=C)
    y_pred = predict(df_val, dv, model)

    auc = roc_auc_score(y_val, y_pred)
    scores.append(auc)

# Summarise the folds as mean +- standard deviation.
mean_auc = np.mean(scores)
std_auc = np.std(scores)
print("C=%s %.3f +- %.3f" % (C, mean_auc, std_auc))

C=1.0 0.843 +- 0.007


In [12]:
#@ INSPECTING SCORES:
# Display the list of per-fold ROC AUC values appended during cross-validation.
scores

[0.8440837773467234,
 0.8532735916597619,
 0.8465001629726205,
 0.8350742949008101,
 0.8353297390217221]

In [13]:
#@ TRAINING FINAL MODEL:
# Fit on the whole training split using the configured C. This is the same value used in
# cross-validation and embedded in the saved model's file name, so the artifact name always
# matches the model that was actually trained.
dv, model = train(df_full_train, df_full_train.churn.values, C=C)       # Training the model.
y_pred = predict(df_test, dv, model)                                    # Predictions on the held-out test split.
auc = roc_auc_score(df_test.churn.values, y_pred)                       # Test ROC AUC.
auc

0.8572386167896259

**Saving Model**

In [14]:
#@ SAVING THE MODEL:
# The file name embeds the configured C value.
output_file = f"model_C={C}.bin"
# The context manager closes the file even if pickling raises, so the handle is never leaked.
with open(output_file, "wb") as f_out:
    pickle.dump((dv, model), f_out)                     # Vectorizer and model are stored together as one tuple.
output_file

'model_C=1.0.bin'

In [15]:
#@ LOADING THE MODEL:
# NOTE: pickle.load can execute arbitrary code; only load files you produced or trust.
input_file = "model_C=1.0.bin"
with open(input_file, mode="rb") as f_in:
    artifacts = pickle.load(f_in)                       # The stored (vectorizer, model) tuple.
dv, model = artifacts
dv, model                                               # Inspection.

(DictVectorizer(sparse=False), LogisticRegression(max_iter=1000))

**Model Evaluation**

In [16]:
#@ INITIALIZING MODEL EVALUATION:
# A single customer record, assembled from three groups of fields (key order is preserved).
demographics = {
    'gender': 'female',
    'seniorcitizen': 0,
    'partner': 'yes',
    'dependents': 'no',
}
services = {
    'phoneservice': 'no',
    'multiplelines': 'no_phone_service',
    'internetservice': 'dsl',
    'onlinesecurity': 'no',
    'onlinebackup': 'yes',
    'deviceprotection': 'no',
    'techsupport': 'no',
    'streamingtv': 'no',
    'streamingmovies': 'no',
}
account = {
    'contract': 'month-to-month',
    'paperlessbilling': 'yes',
    'paymentmethod': 'electronic_check',
    'tenure': 1,
    'monthlycharges': 29.85,
    'totalcharges': 29.85,
}
customer = {**demographics, **services, **account}

# Vectorize the single record with the loaded DictVectorizer (transform only, no re-fitting).
X = dv.transform([customer])

In [17]:
#@ MODEL PREDICTION:
model.predict_proba(X)[0, 1]

0.6363584152704198

**Testing Churn App**

In [18]:
#@ TESTING CHURN APP:
# Endpoint that the customer payload is POSTed to; assumes the churn service is running locally on port 9696.
url = 'http://localhost:9696/predict'                # Initializing url.

In [19]:
#@ INITIALIZATION:
# Customer payload for the churn service, assembled from three groups of fields
# (key order is preserved).
demographics = {
    "gender": "female",
    "seniorcitizen": 0,
    "partner": "yes",
    "dependents": "no",
}
services = {
    "phoneservice": "no",
    "multiplelines": "no_phone_service",
    "internetservice": "dsl",
    "onlinesecurity": "no",
    "onlinebackup": "yes",
    "deviceprotection": "no",
    "techsupport": "no",
    "streamingtv": "no",
    "streamingmovies": "no",
}
account = {
    "contract": "month-to-month",
    "paperlessbilling": "yes",
    "paymentmethod": "electronic_check",
    "tenure": 1,
    "monthlycharges": 29.85,
    "totalcharges": 29.85,
}
customer = {**demographics, **services, **account}

In [None]:
#@ TESTING CHURN APP:
# POST the customer payload as JSON and keep the decoded reply in `response`.
# The timeout stops the cell from hanging indefinitely when the service is not reachable.
http_response = requests.post(url, json=customer, timeout=10)
# Fail loudly on 4xx/5xx instead of trying to decode an error page as JSON.
http_response.raise_for_status()
response = http_response.json()
response

In [None]:
#@ INSPECTING RESPONSE:
# A truthy "churn" field in the service reply triggers the (placeholder) email message.
should_notify = response["churn"]
if should_notify:
    recipient = 'xyz'                                   # Placeholder recipient.
    print("sending email to %s" % recipient)