In [7]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

from sklearn.linear_model import LogisticRegression

# For saving ML models
import pickle

# Building a Flask API for ML Logistic Regression Model

## Step 1 - Preprocessing!

In [8]:
# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')

X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

In [9]:
# Encode labels and save to pickle file

labelEncoder = LabelEncoder()
X[:, 1] = labelEncoder.fit_transform(X[:, 1])
filename = 'labelEncoder1.pickle'
pickle.dump(labelEncoder, open(filename, 'wb'))

labelEncoder = LabelEncoder()
X[:, 2] = labelEncoder.fit_transform(X[:, 2])
filename = 'labelEncoder2.pickle'
pickle.dump(labelEncoder, open(filename, 'wb'))

In [10]:
# Split into train and test

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [11]:
# Scale our data and save too pickle file

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

filename = 'standardScaler.pickle'
pickle.dump(sc, open(filename, 'wb'))

## Step 2 - Define Model

In [12]:
log_reg = LogisticRegression(solver='lbfgs')
log_reg.fit(X_train, y_train)

# Save to pickle file
filename = 'log_reg_model.pickle'
pickle.dump(log_reg, open(filename, 'wb'))

## Step 3 - Evaluate the model

In [141]:
y_pred = log_reg.predict(X_test)

In [142]:
print(f'confusion matrix: \n{metrics.confusion_matrix(y_pred, y_test)}')
print(f'accuracy: {metrics.accuracy_score(y_pred, y_test)}')
print(f'recall: {metrics.recall_score(y_pred, y_test)}')

confusion matrix: 
[[1533  359]
 [  42   66]]
accuracy: 0.7995
recall: 0.6111111111111112


## Test to see if we can load the model and predict again

In [143]:
filename = 'log_reg_model.pickle'
model = pickle.load(open(filename, 'rb'))

In [144]:
y_pred = model.predict(X_test)
metrics.accuracy_score(y_pred, y_test)

0.7995

**Success!!**

## Time to build the Flask API
Find the code under the flask folder in the app.py script

In [35]:
test_input = "19,15661507,Muldrow,587,Spain,Male,45,6,0,1,0,0,158684.81,0"

In [36]:
test_input = np.array(test_input.split(','))

In [37]:
test_input

array(['19', '15661507', 'Muldrow', '587', 'Spain', 'Male', '45', '6',
       '0', '1', '0', '0', '158684.81', '0'], dtype='<U9')

In [38]:
len(test_input)

14

In [31]:
from io import StringIO

test_input = StringIO(test_input)

In [42]:
test = labelEncoder.transform(['Male'])

In [43]:
test

array([1])

In [33]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,191566,"1507,Muldrow,587,Spain,Male,45,6,0,1,",,,,,,,,,,,
