# Build a Recommendation System for Purchase Data

The scope of this notebook is to:

- Prepare the Scoring Function
- Unit Test the Score
- Build Flask Scoring App and deploy as a Web End point
- Build Dash App and deploy as Interactive Web Services

The business case is a mobile app that lets customers place orders before they even walk into the store.
When a customer first taps on the “order” page, we may recommend:

- A personalized recommendation: a ranked list of items (product IDs) that the user is most likely to want to put in his/her (empty) “basket”

We assume that the scenario is ModelOps 0.

Then: 

1. Data scientists hand over a trained model as an artifact to the engineering team for deployment
2. The handoff can include putting the trained model in the model registry
3. The scoring process runs in batch on a single EC2 instance

We have to reproduce the required development environment:

0. Get the latest version of the Champion model (optional)

1. Define Scoring Functions: Batch scoring is the main assumption

    - Define the `get_top` function
    - Define the get_top_n_ui function
    
2. Unit Test 

3. Deploy Model as Scoring Web EndPoint

4. Deploy Model as Interactive Web Service


## Settings

### Import libraries

In [1]:
#Data
import sqlalchemy as sql

#Data Science
import numpy as np
import pandas as pd
import sklearn
import scipy
import surprise
from surprise import dump

#Model Tracking
import mlflow
from mlflow.tracking import MlflowClient

#ML engineering
import flask
from flask import jsonify

#Utils
import os
import glob
import shutil
import logging
from collections import defaultdict
import configparser
import json
import pickle
import unittest
import docker
import pprint
import time
import requests

#Settings
from pprint import pprint
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import warnings
warnings.filterwarnings("ignore")

### Set environment variables

In [2]:
# Environment variables: folders used by the rest of the notebook.
# All three are relative paths, so they are resolved against the kernel's
# working directory at the moment each one is used.
outmodels = '../models/'
app_folder = '../src/score_app'
dash_folder = '../dash_app'

# Set dbconnection variables from the [CONN] section of the properties file
dbconnPath = './dbconn.properties'
config = configparser.RawConfigParser()
# RawConfigParser.read() skips files it cannot open and returns the list of
# files it actually parsed, so an empty result means the file was not loaded.
# Fail here with a clear message instead of a later NoSectionError.
if not config.read(dbconnPath):
    raise FileNotFoundError(f'Database connection file not found or unreadable: {dbconnPath}')
params = config  # alias of `config`, kept so existing references keep working
db_host = params.get('CONN', 'host')
db_port = params.get('CONN', 'port')
db_user = params.get('CONN', 'user')
db_pwd = params.get('CONN', 'password')
db_name = params.get('CONN', 'database')

# Set connection string (SQLAlchemy URL: MySQL dialect, PyMySQL driver).
# NOTE(review): user and password are interpolated verbatim; values containing
# characters such as '@', ':' or '/' would need URL-escaping - confirm.
connection_str = f'mysql+pymysql://{db_user}:{db_pwd}@{db_host}:{db_port}/{db_name}'

### Download Model Artifact from MLflow server

In [None]:
client = MlflowClient()

# Keep the metadata of the registered models for inspection; after the loop
# `regmodel_info` holds the entry of the last model returned by the server.
# NOTE(review): `list_registered_models` was superseded by
# `search_registered_models` in newer MLflow releases - confirm the installed version.
for regmodel in client.list_registered_models():
    regmodel_info = dict(regmodel)

# pprint(regmodel_info, indent=3)

# Resolve the run that produced the newest version of the 'Champion' model.
# `latest_versions` carries the latest version of each stage, so select the
# highest version number explicitly instead of relying on list position.
champion = client.get_registered_model('Champion')
if not champion.latest_versions:
    raise RuntimeError("Registered model 'Champion' has no versions to download")
championid = max(champion.latest_versions, key=lambda mv: int(mv.version)).run_id

# Top-level artifact paths logged by the champion run
art_list = [arts.path for arts in client.list_artifacts(championid, path=None)]

# The download destination must already exist
os.makedirs(outmodels, exist_ok=True)
for art_path in art_list:
    client.download_artifacts(championid, art_path, outmodels)

### Analyze the Model Artifact

In [None]:
# Locate the pickled model dump inside the downloaded artifact folder.
# Sorting makes the choice deterministic when several dumps are present.
model_pattern = outmodels + 'model/*.pkl'
model_candidates = sorted(glob.glob(model_pattern))
if not model_candidates:
    raise FileNotFoundError(f'No model file matches {model_pattern}')
modelpkl = model_candidates[0]
# A bare expression is only displayed when it is the last line of a cell,
# so print the selected path explicitly.
print('Model artifact:', modelpkl)

# The dump holds both the predictions and the fitted algorithm
predictions, algo = dump.load(modelpkl)

print('\n')
print('Sample of Predictions: ')
print('\n', predictions[0:10])
print('\n', 'Number of predictions:', len(predictions))

## Machine Learning Engineering

### Scoring Function

We have to return the top 10 recommended items for each user ID

#### Define the scoring function

In [None]:
def get_top(predictions, n=10):
    '''
    Group predictions by user and keep each user's n best-estimated items.

    args:
       predictions: iterable of 5-field records unpacked as
                    (uid, iid, true_r, est, details); only uid, iid and est are used
       n: maximum number of items kept per user (default=10)
    return:
       defaultdict mapping each uid to a list of (iid, est) tuples,
       ordered by est from highest to lowest
    '''
    ranked_by_user = defaultdict(list)

    # Bucket every (item, estimate) pair under the user it belongs to.
    for user_id, item_id, _true_rating, estimate, _details in predictions:
        ranked_by_user[user_id].append((item_id, estimate))

    # Swap each bucket for its n highest estimates, best first.
    # Only existing keys are reassigned, so iterating the dict is safe.
    for user_id in ranked_by_user:
        best_first = sorted(ranked_by_user[user_id], key=lambda pair: pair[1], reverse=True)
        ranked_by_user[user_id] = best_first[:n]

    return ranked_by_user

def get_top_n_ui(top, uid):
    '''
    Return the recommended item ids for a single user.

    args:
       top: mapping of uid -> list of (iid, est) tuples, as built by get_top
       uid: user id to look up (must match the key type used in `top`)
    return:
       list of item ids in the order stored in `top`, or 0 when the user
       has no entry (e.g. the user was not part of the predictions)
    '''
    # Use a membership test rather than indexing: `top` may be a defaultdict,
    # and indexing it would silently create an empty entry for unknown users.
    if uid not in top:
        return 0
    return [iid for iid, _ in top[uid]]

#### Unit test

In [None]:
class TestScoreFunction(unittest.TestCase):
    """Checks the scoring helpers against the loaded predictions for one known user."""

    def setUp(self):
        # User id under test and the ranked item ids expected for that user
        self.testcase = "100"
        self.expected = ['6', '118', '67', '27', '0', '51', '282', '62', '24', '201']

    def _recommended(self):
        # Rank the notebook-level `predictions`, then extract the list for the user under test
        ranked = get_top(predictions)
        return get_top_n_ui(ranked, self.testcase)

    def test_empty(self):
        # The recommendation for a known user must be truthy (non-empty)
        self.assertTrue(bool(self._recommended()))

    def test_basic(self):
        # The ranked item ids must match the expected list exactly
        self.assertEqual(self._recommended(), self.expected)


# argv is overridden so unittest does not parse the kernel's own command line;
# exit=False stops unittest from calling sys.exit() when the run finishes.
unittest.main(argv = ['first-arg-is-ignored'], exit = False)

### Model Deployment: Scoring Web EndPoint

Because we're testing, I need an app folder with the model and:

1. score_app.py
2. requirements.txt
3. Dockerfile

Then run the application with Docker Client and test it 

In [None]:
# NOTE(review): this cell re-declares TestScoreFunction with the same test
# methods as the "Unit test" cell earlier in the notebook. Running it rebinds
# the class name and executes the same checks again; it does not test the
# web endpoint described in this section.
class TestScoreFunction(unittest.TestCase):
    
    def setUp(self):
        # User id under test and the ranked item ids expected for that user
        self.testcase = "100"
        self.expected = ['6', '118', '67', '27', '0', '51', '282', '62', '24', '201']
    
    def test_empty(self):
        # The recommendation for the known user must be truthy (non-empty)
        self.assertTrue(bool(get_top_n_ui(get_top(predictions), self.testcase)))

    def test_basic(self):
        # The ranked item ids must match the expected list exactly
        self.assertEqual(get_top_n_ui(get_top(predictions), self.testcase), self.expected)
        
# exit=False stops unittest from calling sys.exit() after the run
unittest.main(argv = ['first-arg-is-ignored'], exit = False)

#### Create an app folder

In [3]:
# Create the application folder. exist_ok=True makes the cell safe to re-run
# and avoids the gap between checking for the folder and creating it.
os.makedirs(app_folder, exist_ok=True)

#### Copy the model

In [None]:
# Copy the downloaded model folder into the app folder.
# The copy happens only once: an existing destination is left untouched.
model_src = outmodels + 'model'
model_dst = app_folder + '/model'
if not os.path.exists(model_dst):
    shutil.copytree(src=model_src, dst=model_dst)

In [4]:
# Make the app folder the working directory so the %%writefile cells below
# (score_app.py, requirements.txt, Dockerfile) write into it.
# NOTE: app_folder is a relative path, so this cell is not idempotent -
# re-running it resolves the path from the new working directory.
os.chdir(app_folder)

#### Write score_app.py

In [None]:
%%writefile score_app.py

# -*- coding: utf-8 -*-

import os
import logging
from collections import defaultdict

import pandas as pd
from surprise import dump

import flask

import warnings
warnings.filterwarnings("ignore")

# Create the Flask application instance; the /predict route below is registered on it
app = flask.Flask(__name__)

def locate_model(dest):
    """
    Find the first pickled model file below a folder.
    args:
       dest: root folder to search; sub-folders are visited as well
    returns:
       path of the first file ending in .pkl or .pickle, in os.walk order,
       or None when no such file exists
    """
    model_suffixes = (".pkl", ".pickle")

    for root, _subdirs, files in os.walk(dest):
        for name in files:
            # Stop at the very first match
            if name.endswith(model_suffixes):
                return os.path.join(root, name)

    return None

def model_reader(model_path):
    """
    Load a dump file through surprise's dump.load.
    args:
       model_path: path of the dump file to read
    returns:
       (predictions, algo) tuple as stored in the dump
    NOTE(review): the dump is presumably pickle-based, so only load files
    from a trusted source - confirm.
    """
    stored_predictions, stored_algo = dump.load(model_path)
    return stored_predictions, stored_algo

def get_top(predictions, n=10):
    '''
    Group predictions by user and keep each user's n best-estimated items.
    args:
       predictions: iterable of 5-field records unpacked as
                    (uid, iid, true_r, est, details); only uid, iid and est are used
       n: maximum number of items kept per user (default=10)
    return:
       defaultdict mapping each uid to a list of (iid, est) tuples,
       ordered by est from highest to lowest
    '''
    ranked_by_user = defaultdict(list)

    # Bucket every (item, estimate) pair under the user it belongs to.
    for user_id, item_id, _true_rating, estimate, _details in predictions:
        ranked_by_user[user_id].append((item_id, estimate))

    # Swap each bucket for its n highest estimates, best first.
    # Only existing keys are reassigned, so iterating the dict is safe.
    for user_id in ranked_by_user:
        best_first = sorted(ranked_by_user[user_id], key=lambda pair: pair[1], reverse=True)
        ranked_by_user[user_id] = best_first[:n]

    return ranked_by_user

def get_top_n_ui(top, uid):
    '''
    Returns the list of recommended item ids for one user
    args:
       top: user-prediction dictionary (uid -> list of (iid, est) tuples)
       uid: user id for filtering (must match the key type used in `top`)
    return:
       list of item ids in the order stored in `top`, or 0 when the user
       has no entry in `top`
    '''
    # Use a membership test rather than indexing: `top` may be a defaultdict,
    # and indexing it would silently create an empty entry for unknown users.
    if uid not in top:
        return 0
    return [iid for iid, _ in top[uid]]

@app.route('/predict', methods=['GET','POST'])
def predict():
    '''
    Score one user and return the ranked product recommendations as JSON.

    The user id is read from the `uid` query-string parameter (for GET and
    POST alike, since only flask.request.args is inspected).

    returns:
       JSON object with
         success:  True when recommendations were produced, False otherwise
         response: {'Product_Rank': {...}, 'Product_id': {...}} (on success)
         error:    short explanation (on failure)
       HTTP status is 200 on success or when `uid` is missing, 404 when the
       user has no recommendations, 500 when no model file can be found.
    '''
    logging.info('Scoring Application is starting to process the request')

    # Initiate variables
    data = {"success": False}
    params = flask.request.args

    if 'uid' not in params:
        data['error'] = "Missing required query parameter 'uid'"
        return flask.jsonify(data)

    uid_toscore = str(params.get('uid'))

    # NOTE: the model is located and loaded on every request.
    model_path = locate_model(os.getcwd())
    if model_path is None:
        data['error'] = 'No model file found'
        return flask.jsonify(data), 500

    predictions, _ = model_reader(model_path)

    # An unknown uid surfaces either as a falsy result or as an IndexError
    # from the lookup; both mean "no recommendations for this user".
    try:
        uid_predictions = get_top_n_ui(get_top(predictions), uid_toscore)
    except IndexError:
        uid_predictions = 0
    if not uid_predictions:
        data['error'] = 'No recommendations available for uid {}'.format(uid_toscore)
        return flask.jsonify(data), 404

    # One 1-based label per recommended item. The upper bound is len + 1:
    # with range(1, len) the zip below would drop the last recommendation.
    prediction_rank_lenght = len(uid_predictions)
    prediction_rank_labels = ["".join([" Product", str(i)]) for i in range(1, prediction_rank_lenght + 1)]
    products_recommended = pd.DataFrame(list(zip(prediction_rank_labels, uid_predictions)), columns=['Product_Rank', 'Product_id'])

    data['response'] = products_recommended.to_dict()
    data['success'] = True

    return flask.jsonify(data)
            
if __name__ == '__main__':
    # Bind to all interfaces so the port published by the container is reachable.
    # The interactive debugger lets a client execute arbitrary code, so it is
    # off by default on this publicly bound server; opt in with FLASK_DEBUG=1.
    app.run(host='0.0.0.0', port=9999, debug=os.environ.get('FLASK_DEBUG') == '1')

#### Write the requirements.txt

In [None]:
# Show the installed version of each library, in the same order as before:
# flask, numpy, pandas, scikit-learn, surprise
for lib in (flask, np, pd, sklearn, surprise):
    print(lib.__version__)

In [None]:
%%writefile requirements.txt

# NOTE(review): versions are unpinned, so every image build may resolve
# different releases; consider pinning to the versions printed by the
# previous cell.
numpy
pandas
scikit-surprise
flask

#### Write the Dockerfile

In [None]:
%%writefile Dockerfile

# Use an official Python runtime as parent image
FROM python:3

# Label keys must not contain spaces: with "Scoring App = ..." the key was
# parsed as "Scoring" and the rest of the line became its value.
LABEL scoring_app="Recommendation System - Python - Surprise"

# RUN apt-get update && apt-get install -y \
# python3-dev \
# build-essential

# Set the working directory to /app
WORKDIR /app

# Copy only the dependency list first, so the install layers below stay
# cached when just the application code or the model changes
COPY requirements.txt /app/requirements.txt

# Upgrade pip and install any needed packages specified in requirements.txt
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
# (COPY instead of ADD: plain local files need no archive/URL handling)
COPY . /app

# Make port 9999 available
EXPOSE 9999

#Entrypoint python exec
ENTRYPOINT [ "python" ]

#Run flask application
CMD [ "score_app.py" ]

#### Run the Application

In [None]:
image_name = "score-flask_app:1"
container_name = 'scoring_app_test'
startup_timeout = 60  # seconds to wait for the container to report Running

#Client instance
dockercli = docker.DockerClient()

# Remove a container left over from a previous run: it would make `run` fail
# with a name conflict and can prevent the image from being removed.
try:
    dockercli.containers.get(container_name).remove(force=True)
except docker.errors.NotFound:
    pass

# Always rebuild the image so it reflects the current content of the folder;
# an existing image with the same tag is removed first.
if dockercli.images.list(image_name):
    dockercli.images.remove(image_name, force = True)
dockercli.images.build(path='.', tag = image_name)

try:
    app_container = dockercli.containers.run(image_name, name=container_name, detach=True, ports={'9999/tcp': 9999})
    status = app_container.attrs
    print(status['State'])
    deadline = time.monotonic() + startup_timeout
    while not status['State']['Running']:
        # A container that has already stopped will never report Running,
        # so give up instead of polling forever.
        if status['State']['Status'] in ('exited', 'dead'):
            raise RuntimeError(f"Container {container_name} stopped during start-up: {status['State']}")
        if time.monotonic() >= deadline:
            raise RuntimeError(f"Container {container_name} is not running after {startup_timeout}s")
        time.sleep(3)
        app_container.reload()  # refresh attrs from the Docker daemon
        status = app_container.attrs
        print(''.center(50, '-'))
        print(status['State'])
# Docker SDK errors do not derive from RuntimeError, so they are listed explicitly
except (RuntimeError, docker.errors.DockerException) as error:
    print(error)

#### Test the Flask Scoring App as web endpoint

In [None]:
protocol = 'http'
# NOTE(review): hard-coded address, presumably the host running the container;
# adjust it to your environment before re-running.
server = '10.249.21.252'
port = '9999'

params = {'uid': 100}

# Call the /predict endpoint of the running container.
# A timeout is set because requests otherwise waits indefinitely for a reply.
score_request = requests.get(f"{protocol}://{server}:{port}/predict", params=params, timeout=10)

In [None]:
# Show the raw body of the response returned by the /predict endpoint
print(score_request.text)

#### Kill the application

In [None]:
# stop and remove the container
app_container.stop()
app_container.remove()

### Model Deployment: Scoring Interactive Web Service

Because our scenario is going to have a mobile application, we just prototype a plain-vanilla frontend in Dash for testing our model

In [5]:
# Create the Dash app folder (exist_ok=True keeps the cell safe to re-run)
# and make it the working directory for the %%writefile cells below.
# NOTE: dash_folder is relative, so it is resolved from the current working
# directory, which an earlier cell changed to app_folder.
os.makedirs(dash_folder, exist_ok=True)
os.chdir(dash_folder)

#### Write dash_app.py

In [33]:
%%writefile dash_app.py

# -*- coding: utf-8 -*-

import dash
import dash_core_components as component
import dash_html_components as html

app = dash.Dash(__name__)

# Dash apps 1st element: layout
app.layout = html.Div(children=[
    #Title
    html.H1(children='Shopping App'),
    
    html.Div(children=
             """
             A Scoring Interactive Web Service for testing
             """)
])

if __name__ == '__main__':
    import os

    # Bind to all interfaces: the default host is the loopback address, which
    # is not reachable through the port published by the Docker container.
    # Debug mode is off by default on this publicly bound server; opt in with
    # DASH_DEBUG=1.
    app.run_server(host='0.0.0.0', port='8052', debug=os.environ.get('DASH_DEBUG') == '1')

Overwriting dash_app.py


#### Write the requirements.txt

In [34]:
%%writefile requirements.txt

# NOTE(review): unpinned. dash_app.py imports dash_core_components and
# dash_html_components, which newer Dash releases presumably deprecate in
# favour of `from dash import dcc, html` - pin a compatible version; confirm.
dash

Overwriting requirements.txt


#### Write Dockerfile

In [35]:
%%writefile Dockerfile

# Official Python runtime as parent image
FROM python:3

WORKDIR /app

# Copy only the dependency list first, so the install layer stays cached
# when just the application code changes
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code (COPY instead of ADD: plain local files need no
# archive/URL handling)
COPY . /app

# Port served by dash_app.py
EXPOSE 8052

ENTRYPOINT [ "python" ]

CMD [ "dash_app.py" ]

Overwriting Dockerfile


#### Run the application

In [38]:
image_name = "score-dash_app:1"
container_name = 'scoring_dash_app_test'
startup_timeout = 60  # seconds to wait for the container to report Running

#Client instance
dockercli = docker.DockerClient()

# Remove a container left over from a previous run: it would make `run` fail
# with a name conflict and can prevent the image from being removed.
try:
    dockercli.containers.get(container_name).remove(force=True)
except docker.errors.NotFound:
    pass

# Always rebuild the image so it reflects the current content of the folder;
# an existing image with the same tag is removed first.
if dockercli.images.list(image_name):
    dockercli.images.remove(image_name, force = True)
dockercli.images.build(path='.', tag = image_name)

try:
    app_container = dockercli.containers.run(image_name, name=container_name, detach=True, ports={'8052/tcp': 8052})
    status = app_container.attrs
    print(status['State'])
    deadline = time.monotonic() + startup_timeout
    while not status['State']['Running']:
        # A container that has already stopped will never report Running,
        # so give up instead of polling forever.
        if status['State']['Status'] in ('exited', 'dead'):
            raise RuntimeError(f"Container {container_name} stopped during start-up: {status['State']}")
        if time.monotonic() >= deadline:
            raise RuntimeError(f"Container {container_name} is not running after {startup_timeout}s")
        time.sleep(3)
        app_container.reload()  # refresh attrs from the Docker daemon
        status = app_container.attrs
        print(''.center(50, '-'))
        print(status['State'])
# Docker SDK errors do not derive from RuntimeError, so they are listed explicitly
except (RuntimeError, docker.errors.DockerException) as error:
    print(error)

{'Status': 'created', 'Running': False, 'Paused': False, 'Restarting': False, 'OOMKilled': False, 'Dead': False, 'Pid': 0, 'ExitCode': 0, 'Error': '', 'StartedAt': '0001-01-01T00:00:00Z', 'FinishedAt': '0001-01-01T00:00:00Z'}
--------------------------------------------------
{'Status': 'running', 'Running': True, 'Paused': False, 'Restarting': False, 'OOMKilled': False, 'Dead': False, 'Pid': 11560, 'ExitCode': 0, 'Error': '', 'StartedAt': '2020-06-01T13:09:34.30670579Z', 'FinishedAt': '0001-01-01T00:00:00Z'}


In [39]:
# stop and remove the container
app_container.stop()
app_container.remove()

In [13]:
from jupyter_dash import JupyterDash
# JupyterDash.infer_jupyter_proxy_config()
import dash
import dash_core_components as component
import dash_html_components as html

In [14]:
app = JupyterDash(__name__)

# Dash apps 1st element: layout.
# The page is a title followed by a short description, wrapped in one Div.
page_title = html.H1('Shopping App')
page_intro = html.Div(
    """
             A Scoring Interactive Web Service for testing
             """
)
app.layout = html.Div([page_title, page_intro])

In [15]:
# Serve the app in JupyterLab mode with debug enabled.
# NOTE(review): the host is a hard-coded address, presumably that of the
# Jupyter environment - confirm or adjust before re-running elsewhere.
app.run_server(mode="jupyterlab", host='172.23.0.6', port='8051', debug=True)

Running on http://172.23.0.6:8051/
Debugger PIN: 900-301-118
