# Build a Recommendation System for Purchase Data

The scope of this notebook is 

- Prepare the Scoring Function
- Unit Test the Score
- Build Flask Scoring App and deploy as a Web End point
- Build Dash App and deploy as Interactive Web Services

The Business case is an mobile app allowing its customers to place orders before they even have to walk into the store.
When a customer first taps on the “order” page, we may recommend 

- Personalized recommendation with ranked list of items (product IDs) that the user is most likely to want to put in his/her (empty) “basket”

Assuming that the scenario is ModelOps 0. Then: 

1. Data scientists hand over a trained model as an artifact to the engineering team for deployement
2. The handoff can include putting the trained model in the models registry
3. The Scoring process is in Batch on a sigle EC2 instance

We have to reproduce the required development enviroment

0. Define Artefacter function to get the last version of Champion Model (optional)

1. Define Scoring Functions: Batch scoring is the main assumption

    - Define the get_top_items function 
    - Define the get_top_n_ui function
    

2. Unit Test 

3. Define a quick front end that simulate Mobile App (Test it in Docker)


## Settings

### Import libraries

In [1]:
#Data
import sqlalchemy as sql

#Data Science
import pandas as pd
from surprise import dump

#Model Tracking
import mlflow
from mlflow.tracking import MlflowClient

#ML engineering
import flask

#Utils
import os
import glob
import shutil
import logging
from collections import defaultdict
import configparser
import json
import pickle
import unittest
import docker
import pprint
import time
import requests

#Settings
from pprint import pprint
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

### Set enviroment variables

In [2]:
# Enviroment variables
outmodels = '../models/'
app_folder = '../src/app'

# Set dbconnection variables
dbconnPath = './dbconn.properties'
config = configparser.RawConfigParser()
config.read(dbconnPath)
params = config
db_host=params.get('CONN', 'host')
db_port=params.get('CONN', 'port')
db_user=params.get('CONN', 'user')
db_pwd=params.get('CONN', 'password')
db_name=params.get('CONN', 'database')

# Set connection string
connection_str = f'mysql+pymysql://{db_user}:{db_pwd}@{db_host}:{db_port}/{db_name}'

### Download Model Artefact from Mlflow server

In [3]:
client = MlflowClient()
for regmodel in client.list_registered_models():
    regmodel_info = dict(regmodel)

# pprint(regmodel_info, indent=3)

champion=client.get_registered_model('Champion')
championid=champion.latest_versions[-1].run_id

art_list = [arts.path for arts in client.list_artifacts(championid, path=None)]

for art_path in art_list: 
    client.download_artifacts(championid, art_path, outmodels)

  from collections import Mapping, MutableMapping


### Analyze the Model Artifact

In [4]:
modelpkl = [modelpath for modelpath in glob.glob(outmodels + 'model/*.pkl')][0]
modelpkl

predictions, algo = dump.load(modelpkl)

print('\n')
print('Sample of Predictions: ')
print('\n', predictions[0:10])
print('\n', 'Number of predictions:', len(predictions))



Sample of Predictions: 

 [Prediction(uid='100', iid='0', r_ui=1.0, est=1.4104866760238497, details={'was_impossible': False}), Prediction(uid='100', iid='118', r_ui=2.0, est=1.6290418315812665, details={'was_impossible': False}), Prediction(uid='100', iid='201', r_ui=1.0, est=1.0281647548199493, details={'was_impossible': False}), Prediction(uid='100', iid='24', r_ui=2.0, est=1.065194864590783, details={'was_impossible': False}), Prediction(uid='100', iid='27', r_ui=4.0, est=1.4686861264367166, details={'was_impossible': False}), Prediction(uid='100', iid='282', r_ui=6.0, est=1.2631693065495946, details={'was_impossible': False}), Prediction(uid='100', iid='51', r_ui=0.0, est=1.3060476975670121, details={'was_impossible': False}), Prediction(uid='100', iid='6', r_ui=0.0, est=1.8579018429484466, details={'was_impossible': False}), Prediction(uid='100', iid='62', r_ui=2.0, est=1.0656423124377505, details={'was_impossible': False}), Prediction(uid='100', iid='67', r_ui=3.0, est=1.55989

## Machine Learning Engineering

### Scoring Function

We have to return Top 10 Recommended Items by userid

#### Define the scoring function

In [5]:
def get_top(predictions, n=10):
    
    '''
    Returns the the top-N recommendation from a set of predictions
    
    '''
    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
        
    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
        
    return top_n

def get_top_n_ui(top, uid):
    try:
        top_n_ui = [[iid for (iid, _) in user_ratings] for UID, user_ratings in top.items() if UID==uid][0]
        return top_n_ui
    except ValueError: # user was not part of the trainset
        return 0

#### Unit test

In [6]:
class TestScoreFunction(unittest.TestCase):
    
    def setUp(self):
        self.testcase = "100"
        self.expected = ['6', '118', '67', '27', '0', '51', '282', '62', '24', '201']
    
    def test_empty(self):
        self.assertTrue(bool(get_top_n_ui(get_top(predictions), self.testcase)))

    def test_basic(self):
        self.assertEqual(get_top_n_ui(get_top(predictions), self.testcase), self.expected)
        
unittest.main(argv = ['first-arg-is-ignored'], exit = False)

..
----------------------------------------------------------------------
Ran 2 tests in 0.280s

OK


<unittest.main.TestProgram at 0x7fd602c87510>

### Create the Scoring Flask App and deploy as web endpoint

Because we're testing, I need a app folder with:

1. app.py
2. requirements.txt
3. Dockerfile

Then run the application with Docker Client and test it 

#### Create a app folder

In [9]:
if not os.path.exists(app_folder):
    os.makedirs(app_folder)

#### Copy the model

In [10]:
if not os.path.exists(app_folder + '/model'):
    shutil.copytree(src=outmodels + 'model', dst=app_folder + '/model')

In [11]:
os.chdir(app_folder)

#### Write the app.py

In [12]:
%%writefile app.py

# -*- coding: utf-8 -*-

import os
import logging
from collections import defaultdict

import pandas as pd

import flask

#create an instance
app = flask.Flask(__name__)

def locate_model(dest):
    
    '''
    Locate model pickle file
    
    '''
    for dirpath, dirnames, filenames in os.walk(dest):
        for filename in [f for f in filenames if f.endswith((".pkl", ".pickle"))]:
            model_path = os.path.join(dirpath, filename)
            return model_path
    return None

def model_reader(model_path):
    predictions, algo = dump.load(model_path)
    return predictions, algo

def get_top(predictions, n=10):
    
    '''
    Returns the the top-N recommendation from a set of predictions
    
    '''
    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
        
    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
        
    return top_n

def get_top_n_ui(top, uid):
    try:
        top_n_ui = [[iid for (iid, _) in user_ratings] for UID, user_ratings in top.items() if UID==uid][0]
        return top_n_ui
    except ValueError: # user was not part of the trainset
        return 0

@app.route('/predict', methods=['GET','POST'])
def predict():
    
    logging.info('Scoring Application is starting to process the request')
    
    #Intiate variables
    data = defaultdict()
    data["success"] = False
    params = flask.request.args
    
    if 'uid' in params.keys():
        uid_toscore = str(params.get('uid'))
        model_path = locate_model(os.getcwd())
        predictions, _ = model_reader(model_path)
        uid_predictions = get_top_n_ui(get_top(predictions), uid_toscore)
        
        prediction_rank_lenght = len(uid_predictions)
        prediction_rank_labels = ["".join([str(i), "°", " Product"]) for i in range(1,prediction_rank_lenght)]
        products_recommended = pd.DataFrame(list(zip(prediction_rank_labels, uid_predictions)), columns=['Product_Rank', 'Product_id'])
    
    return flask.jsonify(products_recommended)
            
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=9999, debug=True)

Overwriting app.py


#### Write the requirements.txt

In [13]:
print(flask.__version__)
print(pd.__version__)

1.0.3
1.0.4


In [29]:
%%writefile requirements.txt

flask==1.0.3
pandas==1.0.4

Overwriting requirements.txt


#### Write the dockerfile

In [30]:
# %%writefile Dockerfile

# FROM ubuntu:16.04

# RUN apt-get update -y && \
#     apt-get install -y python-pip python-dev

# COPY ./requirements.txt /app/requirements.txt

# WORKDIR /app

# RUN pip install -r requirements.txt

# COPY . /app

# ENV FLASK_APP=app.py

# ENTRYPOINT [ "python" ]

# CMD [ "app.py" ]

In [31]:
%%writefile Dockerfile

FROM conda/miniconda3:latest

LABEL Scoring App = "Reccomendation System - Python - Surprise"

USER root

COPY . /app

RUN pip install --upgrade pip
RUN pip install -r /app/requirements.txt

WORKDIR /app

ENV FLASK_APP=app.py

ENTRYPOINT [ "python" ]

CMD [ "app.py" ]

Overwriting Dockerfile


#### Run the Application

In [34]:
image_name = "score-flask_app:1"

#Client instance
dockercli = docker.DockerClient()

#Check for image
if not dockercli.images.list(image_name):
    #if not build it
    dockercli.images.build(path='.', tag = image_name)
else:
    dockercli.images.remove(image_name, force = True)
    dockercli.images.build(path='.', tag = image_name)
try:
    app_container = dockercli.containers.run(image_name, name='scoring_app_test', detach=True, ports={'9999/tcp': 9999})
    status = app_container.attrs
    print(status['State'])
    while (status['State']['Running'] == False):
        time.sleep(3)
        app_container.reload()
        status = app_container.attrs
        print(''.center(50, '-'))
        print(status['State'])
except RuntimeError as error:
    print(error)

{'Status': 'created', 'Running': False, 'Paused': False, 'Restarting': False, 'OOMKilled': False, 'Dead': False, 'Pid': 0, 'ExitCode': 0, 'Error': '', 'StartedAt': '0001-01-01T00:00:00Z', 'FinishedAt': '0001-01-01T00:00:00Z'}
--------------------------------------------------
{'Status': 'running', 'Running': True, 'Paused': False, 'Restarting': False, 'OOMKilled': False, 'Dead': False, 'Pid': 13378, 'ExitCode': 0, 'Error': '', 'StartedAt': '2020-06-01T07:01:15.626662092Z', 'FinishedAt': '0001-01-01T00:00:00Z'}


#### Test the Flask Scoring App as web endpoint

In [35]:
protocol = input("Please provide protocol (http/https) ")
server = input("Please provide server ip ")
port = input("Please provide port ")

# Check that the container is available
score_request = requests.get(protocol + "://" + server + ":" + port + "/predict")

Please provide protocol (http/https)  http
Please provide server ip  10.249.21.252
Please provide port  9999


In [36]:
print(score_request.text)

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
  "http://www.w3.org/TR/html4/loose.dtd">
<html>
  <head>
    <title>UnboundLocalError: local variable 'products_recommended' referenced before assignment // Werkzeug Debugger</title>
    <link rel="stylesheet" href="?__debugger__=yes&amp;cmd=resource&amp;f=style.css"
        type="text/css">
    <!-- We need to make sure this has a favicon so that the debugger does
         not by accident trigger a request to /favicon.ico which might
         change the application state. -->
    <link rel="shortcut icon"
        href="?__debugger__=yes&amp;cmd=resource&amp;f=console.png">
    <script src="?__debugger__=yes&amp;cmd=resource&amp;f=jquery.js"></script>
    <script src="?__debugger__=yes&amp;cmd=resource&amp;f=debugger.js"></script>
    <script type="text/javascript">
      var TRACEBACK = 140670486819000,
          CONSOLE_MODE = false,
          EVALEX = true,
          EVALEX_TRUSTED = false,
          SECRET = "9FiM9EoAJ

#### Kill the application

In [33]:
# stop and remove the container
app_container.stop()
app_container.remove()

In [None]:
from jupyter_dash import JupyterDash
import dash
import dash_core_components as dcc
import dash_html_components as html

# viewer = JupyterDash.AppViewer()

# app = dash.Dash(__name__)

# app.layout = html.Div('Hello World')

# viewer.show(app)

In [None]:
df = pd.read_csv('https://plotly.github.io/datasets/country_indicators.csv')
available_indicators = df['Indicator Name'].unique()

In [None]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

# Create server variable with Flask server object for use with gunicorn
server = app.server

app.layout = html.Div([
    html.Div([

        html.Div([
            dcc.Dropdown(
                id='crossfilter-xaxis-column',
                options=[{'label': i, 'value': i} for i in available_indicators],
                value='Fertility rate, total (births per woman)'
            ),
            dcc.RadioItems(
                id='crossfilter-xaxis-type',
                options=[{'label': i, 'value': i} for i in ['Linear', 'Log']],
                value='Linear',
                labelStyle={'display': 'inline-block'}
            )
        ],
        style={'width': '49%', 'display': 'inline-block'}),

        html.Div([
            dcc.Dropdown(
                id='crossfilter-yaxis-column',
                options=[{'label': i, 'value': i} for i in available_indicators],
                value='Life expectancy at birth, total (years)'
            ),
            dcc.RadioItems(
                id='crossfilter-yaxis-type',
                options=[{'label': i, 'value': i} for i in ['Linear', 'Log']],
                value='Linear',
                labelStyle={'display': 'inline-block'}
            )
        ], style={'width': '49%', 'float': 'right', 'display': 'inline-block'})
    ], style={
        'borderBottom': 'thin lightgrey solid',
        'backgroundColor': 'rgb(250, 250, 250)',
        'padding': '10px 5px'
    }),

    html.Div([
        dcc.Graph(
            id='crossfilter-indicator-scatter',
            hoverData={'points': [{'customdata': 'Japan'}]}
        )
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),
    html.Div([
        dcc.Graph(id='x-time-series'),
        dcc.Graph(id='y-time-series'),
    ], style={'display': 'inline-block', 'width': '49%'}),

    html.Div(dcc.Slider(
        id='crossfilter-year--slider',
        min=df['Year'].min(),
        max=df['Year'].max(),
        value=df['Year'].max(),
        marks={str(year): str(year) for year in df['Year'].unique()},
        step=None
    ), style={'width': '49%', 'padding': '0px 20px 20px 20px'})
])


@app.callback(
    dash.dependencies.Output('crossfilter-indicator-scatter', 'figure'),
    [dash.dependencies.Input('crossfilter-xaxis-column', 'value'),
     dash.dependencies.Input('crossfilter-yaxis-column', 'value'),
     dash.dependencies.Input('crossfilter-xaxis-type', 'value'),
     dash.dependencies.Input('crossfilter-yaxis-type', 'value'),
     dash.dependencies.Input('crossfilter-year--slider', 'value')])
def update_graph(xaxis_column_name, yaxis_column_name,
                 xaxis_type, yaxis_type,
                 year_value):
    dff = df[df['Year'] == year_value]

    return {
        'data': [dict(
            x=dff[dff['Indicator Name'] == xaxis_column_name]['Value'],
            y=dff[dff['Indicator Name'] == yaxis_column_name]['Value'],
            text=dff[dff['Indicator Name'] == yaxis_column_name]['Country Name'],
            customdata=dff[dff['Indicator Name'] == yaxis_column_name]['Country Name'],
            mode='markers',
            marker={
                'size': 25,
                'opacity': 0.7,
                'color': 'orange',
                'line': {'width': 2, 'color': 'purple'}
            }
        )],
        'layout': dict(
            xaxis={
                'title': xaxis_column_name,
                'type': 'linear' if xaxis_type == 'Linear' else 'log'
            },
            yaxis={
                'title': yaxis_column_name,
                'type': 'linear' if yaxis_type == 'Linear' else 'log'
            },
            margin={'l': 40, 'b': 30, 't': 10, 'r': 0},
            height=450,
            hovermode='closest'
        )
    }


def create_time_series(dff, axis_type, title):
    return {
        'data': [dict(
            x=dff['Year'],
            y=dff['Value'],
            mode='lines+markers'
        )],
        'layout': {
            'height': 225,
            'margin': {'l': 20, 'b': 30, 'r': 10, 't': 10},
            'annotations': [{
                'x': 0, 'y': 0.85, 'xanchor': 'left', 'yanchor': 'bottom',
                'xref': 'paper', 'yref': 'paper', 'showarrow': False,
                'align': 'left', 'bgcolor': 'rgba(255, 255, 255, 0.5)',
                'text': title
            }],
            'yaxis': {'type': 'linear' if axis_type == 'Linear' else 'log'},
            'xaxis': {'showgrid': False}
        }
    }


@app.callback(
    dash.dependencies.Output('x-time-series', 'figure'),
    [dash.dependencies.Input('crossfilter-indicator-scatter', 'hoverData'),
     dash.dependencies.Input('crossfilter-xaxis-column', 'value'),
     dash.dependencies.Input('crossfilter-xaxis-type', 'value')])
def update_y_timeseries(hoverData, xaxis_column_name, axis_type):
    country_name = hoverData['points'][0]['customdata']
    dff = df[df['Country Name'] == country_name]
    dff = dff[dff['Indicator Name'] == xaxis_column_name]
    title = '<b>{}</b><br>{}'.format(country_name, xaxis_column_name)
    return create_time_series(dff, axis_type, title)


@app.callback(
    dash.dependencies.Output('y-time-series', 'figure'),
    [dash.dependencies.Input('crossfilter-indicator-scatter', 'hoverData'),
     dash.dependencies.Input('crossfilter-yaxis-column', 'value'),
     dash.dependencies.Input('crossfilter-yaxis-type', 'value')])
def update_x_timeseries(hoverData, yaxis_column_name, axis_type):
    dff = df[df['Country Name'] == hoverData['points'][0]['customdata']]
    dff = dff[dff['Indicator Name'] == yaxis_column_name]
    return create_time_series(dff, axis_type, yaxis_column_name)

In [None]:
app.run_server(mode="jupyterlab")