# LAB1 - Model Operationalization & Deployment

In this notebook, we will create the artifacts and scripts needed to deploy the LSTM model as a web service on Azure. The artifacts include the model files and the test scripts used to validate your model.

In [13]:
import keras
# import the libraries
import os
import pandas as pd
import numpy as np
import pickle
import json
import shutil
from keras.models import load_model
from urllib.request import urlretrieve

import h5py

# For Azure blob storage access
from azure.storage.blob import BlockBlobService
from azure.storage.blob import PublicAccess

In [129]:
# Stock symbol from which every artifact and service name below is derived
TICKER = "MSFT"

# Folder holding the model artifacts; later cells concatenate file names
# directly onto it, so the trailing slash matters
SHARE_ROOT = "./stockdemo-model/"

# Trained LSTM model, stored in h5 format
LSTM_MODEL = "{}-modellstm.h5".format(TICKER)
LSTM_MODEL_PATH = "{}{}".format(SHARE_ROOT, LSTM_MODEL)

# Pickled min/max values dictionary
MIN_MAX_DICT = "{}-min_max.pkl".format(TICKER)
MIN_MAX_DICT_PATH = "{}{}".format(SHARE_ROOT, MIN_MAX_DICT)

# Name of the Azure Container Instances (ACI) web service
ACI_SERVICE_NAME = "{}-aciservice".format(TICKER)

## Load the test data frame

In [97]:
# Load the price history from CSV, using the 'Date' column as the row index
data = pd.read_csv("MSFT.csv", index_col='Date')
# Convert the index labels to datetime values
data.index = pd.to_datetime(data.index)

In [98]:
# Preview the first rows of the loaded price history
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000-01-03,117.37,118.62,112.0,116.56,26614200.0,0.0,1.0,39.066089,39.482146,37.278708,38.796484,53228400.0
2000-01-04,113.56,117.12,112.25,112.62,27059500.0,0.0,1.0,37.797947,38.982878,37.36192,37.485073,54119000.0
2000-01-05,111.12,116.37,109.37,113.81,32029800.0,0.0,1.0,36.985804,38.733244,36.403324,37.881159,64059600.0
2000-01-06,112.19,113.87,108.37,110.0,27488300.0,0.0,1.0,37.341949,37.90113,36.070479,36.613017,54976600.0
2000-01-07,108.62,112.25,107.31,111.44,31006800.0,0.0,1.0,36.15369,37.36192,35.717662,37.092315,62013600.0


In [99]:
# Take the last 10 rows and drop the adjusted-price and corporate-action
# columns, leaving Open/High/Low/Close/Volume
adjusted_and_event_columns = [
    'Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Volume', 'Adj. Close',
    'Ex-Dividend', 'Split Ratio',
]
test_df = data.iloc[-10:].drop(columns=adjusted_and_event_columns)

In [100]:
# test_df holds only the 10 rows sliced above, so head(15) shows all of them
test_df.head(15)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-03-14,95.12,95.41,93.5,93.85,31576898.0
2018-03-15,93.53,94.58,92.83,94.18,26279014.0
2018-03-16,94.68,95.38,93.92,94.6,47329521.0
2018-03-19,93.74,93.9,92.11,92.89,31752589.0
2018-03-20,93.05,93.77,93.0,93.13,21787780.0
2018-03-21,92.93,94.05,92.21,92.48,23753263.0
2018-03-22,91.265,91.75,89.66,89.79,37578166.0
2018-03-23,89.5,90.46,87.08,87.18,42159397.0
2018-03-26,90.61,94.0,90.4,93.78,55031149.0
2018-03-27,94.94,95.139,88.51,89.47,53704562.0


In [101]:
# Persist the test frame as a pickle alongside the model artifacts for later use.
# SHARE_ROOT already ends with a separator, so join() yields the same path as
# plain concatenation.
TEST_DATA_PATH = os.path.join(SHARE_ROOT, 'test_dataframe.pkl')
test_df.to_pickle(TEST_DATA_PATH)

We will need to recreate the feature engineering (creating the sequence features) just as we did in the model building notebook.

We will do this within the webservice so that the service can take the raw  data, and return a scored result predicting the value (label).

When scoring an unseen observation, the model will not know the true labels. Therefore, we create a score_df without labels.

### Test init() and run() functions to read from the working directory

The web service requires two functions, an init() function that will initialize the web service by loading the model into the service, and a run() function that will engineer the features to match the model call structure, and score that data set. We create the functions in here for testing and debugging.

In [106]:
def init():
    """Load the scoring assets into module-level globals.

    Reads the Keras model from LSTM_MODEL_PATH and unpickles the min/max
    list from MIN_MAX_DICT_PATH. The results are stored in the globals
    `model` and `min_max_dict_list`, which run() reads.
    """
    global model, min_max_dict_list

    model = load_model(LSTM_MODEL_PATH)
    print("Model Loaded")

    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open(MIN_MAX_DICT_PATH, 'rb') as min_max_file:
        min_max_dict_list = pickle.load(min_max_file)
    print("Min_max List loaded")

In [107]:
def run(raw_data):
    """Score raw price records and return the predictions as a JSON string.

    Parameters
    ----------
    raw_data : str
        JSON document whose "data" entry is itself a JSON string of records
        (the output of ``DataFrame.to_json(orient='records')``). Every column
        needs an entry in both min/max dictionaries loaded by init().

    Returns
    -------
    str
        ``{"result": [...]}`` holding the de-normalized model output for the
        batch of ``seq_len``-row windows, or ``{"error": message}`` when
        scoring fails for any reason.
    """
    # Function-scope import so this cell does not depend on the import cell.
    from io import StringIO

    # Window length fed to the model; must match the training sequence length.
    seq_len = 10

    try:
        records_json = json.loads(raw_data)['data']
        # Newer pandas releases deprecate/reject literal JSON strings, so hand
        # read_json a file-like object instead.
        data = pd.read_json(StringIO(records_json), orient='records')
        data_n = data.copy()

        # Normalize data: min-max scale every column with the stored extremes
        min_dict = min_max_dict_list[0]
        max_dict = min_max_dict_list[1]
        for feature_name in data.columns:
            data_n[feature_name] = (data[feature_name] - min_dict[feature_name]) / (max_dict[feature_name] - min_dict[feature_name])

        values = data_n.values
        if len(values) < seq_len:
            raise ValueError(
                "Need at least {} records to score, got {}".format(seq_len, len(values)))

        # Create sequences: one window of exactly seq_len consecutive rows per
        # start position. (Slicing seq_len + 1 rows produced an 11-row first
        # window and a ragged batch as soon as more than seq_len rows arrived.)
        result = np.array([values[start: start + seq_len]
                           for start in range(len(values) - seq_len + 1)])
        print(result.shape)

        pred = model.predict(result)
        print(pred)

        # De-normalize the target back to the "Close" price scale
        pred = pred * (max_dict["Close"] - min_dict["Close"]) + min_dict["Close"]

        # Send results
        return json.dumps({"result": pred.tolist()})

    except Exception as e:
        # Deliberate catch-all: failures are reported in the response body.
        return json.dumps({"error": str(e)})

The web service test first initializes the web service, then sends the entire scoring data set to the model. We expect to get one prediction for each complete sequence of 10 consecutive rows in the scoring data set.

In [108]:
# Preview the request body: the records JSON is nested as a string under "data"
json.dumps({"data": test_df.to_json(orient='records')})

'{"data": "[{\\"Open\\":95.12,\\"High\\":95.41,\\"Low\\":93.5,\\"Close\\":93.85,\\"Volume\\":31576898.0},{\\"Open\\":93.53,\\"High\\":94.58,\\"Low\\":92.83,\\"Close\\":94.18,\\"Volume\\":26279014.0},{\\"Open\\":94.68,\\"High\\":95.38,\\"Low\\":93.92,\\"Close\\":94.6,\\"Volume\\":47329521.0},{\\"Open\\":93.74,\\"High\\":93.9,\\"Low\\":92.11,\\"Close\\":92.89,\\"Volume\\":31752589.0},{\\"Open\\":93.05,\\"High\\":93.77,\\"Low\\":93.0,\\"Close\\":93.13,\\"Volume\\":21787780.0},{\\"Open\\":92.93,\\"High\\":94.05,\\"Low\\":92.21,\\"Close\\":92.48,\\"Volume\\":23753263.0},{\\"Open\\":91.265,\\"High\\":91.75,\\"Low\\":89.66,\\"Close\\":89.79,\\"Volume\\":37578166.0},{\\"Open\\":89.5,\\"High\\":90.46,\\"Low\\":87.08,\\"Close\\":87.18,\\"Volume\\":42159397.0},{\\"Open\\":90.61,\\"High\\":94.0,\\"Low\\":90.4,\\"Close\\":93.78,\\"Volume\\":55031149.0},{\\"Open\\":94.94,\\"High\\":95.139,\\"Low\\":88.51,\\"Close\\":89.47,\\"Volume\\":53704562.0}]"}'

In [109]:
# Load the assets, then score the test frame through the local run()
init()
scoring_request = json.dumps({"data": test_df.to_json(orient='records')})
pred = run(scoring_request)
print(pred)

Model Loaded
Min_max List loaded
(1, 10, 5)
[[0.7519344]]
{"result": [[91.40367126464844]]}


## Persist model assets

Next we persist the assets we have created for use in operationalization. The conda dependencies are defined in this YAML file. This will be used to tell the webservice server which python packages are required to run this web service

In [146]:
%%writefile {SHARE_ROOT}myenv.yml
name: myenv
channels:
  - defaults
dependencies:
  - python=3.5.2
  - pip:
    # Imported directly by score.py; declared explicitly instead of relying on
    # them being pulled in transitively or preinstalled.
    - numpy
    - pandas
    - keras
    # Listed alongside keras: the backend, and h5py for the .h5 model file.
    - tensorflow
    - h5py
    # Required packages for AzureML execution, history, and data preparation.
    - --extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1
    - azureml-core

Overwriting ./stockdemo-model/myenv.yml


The score.py file is the Python code defining the web service operation. It includes both the init() and run() functions defined earlier and imports the required libraries. These should be nearly identical to the previously defined versions.

In [144]:
%%writefile {SHARE_ROOT}score.py

import pickle
import json
import numpy as np
import pandas as pd
from azureml.core.model import Model
from keras.models import load_model

# Names under which the model and the min/max dictionary are registered
TICKER = "MSFT"
LSTM_MODEL = "{}-modellstm.h5".format(TICKER)
MIN_MAX_DICT = "{}-min_max.pkl".format(TICKER)


def init():
    """Load the registered assets into module-level globals.

    Resolves the on-disk locations of the registered LSTM model and min/max
    pickle through Model.get_model_path, then stores the loaded objects in
    the globals `model` and `min_max_dict_list` that run() reads.
    """
    global model, min_max_dict_list

    # Load the Keras model
    lstm_model_file = Model.get_model_path(model_name=LSTM_MODEL)
    model = load_model(lstm_model_file)

    # Load the min/max list values
    min_max_file = Model.get_model_path(model_name=MIN_MAX_DICT)
    with open(min_max_file, 'rb') as handle:
        min_max_dict_list = pickle.load(handle)
    print("Min_max List loaded")

def run(raw_data):
    """Score raw price records and return the predictions as a JSON string.

    Parameters
    ----------
    raw_data : str
        JSON document whose "data" entry is itself a JSON string of records
        (the output of ``DataFrame.to_json(orient='records')``). Every column
        needs an entry in both min/max dictionaries loaded by init().

    Returns
    -------
    str
        ``{"result": [...]}`` holding the de-normalized model output for the
        batch of ``seq_len``-row windows, or ``{"error": message}`` when
        scoring fails for any reason.
    """
    # Function-scope import keeps the new dependency inside this function.
    from io import StringIO

    # Window length fed to the model; must match the training sequence length.
    seq_len = 10

    try:
        records_json = json.loads(raw_data)['data']
        # Newer pandas releases deprecate/reject literal JSON strings, so hand
        # read_json a file-like object instead.
        data = pd.read_json(StringIO(records_json), orient='records')
        data_n = data.copy()

        # Normalize data: min-max scale every column with the stored extremes
        min_dict = min_max_dict_list[0]
        max_dict = min_max_dict_list[1]
        for feature_name in data.columns:
            data_n[feature_name] = (data[feature_name] - min_dict[feature_name]) / (max_dict[feature_name] - min_dict[feature_name])

        values = data_n.values
        if len(values) < seq_len:
            raise ValueError(
                "Need at least {} records to score, got {}".format(seq_len, len(values)))

        # Create sequences: one window of exactly seq_len consecutive rows per
        # start position. (Slicing seq_len + 1 rows produced an 11-row first
        # window and a ragged batch as soon as more than seq_len rows arrived.)
        result = np.array([values[start: start + seq_len]
                           for start in range(len(values) - seq_len + 1)])
        print(result.shape)

        pred = model.predict(result)
        print(pred)

        # De-normalize the target back to the "Close" price scale
        pred = pred * (max_dict["Close"] - min_dict["Close"]) + min_dict["Close"]

        # Send results
        return json.dumps({"result": pred.tolist()})

    except Exception as e:
        # Deliberate catch-all: failures are reported in the response body.
        return json.dumps({"error": str(e)})

Overwriting ./stockdemo-model/score.py


We also include a python file test_service.py which can test the web service you create. 

In [169]:
%%writefile {SHARE_ROOT}test_service.py

import urllib
# `import urllib` alone does not guarantee these submodules are loaded
import urllib.error
import urllib.request
import json 
import requests
import pandas as pd

# The URL will need to be edited after the service is created.
url = 'http://23.96.11.240:5001/score'

## Sequence length will need to match the training sequence length from the model training
sequence_length = 10

# We'll read in this data to test the service
body = pd.read_pickle('test_dataframe.pkl')
headers = {'Content-Type':'application/json'}

try:
    if body.shape[0] < sequence_length : 
        print("Skipping scoring as we need {} records to score and only have {} records.".format(sequence_length, body.shape[0]))
    else:
        # Same envelope the scoring script expects: records JSON nested under "data"
        body = json.dumps({"data": body.to_json(orient='records')})
        print (body + '\n')
        req = urllib.request.Request(url, str.encode(body), headers) 
        
        with urllib.request.urlopen(req) as response:
            the_page = response.read()
            print('{}'.format(the_page))
        
except urllib.error.HTTPError as error:
    # Report the numeric status code and the actual response body; the body
    # must be read by calling read(), not by printing the bound method.
    print("The request failed with status code {}: \n{}".format(
        error.code, error.read().decode('utf-8', errors='replace')))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.reason)      

Overwriting ./stockdemo-model/test_service.py


## Configuring Docker on Linux

> On a Linux DSVM, run the script below to configure Docker correctly. **Remember to log out and log back in after running the script.**

```bash
sudo /opt/microsoft/azureml/initial_setup.sh
```

# Creating a web service out of the scoring script

Let's now see how we can create a scoring web service from the above model. We are going to be using the Preview of the Azure ML Python SDK.


### 1. Download and install Azure ML Python SDK
In a terminal window, type the following commands.
  
```shell
# create a new conda environment with Python 3.6, numpy and cython
$ conda create -n myenv Python=3.6 cython numpy

# Activate the conda environment
$ source activate myenv

# check pip is pointing to the right pip path
(myenv) $ pip --version
# you should see a path that includes the name of the conda environment (myenv) such as:
# <user-home-dir>/miniconda3/envs/myenv/lib/python3.6/site-packages (python 3.6)

# install azure-cli
(myenv) $ pip install azure-cli

# install or update azureml meta-package
(myenv) $ pip install --upgrade --extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1 azureml-sdk

# add myenv as a new Jupyter Kernel
(myenv) $ python -m ipykernel install --user --name myenv --display-name "myenv"

# Now change the kernel on this notebook to myenv

```

### 2. Register the new RP (Azure Resource Provider)
You also must register the new RP in your subscription:
```shell
$ az login
$ az account set -s "<subscription_id>"

# register the new RP
$ az provider register -n Microsoft.MachineLearningServices

# check the registration status
$ az provider show -n Microsoft.MachineLearningServices
```

### 3. Configure the AML Environment

In [4]:
import azureml.core

# Confirm which Azure ML SDK build the kernel is using
print("SDK Version:", azureml.core.VERSION)

SDK Version: 0.1.0.1095338


In [6]:
# Azure coordinates of the AML workspace. The subscription id is specific to
# one Azure account, so it can be overridden through the
# AZURE_SUBSCRIPTION_ID environment variable without editing the notebook;
# the original value remains the default.
subscription_id = os.environ.get("AZURE_SUBSCRIPTION_ID", "b1395605-1fe9-4af4-b3ff-82a4725a3791")
resource_group = "meetup_aml_rg"
workspace_name = "meetup_aml_workspace"
workspace_region = 'eastus2' # or eastus2euap

In [7]:
# import the Workspace class and create the AML Workspace
from azureml.core import Workspace

# Create the AML workspace from the settings defined above, then show its details
workspace_settings = dict(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
    location=workspace_region,
)
ws = Workspace.create(**workspace_settings)
ws.get_details()



{'id': '/subscriptions/b1395605-1fe9-4af4-b3ff-82a4725a3791/resourceGroups/meetup_aml_rg/providers/Microsoft.MachineLearningServices/workspaces/meetup_aml_workspace',
 'name': 'meetup_aml_workspace',
 'location': 'eastus2',
 'type': 'Microsoft.MachineLearningServices/workspaces',
 'description': '',
 'friendlyName': 'meetup_aml_workspace',
 'containerRegistry': '/subscriptions/b1395605-1fe9-4af4-b3ff-82a4725a3791/resourcegroups/meetup_aml_rg/providers/microsoft.containerregistry/registries/meetupamacrptecrljo',
 'keyVault': '/subscriptions/b1395605-1fe9-4af4-b3ff-82a4725a3791/resourcegroups/meetup_aml_rg/providers/microsoft.keyvault/vaults/meetupamkeyvaultonhjrjbp',
 'applicationInsights': '/subscriptions/b1395605-1fe9-4af4-b3ff-82a4725a3791/resourcegroups/meetup_aml_rg/providers/microsoft.insights/components/meetupaminsightsokkbjkte',
 'identityPrincipalId': '9db92e39-4f84-436d-b7d0-c9dd25766f54',
 'identityTenantId': '72f988bf-86f1-41af-91ab-2d7cd011db47',
 'identityType': 'SystemAss

In [8]:
# Re-open the workspace by name to validate access, then persist the
# subscription id, resource group name, and workspace name to the default
# configuration location, ./aml_config/config.json
workspace_identity = dict(
    workspace_name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)
ws = Workspace(**workspace_identity)
ws.write_config()

Wrote the config file config.json to: /home/sshuser/notebooks/Meetups-Data-AI-DFW/aml_config/config.json


In [10]:
# Load the workspace configuration from the ./aml_config/config.json file
ws = Workspace.from_config()
# Print each identifying attribute on its own line
for workspace_attribute in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_attribute)

Found the config file in: /home/sshuser/notebooks/Meetups-Data-AI-DFW/aml_config/config.json
meetup_aml_workspace
meetup_aml_rg
eastus2
b1395605-1fe9-4af4-b3ff-82a4725a3791


### 4. Register Model

In [111]:
from azureml.core.model import Model

# Register the LSTM model file in the workspace.
# NOTE(review): this rebinds the global `model` that init() filled with the
# loaded Keras network, so the local run() stops working until init() is
# called again.
lstm_description = "LSTM regression model to predict {} Close price".format(TICKER)
model = Model.register(
    workspace=ws,
    model_path=LSTM_MODEL_PATH,
    model_name=LSTM_MODEL,
    tags=[TICKER, "Close", "lstm"],
    description=lstm_description,
)

Registering model MSFT-modellstm.h5


In [115]:
# Register the pickled min/max dictionary as a second model asset
min_max_description = "MIN_MAX dictionary use to normalization of {} stock data".format(TICKER)
min_max_dict_model = Model.register(
    workspace=ws,
    model_path=MIN_MAX_DICT_PATH,
    model_name=MIN_MAX_DICT,
    tags=[TICKER, "MinMaxDict"],
    description=min_max_description,
)

Registering model MSFT-min_max.pkl


In [116]:
# Show name, description and version of the registered asset, tab-separated
registered_fields = (min_max_dict_model.name, min_max_dict_model.description, min_max_dict_model.version)
print('\t'.join(str(field) for field in registered_fields))

MSFT-min_max.pkl	MIN_MAX dictionary use to normalization of MSFT stock data	1


You can explore the registered models within your workspace and query by tag. Models are versioned. If you call Model.register many times with the same model name, you will get multiple versions of the model with increasing version numbers.

In [117]:
# List every registered model carrying the ticker tag
regression_models = ws.models(tag=TICKER)
for registered in regression_models:
    print("Name:", registered.name,
          "\tVersion:", registered.version,
          "\tDescription:", registered.description,
          registered.tags)

Name: MSFT-min_max.pkl 	Version: 1 	Description: MIN_MAX dictionary use to normalization of MSFT stock data ['MSFT', 'MinMaxDict']
Name: MSFT-modellstm.h5 	Version: 1 	Description: LSTM regression model to predict MSFT Close price ['MSFT', 'Close', 'lstm']
Name: MSFT-modellstm 	Version: 1 	Description: LSTM regression model to predict MSFT price ['MSFT', 'regressionPrice', 'lstm']


### 5. Create Docker Image

Note that the following command can take a few minutes.<br>
Note that the score.py and the conda yml file must be in the same directory as this notebook.<br>
You can add tags and descriptions to images. Also, an image can contain multiple models.

In [147]:
# Copy score.py next to the notebook, where the image build expects it
!cp ./stockdemo-model/score.py ./

In [148]:
# Copy the conda environment file next to the notebook for the image build
!cp ./stockdemo-model/myenv.yml ./

In [149]:
from azureml.core.image import Image
# Build a Docker image that bundles both registered assets with the scoring
# script and its conda environment.
image = Image.create(name = TICKER.lower() + ".image",
                     # these are the registered model objects
                     models = [model, min_max_dict_model],
                     runtime = "python",
                     execution_script = "score.py",
                     conda_file = "myenv.yml",
                     tags = [TICKER, "Close", "lstm"],
                     # a space after the ticker keeps it from fusing with the next word
                     description = "Image with "+ TICKER + " regression LSTM model",
                     workspace = ws)

# Block until the build finishes, streaming progress to the cell output
image.wait_for_creation(show_output = True)

Creating image
Running..................................
SucceededImage creation operation finished for image msft.image:3, operation "Succeeded"


In [150]:
# Remove the copies that were placed next to the notebook for the image build
!rm score.py myenv.yml

In [151]:
# Summarize every image in the workspace tagged with the ticker
for built_image in Image.list(workspace=ws, tag=TICKER):
    image_summary = '{}(v.{} [{}]) stored at {} with build log {}'.format(
        built_image.name,
        built_image.version,
        built_image.creation_state,
        built_image.image_location,
        built_image.image_build_log_uri,
    )
    print(image_summary)

msft.image(v.3 [Succeeded]) stored at meetupamacrptecrljo.azurecr.io/msft.image:3 with build log https://eastus2ice.blob.core.windows.net/logs/meetupamacrptecrljo15306109361596356.txt?sig=FPKAH8d6uz%2BiOc9rUBIxcAVEggab2sS6KyWKoWdlsIQ%3D&sr=b&sp=r&se=2018-08-02T09%3A45%3A12Z&sv=2017-04-17
msft.image(v.2 [Succeeded]) stored at meetupamacrptecrljo.azurecr.io/msft.image:2 with build log https://eastus2ice.blob.core.windows.net/logs/meetupamacrptecrljo15306105079018524.txt?sp=r&sig=Rlpy9X%2BBX9UyOOUQhLXcgB0Sn/syzZmdJ2ylY1NSgwE%3D&se=2018-08-02T09%3A38%3A08Z&sr=b&sv=2017-04-17
msft.image(v.1 [Succeeded]) stored at meetupamacrptecrljo.azurecr.io/msft.image:1 with build log https://eastus2ice.blob.core.windows.net/logs/meetupamacrptecrljo15306088028517042.txt?se=2018-08-02T09%3A11%3A04Z&sp=r&sv=2017-04-17&sr=b&sig=6cxnALzsttwmDpruibbi3Xl5C6OQDX5EzDAwicawLWk%3D


### 6. Deploy image as web service on Azure Container Instance

Note that the service creation can take a few minutes.

In [153]:
from azureml.core.webservice import AciWebservice

# Resources and metadata for the ACI deployment
aci_description = "ACI Service to predict {} Close price".format(TICKER)
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=4,
    tags=[TICKER, "Close", "lstm"],
    description=aci_description,
)

In [157]:
from azureml.core.webservice import Webservice

# The configured service name is lower-cased before use
aci_service_name = ACI_SERVICE_NAME.lower()
print(aci_service_name)

# Deploy the image built earlier using the ACI configuration
deployment_settings = dict(
    deployment_config=aciconfig,
    image=image,
    name=aci_service_name,
    workspace=ws,
)
aci_service = Webservice.deploy_from_image(**deployment_settings)

# Wait for the deployment to finish, then report the service state
aci_service.wait_for_deployment(True)
print(aci_service.state)

msft-aciservice
Creating service
Running....................................
SucceededACI service creation operation finished, operation "Succeeded"
Healthy


In [156]:
# Fetch the service logs; run this to debug if the service failed to deploy
aci_service.get_logs()

### 7. Test web service

In [158]:
# Show the scoring endpoint; test_service.py must be edited to use this URL
print('web service is hosted in ACI:', aci_service.scoring_uri)

web service is hosted in ACI: http://23.96.11.240:5001/score


In [159]:
import json

# Same payload format run() expects: records JSON nested as a string under "data"
test_sample = json.dumps({"data": test_df.to_json(orient='records')})

# Score through the deployed ACI service rather than the local run()
prediction = aci_service.run(input_data = test_sample)
print(prediction)

{"result": [[91.40367126464844]]}


Or you can run test_service.py in a terminal; it should yield the same result.

### 8. Delete web service

In [None]:
# Delete the ACI web service once testing is finished
aci_service.delete()