# Deploy NLP Model to Azure
### The model has already been trained in a local environment; the Azure ML SDK is used to deploy it to Azure.

### Key Takeaways:
-  For an NLP model, both the tokenizer and the neural-network model need to be pickled and registered as models in the workspace. Both should be loaded in the `init()` function of the scoring script.
- Any preprocessing the text needs before it goes into the model can be done in the `run()` function.

Steps 1, 2 and 3 were done in the local environment; steps 4, 5 and 6 were done in Azure.

--- Local miniconda Jupyter Notebook
- 1. Create Workspace
- 2. Pickle the model
- 3. Register the model

--- Azure Jupyter Notebook
- 4. Create Image
- 5. Deploy WebService
- 6. Test calling web service

In [7]:
# import library
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.webservice import Webservice
from azureml.core.image import ContainerImage
from azureml.core.webservice import AciWebservice
from azureml.core.conda_dependencies import CondaDependencies

## Create Workspace

In [None]:
'''
If already created workspace, you can skip creating workspace part, and go to initialize workspace step.
'''
from azureml.core import Workspace

# Settings for the new workspace. create_resource_group=True asks the SDK to
# create the resource group as part of the same call.
workspace_settings = dict(
    name='myworkspace',
    subscription_id='here is your Azure subcription id',
    resource_group='my_resource_group_name',
    create_resource_group=True,
    location='Australia East',
)
ws = Workspace.create(**workspace_settings)

In [None]:
# Write the workspace connection details (subscription, resource group,
# workspace name) to a local config.json so that later sessions can reload
# the workspace with Workspace.from_config().
ws.write_config()

### check configuration

In [17]:
!cat ./config.json

{
    "subscription_id": "d2f07f04-8cd1-4fcf-a2cd-da597ebb0cdc",
    "resource_group": "NLP_RegTech",
    "workspace_name": "myworkspace"
}

### Initialize Workspace
If the workspace has already been created, you can skip the workspace-creation part;
you only need to initialize the workspace before doing the rest of the steps.

In [16]:
# Reconnect to the existing workspace using the saved config.json.
ws = Workspace.from_config()

Found the config file in: /Users/hua/Desktop/NLP/aml_config/config.json


### Pickle the model and tokenizer
For pickle details, please check: [Part 2 file : NLP Model_Text multi-classification.ipynb](https://github.com/Sweetflowerjulia/NLP-Model_Text-Category-with-GloVe-and-LSTM/blob/master/NLP%20Model_Text%20multi-classification.ipynb)

## Register Model
Register two models: prediction model and tokenizer

In [14]:
# Upload the pickled prediction model and register it under "model_600",
# the name the scoring script looks it up by.
model = Model.register(
    workspace=ws,
    model_path="model.pkl",
    model_name="model_600",
    description="Category Prediction",
    tags={"key": "1"},
)

Registering model model_600


In [56]:
# The fitted tokenizer is registered as a second "model" so that it is
# available next to the prediction model when the image is built.
tokenizer = Model.register(
    workspace=ws,
    model_path="tokenizer.pickle",
    model_name="tokenizer",
    description="tokenizer",
    tags={"key": "2"},
)

Registering model tokenizer


### Check all models in the workspace

In [88]:
# Walk every model registered in the workspace, print a one-line summary of
# each, and keep handles to the two entries the image build uses.
models = Model.list(workspace=ws)
for registered in models:
    if registered.name == "model_600":
        model = registered
    elif registered.name == "tokenizer":
        tokenizer = registered
    print(
        "Name:", registered.name,
        "\tVersion:", registered.version,
        "\tDescription:", registered.description,
        registered.tags,
    )

Name: tokenizer 	Version: 1 	Description: tokenizer {'key': '2'}
Name: model_600 	Version: 1 	Description: Objective Category Prediction {'key': '1'}


### After registering the models, you can see them under the 'Models' tab of your Azure workspace.
## ------- From here done in Azure Jupyter Notebook --------

## Create image

In [169]:
%%writefile score.py

# import library
import pickle, json
from azureml.core.model import Model

# Keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, LSTM, Conv1D, Dropout
from keras.layers.embeddings import Embedding

# Other
import numpy as np
import pandas as pd
import re
import io


def init():
    """Load the registered model and tokenizer into module-level globals.

    Resolves the on-disk location of the two registered artefacts
    ("model_600" and "tokenizer") and unpickles them into the globals
    ``model`` and ``tokenizer``, which ``run()`` reads on every request.
    """
    global model, tokenizer
    model_path = Model.get_model_path(model_name="model_600")
    tokenizer_path = Model.get_model_path(model_name="tokenizer")

    # NOTE: unpickling executes arbitrary code embedded in the file, so these
    # artefacts must only ever come from a trusted workspace registration.
    # The context managers make sure both file handles are closed again.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)
    with open(tokenizer_path, "rb") as tokenizer_file:
        tokenizer = pickle.load(tokenizer_file)
    

def _clean_text(text):
    """Lower-case *text* and delete the characters listed in the pattern
    (punctuation, quotes, newlines and tabs)."""
    text = text.lower()
    return re.sub(r'[?|!|\'|"|#|,|)|(|\|/$%\n\t.:;""‘’]', r'', text)


def _split_into_phrases(words, window_sizes):
    """Slide over *words* one position at a time and emit overlapping phrases.

    At every start position one phrase is produced per entry of
    *window_sizes*, in the order given. As soon as a window would reach the
    end of *words*, the remaining tail is emitted as the last phrase and the
    scan stops.
    """
    phrases = []
    total = len(words)
    for start in range(total):
        for size in window_sizes:
            if start + size >= total:
                phrases.append(' '.join(words[start:]))
                return phrases
            phrases.append(' '.join(words[start:start + size]))
    return phrases


def run(raw_data):
    """Score one request and return the predicted category as a JSON string.

    Parameters
    ----------
    raw_data : str
        JSON document with the text to classify under the ``"data"`` key.

    Returns
    -------
    str
        ``{"category": <label>}`` on success. If anything fails (bad JSON,
        missing key, model error) the exception message is returned under
        the same ``"category"`` key instead of raising.

    Relies on the globals ``model`` and ``tokenizer`` set by ``init()``.
    """
    try:
        text = json.loads(raw_data)["data"]

        prob = 0.5     # a phrase counts for a class when its score exceeds this
        minlen = 6     # shortest phrase window, in words
        maxlen = 10    # longest phrase window; also the padded sequence length

        # Preprocess the text before it is put into the model.
        word_list = _clean_text(text).split(' ')

        # NOTE(review): the window order follows the set's iteration order,
        # and that order decides which phrases are emitted near the end of the
        # text. Kept as-is so predictions do not change; confirm before
        # replacing it with a sorted list.
        len_each_phrase = list(set([minlen, (maxlen + minlen) // 2, maxlen]))
        phrases = _split_into_phrases(word_list, len_each_phrase)

        # tokenize phrases (text -> numerical value)
        sequences = tokenizer.texts_to_sequences(phrases)
        test_data = pad_sequences(sequences, maxlen=maxlen)

        # predict with model
        preds = model.predict(test_data)

        # Per output column, count the phrases whose score exceeds `prob`.
        preds_df = pd.DataFrame(preds)
        count = preds_df[preds_df > prob].count()

        # If any column other than the first has a hit, return the column
        # with the most hits; otherwise fall back to label 0.
        if (count.iloc[1:] > 0).any():
            # Cast to a plain int so json.dumps never sees a NumPy integer.
            result = int(count.idxmax())
        else:
            result = 0

        return json.dumps({"category": result})

    except Exception as e:
        # Service boundary: report the failure to the caller rather than
        # letting the request crash.
        return json.dumps({"category": str(e)})

Overwriting score.py


## Create environment file

In [170]:
from azureml.core.conda_dependencies import CondaDependencies

# Conda packages that score.py imports, plus one pinned pip package.
myenv = CondaDependencies.create(conda_packages=["numpy", "keras", "pandas"])
myenv.add_pip_package("pynacl==1.2.1")

# Persist the environment definition; the image configuration refers to
# this file by name.
env_yaml = myenv.serialize_to_string()
with open("fullenv.yml", "w") as env_file:
    env_file.write(env_yaml)


## Deploy image

In [171]:
from azureml.core.image import Image, ContainerImage

# Describe the container: Python runtime, the scoring script and the conda
# environment file.
image_config = ContainerImage.image_configuration(
    execution_script="score.py",
    runtime="python",
    conda_file="fullenv.yml",
)

# Build the image with both registered artefacts so that score.py can
# resolve them by name through Model.get_model_path().
registered_models = [model, tokenizer]
image = Image.create(
    workspace=ws,
    name="myimage2",
    models=registered_models,
    image_config=image_config,
)

Creating image


In [172]:
# Block until the image build has finished, streaming progress to the output.
image.wait_for_creation(show_output = True)

Running.....................................................
SucceededImage creation operation finished for image myimage2:8, operation "Succeeded"


## Deploy WebService

In [173]:
from azureml.core.webservice import AciWebservice

# Container size for the ACI deployment. With auth_enabled=True callers have
# to present one of the service keys.
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=2,
    memory_gb=2,
    auth_enabled=True,
)

In [175]:
# service.delete()

In [176]:
from azureml.core.webservice import Webservice

service_name = 'my-aci-service-3'
print(service_name)

# Deploy the image built earlier as an ACI web service, wait for the
# deployment to finish, then report the resulting service state.
service = Webservice.deploy_from_image(
    workspace=ws,
    name=service_name,
    image=image,
    deployment_config=aciconfig,
)
service.wait_for_deployment(show_output=True)
print(service.state)

my-aci-service-3
Creating service
Running............................
SucceededACI service creation operation finished, operation "Succeeded"
Healthy


In [179]:
# print(service.get_logs())

## Test WebService

In [180]:
import json
import numpy as np

# Same payload shape that run() expects: a JSON object with the text to
# classify under the "data" key.
payload = {'data': 'receive ............. your life'}
test_sample = json.dumps(payload)

# Call the deployed service through the SDK and show its raw answer.
prediction = service.run(input_data=test_sample)
print(prediction)

{"category": 7}


## Below is the important information needed to call the web service.
- Scoring_uri
- Auth_Key (primary key)

In [None]:
# Endpoint URL that clients send their scoring requests to.
print(service.scoring_uri)

In [None]:
# get_keys() returns the primary and the secondary auth key; only the
# primary one is shown here. Treat the printed value as a secret.
primary, secondary = service.get_keys()
print(primary)

## Test Calling the service using AuthKey and URI

In [183]:
import requests
import json

# URL for the web service
scoring_uri = 'XXX....XX'
# If the service is authenticated, set the key
key = 'XXX......XX'

# One text sample to score, so a single result comes back
data = {"data": "receive ............. your life"}
# Convert to JSON string
input_data = json.dumps(data)

# Content type of the request body, plus the bearer token required because
# authentication is enabled on the service
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {key}',
}

# Make the request and display the response. Without an explicit timeout,
# requests waits indefinitely if the endpoint never answers.
result = requests.post(scoring_uri, data=input_data, headers=headers, timeout=60)
print(result.text)

"{\"category\": 7}"
