## Libraries for testing

In [17]:
import joblib
import os
import json

import numpy as np
import pandas as pd
import re
import string
import pathlib


from keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model
from keras.preprocessing.text import text_to_word_sequence, tokenizer_from_json


from collections import Counter

import nltk
import contractions

## Connect to workspace

In [3]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file; later cells use `ws` to fetch
# the training run and to deploy the model.
ws = Workspace.from_config()
# Print the SDK version and the workspace name as a quick connectivity check.
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.26.0 to work with projet_7


## Get data from experiment run

In [4]:
# Fetch the training run by id and read the metrics it logged.
run_id = 'glove_1622827159_36953bd1'
run1 = ws.get_run(run_id)
v = run1.get_metrics()
# `max_l` comes from the run metrics — presumably the padded sequence length
# used at training time; TODO confirm.
# NOTE(review): the model tag and score.py further down hard-code 45 instead of
# using this value — confirm they agree.
max_l = v['max_l']

In [81]:
# Download the tokenizer JSON saved by the training run for use in this notebook.
run1.download_file('outputs/tok.json', output_file_path='tok1.json')
# score.py opens './source_dir/tok1.json', and only the contents of ./source_dir
# are packaged by the InferenceConfig, so a copy must also live there. Without
# this the deployment only works if the file was copied by hand.
os.makedirs('source_dir', exist_ok=True)
run1.download_file('outputs/tok.json', output_file_path='source_dir/tok1.json')

In [82]:
# Rebuild the Keras tokenizer from the JSON file downloaded from the run.
with open('tok1.json') as f:
    # The file content is decoded with json.load and handed to
    # tokenizer_from_json — presumably it holds the string produced by
    # Tokenizer.to_json(); TODO confirm against the training script.
    data = json.load(f)
    t = tokenizer_from_json(data)

## Register model

In [82]:
from azureml.core.model import Model
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

In [12]:
# Register the run's 'outputs/glove' artifact as the workspace model 'glove_sample'.
# NOTE(review): the 'max_l' tag is hard-coded to '45' rather than derived from
# the `max_l` metric read from the run above — confirm they match.
model2 = run1.register_model(model_name='glove_sample',
                           tags={'max_l': '45'},
                           model_path='outputs/glove')
# Show name, id and version of the newly registered model (tab separated).
print(model2.name, model2.id, model2.version, sep='\t')

glove_sample	glove_sample:4	4


## Load environment

In [5]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.widgets import RunDetails

In [97]:
# Build the environment named 'proj7-h' from the local conda specification
# file env.yml; `env` is later passed to the InferenceConfig.
env = Environment.from_conda_specification('proj7-h', 'env.yml')
# Disabled alternative: fetch the environment 'proj7-h' from the workspace instead.
#registered_env = Environment.get(ws, 'proj7-h')

## Add azureml-defaults to the environment

In [7]:
#conda_dep = CondaDependencies()
#conda_dep.add_pip_package("azureml-defaults")

In [8]:
#registered_env.python.conda_dependencies=conda_dep

## Script

In [209]:
%%writefile source_dir/score.py

import joblib
import os
import json

import numpy as np
import pandas as pd
import re
import string
import pathlib

from keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model
from keras.preprocessing.text import text_to_word_sequence, tokenizer_from_json


from collections import Counter

import nltk
import contractions

from azureml.core.model import Model



def init():
    """Load the registered 'glove_sample' model into the module-level ``model``.

    The model path is resolved with ``Model.get_model_path`` and the model is
    loaded (compiled) with Keras ``load_model``. ``run`` reads the resulting
    global. Prints a marker line once loading has finished.
    """
    global model

    # Resolve where the registered model files live, then load them.
    model_dir = Model.get_model_path('glove_sample')
    model = load_model(model_dir, compile=True)

    print('this is init')


def run(data):
    """Score a batch of texts and return one sentiment record per input key.

    Parameters
    ----------
    data : str
        JSON object mapping keys to raw text, e.g. ``'{"0": "some tweet"}'``.

    Returns
    -------
    dict
        ``{key: {'text': <input text>, 'score': <float>, 'sentiment': <str>}}``
        where sentiment is ``'positive'`` when score > 0.5, else ``'negative'``.
        Keys that parse as integers are returned as ``int`` (as before); any
        other key is returned unchanged. An empty payload yields ``{}``.

    Raises
    ------
    ValueError
        If the model returns more than one value per input row.
    """
    payload = json.loads(data)

    # Nothing to score: avoid sending an empty batch through the pipeline.
    if not payload:
        return {}

    # Keep keys and texts in the same (insertion) order so that row `position`
    # of the prediction always belongs to `keys[position]`.
    keys = list(payload)
    texts = [payload[key] for key in keys]

    # load data into dataframe
    df = pd.DataFrame({'text': texts})

    # preprocess text; 45 is the padded sequence length — presumably the value
    # used at training time, TODO confirm.
    X = pre_pro(df, 45)

    # make prediction
    prediction = model.predict(X)

    # load result into nested json
    outputs = {}
    for position, key in enumerate(keys):
        # .item() converts a single-element row to a Python scalar and raises
        # if the row unexpectedly holds more than one value.
        score = float(np.asarray(prediction[position]).item())

        # Preserve the historical integer output keys when the key is numeric.
        try:
            out_key = int(key)
        except ValueError:
            out_key = key

        outputs[out_key] = {
            'text': payload[key],
            'score': score,
            'sentiment': 'positive' if score > 0.5 else 'negative',
        }

    return outputs

FLAGS = re.MULTILINE | re.DOTALL

# hashtag() is used as a re.sub replacement callback: it receives a match
# object, and .group() returns the text matched for that occurrence.

def hashtag(text):
    """Rewrite one matched hashtag as a ``<hashtag>`` tag followed by its words.

    ``text`` is a regex match object whose matched string starts with ``#``.
    An all-uppercase body becomes ``"<hashtag> body <allcaps>"`` (lowercased);
    otherwise the body is split before every capital letter and the pieces
    are joined with spaces after ``<hashtag>``.
    """
    body = text.group()[1:]  # drop the leading '#'
    if not body.isupper():
        pieces = re.split(r"(?=[A-Z])", body, flags=FLAGS)
        return " ".join(["<hashtag>"] + pieces)
    return "<hashtag> {} <allcaps>".format(body.lower())

def allcaps(text):
    """Lowercase the matched text and append the ``<allcaps>`` marker.

    ``text`` is a regex match object; the whole match (including any
    surrounding spaces it captured) is lowercased before the marker is added.
    """
    lowered = text.group().lower()
    return lowered + " <allcaps> "

def repeat(text):
    """Collapse runs of 3+ identical characters in the match and flag them.

    ``text`` is a regex match object. Every run of three or more identical
    characters in the matched string is reduced to a single character; if that
    changed anything, ``' <repeat> '`` is appended, otherwise the matched
    string is returned untouched.
    """
    matched = text.group()
    collapsed = re.sub(r'(.)\1{2,}', r'\1', matched)
    if collapsed != matched:
        return collapsed+' <repeat> '
    return matched

def pps_glove(text):
    """Replace tweet-specific patterns in ``text`` with placeholder tags.

    URLs, @mentions, emoticons, '<3', numbers, hashtags, repeated characters
    and all-caps words are rewritten to tags such as ``<url>`` or ``<user>``,
    punctuation is spaced out, and the result is lowercased.

    The substitutions are applied one after another, each on the output of the
    previous one, so the order of the rules below must not be changed.
    """
    # Building blocks shared by the emoticon patterns.
    eyes = r"[8:=;]"
    nose = r"['`\-]?"

    # Ordered (pattern, replacement) pairs; a replacement is either a string
    # or a callback receiving the match object.
    rules = (
        (r"https?:\/\/\S+\b|www\.(\w+\.)+\S*", "<url>"),
        (r"@\w+", "<user>"),
        (r"{}{}[)dD]+|[)dD]+{}{}".format(eyes, nose, nose, eyes), "<smile>"),
        (r"{}{}p+".format(eyes, nose), "<lolface>"),
        (r"{}{}\(+|\)+{}{}".format(eyes, nose, nose, eyes), "<sadface>"),
        (r"{}{}[\/|l*]".format(eyes, nose), "<neutralface>"),
        # Put spaces around a forward slash so the words on both sides separate.
        (r"/", " / "),
        (r"<3", "<heart>"),
        (r"[-+]?[.\d]*[\d]+[:,.\d]*", "<number>"),
        (r"#\w+", hashtag),
        # From the first tripled character to the end of the word,
        # e.g. the 'eees' of 'yeeees' is handed to repeat().
        (r'(.)\1{2,}\w+', repeat),
        # A tripled character right after a space, e.g. ' !!!!!!'.
        (r' (.)\1{2,}', repeat),
        (r"\b(\S*?)(.)\2{2,}\b", r"\1\2 <elong>"),
        # Detach punctuation from the preceding letter, tag bracket or parenthesis.
        (r"([a-zA-Z<>()])([?!.:;,])", r"\1 \2"),
        (r"\(([a-zA-Z<>]+)\)", r"( \1 )"),
        # Flag words of 2+ capitals that have a space on both sides.
        (r" ([A-Z]){2,} ", allcaps),
    )

    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text, flags=FLAGS)

    return text.lower()

def contraction(text):
    """Return ``text`` after passing it through ``contractions.fix``."""
    expanded = contractions.fix(text)
    return expanded

def remove_apostrophe(text):
    """Replace each apostrophe, backtick, acute accent and parenthesis with a space."""
    separators = r"['`´()]"
    return re.sub(separators, r" ", text, flags=FLAGS)

# Tokenizers already loaded in this process, keyed by file path.
_TOKENIZER_CACHE = {}


def _load_tokenizer(path='./source_dir/tok1.json'):
    """Return the Keras tokenizer stored at ``path``, parsing the file only once."""
    if path not in _TOKENIZER_CACHE:
        with open(path) as f:
            _TOKENIZER_CACHE[path] = tokenizer_from_json(json.load(f))
    return _TOKENIZER_CACHE[path]


def pre_pro(data, dim):
    """Turn the ``text`` column of ``data`` into padded integer sequences.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``text`` column. The frame is not modified.
    dim : int
        Length every sequence is padded or truncated to (``maxlen``).

    Returns
    -------
    The array produced by ``sequence.pad_sequences`` — one row per input text.

    Raises
    ------
    OSError
        If the tokenizer file './source_dir/tok1.json' cannot be opened.
    """
    # Order matters: tags and emoticons first, then contractions, then the
    # remaining apostrophes/parentheses (you're, brother's, i'm, ...) are
    # replaced by spaces.
    texts = (
        data['text']
        .apply(pps_glove)
        .apply(contraction)
        .apply(remove_apostrophe)
        .astype(str)
    )

    # The tokenizer is cached so it is not re-read from disk on every request.
    tokenizer = _load_tokenizer()

    seq = tokenizer.texts_to_sequences(texts)

    return sequence.pad_sequences(seq, maxlen=dim)

Overwriting source_dir/score.py


## Inference Config

In [91]:
from azureml.core.model import InferenceConfig

In [210]:
# Describe how to serve the model: the conda environment built above, the
# directory holding the scoring code, and the entry script.
# NOTE(review): entry_script is presumably resolved relative to
# source_directory — confirm against the Azure ML docs.
inference_config = InferenceConfig(
    environment=env,
    source_directory="./source_dir",
    entry_script="./score.py",
)

## Deploy model

In [80]:
from azureml.core.webservice import LocalWebservice

# Configure a local web service on port 6789 (the test cell pings
# http://localhost:6789).
deployment_config = LocalWebservice.deploy_configuration(port=6789)

In [211]:
# Deploy the registered model as the service "myservice", replacing any
# existing service of that name (overwrite=True).
# `Model` must be azureml.core.model.Model here (imported in the
# "Register model" section); it shadows the Keras `Model` imported at the top.
# `model2` is the model registered earlier in this notebook.
service = Model.deploy(
    ws,
    "myservice",
    [model2],
    inference_config,
    deployment_config,
    overwrite=True,
)
# Block until the deployment finishes, streaming the build/deploy log.
service.wait_for_deployment(show_output=True)

Downloading model glove_sample:4 to C:\Users\favre\AppData\Local\Temp\azureml_t6uhea82\glove_sample\4
Generating Docker build context.
Package creation Succeeded
Logging into Docker registry 38d02f2005824c85859d3a5c11c9a143.azurecr.io
Logging into Docker registry 38d02f2005824c85859d3a5c11c9a143.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM 38d02f2005824c85859d3a5c11c9a143.azurecr.io/azureml/azureml_e445f60a37dcdf195e0bfe84260151b7
 ---> 48b62ad833b7
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> 5faf7a18ee9d
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6ImI5MDUzY2JmLWJlNTUtNGU4My04YzAzLWQ2YjBlYjkwY2I1YSIsInJlc291cmNlR3JvdXBOYW1lIjoicHJvamV0XzciLCJhY2NvdW50TmFtZSI6InByb2pldF83Iiwid29ya3NwYWNlSWQiOiIzOGQwMmYyMC0wNTgyLTRjODUtODU5ZC0zYTVjMTFjOWExNDMifSwibW9kZWxzIjp7fSwibW9kZWxzSW5mbyI6e319 | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in c01313c76e65
 ---> 0a97054cd30a
Step 4/5 : RUN 

## Test model

In [223]:
import requests
import json

uri = service.scoring_uri
# Ping the local service root first; the response itself is not inspected.
requests.get("http://localhost:6789")
headers = {"Content-Type": "application/json"}
# Sample inputs keyed by position: 0, 1, 2, ...
data = {
    0: "@dider the movie was really baaaaad, catastrophic acting :-(",
    1: "OMG what an awesome concert !!!!!!! so good :-)",
    2: "@dider the movie was really baaaaad, catastrophic acting"
}
data = json.dumps(data)
response = requests.post(uri, data=data, headers=headers)
# Surface HTTP errors from the scoring endpoint directly, instead of a
# confusing JSON decoding error (or an error payload shown as a result).
response.raise_for_status()
response.json()

{'0': {'text': '@dider the movie was really baaaaad, catastrophic acting :-(',
  'score': 0.3742055892944336,
  'sentiment': 'negative'},
 '1': {'text': 'OMG what an awesome concert !!!!!!! so good :-)',
  'score': 0.893543541431427,
  'sentiment': 'positive'},
 '2': {'text': '@dider the movie was really baaaaad, catastrophic acting',
  'score': 0.4965532422065735,
  'sentiment': 'negative'}}