In [1]:
from azureml.core import Workspace
import azureml.core

# Report which azureml-core SDK build this notebook is running against
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 0.1.0.988557


In [3]:
# Workspace coordinates. Each value can be overridden with an environment
# variable so the notebook can target a different workspace without editing
# code; the fallbacks are the values this notebook was originally run with.
import os

workspace_name = os.environ.get('AML_WORKSPACE_NAME', 'mywsprod2')
workspace_location = os.environ.get('AML_WORKSPACE_LOCATION', 'eastus2euap')
workspace_subscription = os.environ.get('AML_SUBSCRIPTION_ID', '7c110815-49dd-4740-b8db-ee2a42eb54f2')
workspace_resource_group = os.environ.get('AML_RESOURCE_GROUP', 'marinchprodrg2')

# Fetch the workspace, creating it if it does not exist yet
ws = Workspace.get_or_create(name=workspace_name,
                             location=workspace_location,
                             subscription_id=workspace_subscription,
                             resource_group=workspace_resource_group)

# Echo the coordinates of the workspace actually returned
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

Workspace name: mywsprod2
Azure region: eastus2euap
Subscription id: 7c110815-49dd-4740-b8db-ee2a42eb54f2
Resource group: marinchprodrg2


In [59]:
# Download the text data archive (unless it is already present) and unzip it
from fit_and_store_pipeline import unzip_file_here
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded, so import it explicitly before calling urllib.request.urlretrieve.
import urllib.request
import os

# Only fetch/unpack when the extracted CSV is missing, so re-running is cheap
if not os.path.isfile('./text_data/attack_data.csv'):
    if not os.path.isfile('./text_data.zip'): 
        urllib.request.urlretrieve('https://activelearning.blob.core.windows.net/activelearningdemo/text_data.zip', 'text_data.zip')
    unzip_file_here('text_data.zip')

# The word-vector archive is not downloaded here; it is only unpacked, so
# 'miniglove_6B_50d_w2v.zip' must already be in the working directory.
if not os.path.isfile('miniglove_6B_50d_w2v.txt'):
    unzip_file_here('miniglove_6B_50d_w2v.zip')
    
print('Data files here')

Data files here


In [65]:
# Featurize the data to be scored

import pandas as pd
import numpy as np
import gensim
import random
import gensim
from gensim.models import KeyedVectors
from pipeline_parts import *

# Input locations
test_set_file = "test_set_01.csv"
text_data_file = "text_data/attack_data.csv"
w2v_file = 'miniglove_6B_50d_w2v.txt' # convert glove file to w2v format using gensim.scripts.glove2word2vec

# Word embeddings, stored in word2vec text (non-binary) format
word_vectors = KeyedVectors.load_word2vec_format(w2v_file, binary=False)

# Full comment table, indexed by revision id
text_data = pd.read_csv(text_data_file, encoding='windows-1252').set_index("rev_id")

# Restrict to the revisions listed in the test-set file
test_set_rev_ids = pd.read_csv(test_set_file)["rev_id"]
test_data = text_data.loc[test_set_rev_ids]

# Pipeline stages: tokenization, then averaged word-vector features
preprocessor = GensimPreprocessor()
vectorizer = AvgWordVectorFeaturizer(word_vectors)

# Featurize the [0:1000] slice of the test comments
tokens = preprocessor.transform(test_data.comment[:1000])
vectors = vectorizer.transform(tokens)


In [44]:
%%writefile myenv.yml
# Conda environment file; passed as `conda_file` when the web service is deployed.
name: myenv
channels:
  - defaults
dependencies:
  # Packages imported by score.py, installed through pip (versions are not pinned).
  - pip:
    - numpy
    - scikit-learn

Overwriting myenv.yml


### Create `score.py` file
The `%%writefile` cell magic is used to write the scoring function to a local file. 

In [56]:
%%writefile score.py
import pickle
import json
import numpy
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestClassifier

def init():
    """Load the pickled model from disk into the module-level ``model``."""
    global model
    model_path = "attack_model.pkl"
    model = joblib.load(model_path)

# note you can pass in multiple rows for scoring
def run(raw_data):
    """Score the rows in a JSON request with the module-level ``model``.

    Parameters
    ----------
    raw_data
        JSON text whose top-level object has a ``"data"`` key holding the
        rows to score.

    Returns
    -------
    str
        ``{"result": [...]}`` with the predictions on success, or
        ``{"error": "<message>"}`` if parsing or prediction raised.
    """
    try:
        data = json.loads(raw_data)['data']
        data = numpy.array(data)
        result = model.predict(data)
        return json.dumps({"result": result.tolist()})
    except Exception as e:
        # The error text is a plain str, which has no .tolist(); return it
        # under its own key instead of crashing while building the reply.
        return json.dumps({"error": str(e)})

Overwriting score.py


In [45]:
# you may need to register the provider in your subscription 
# az provider register -n Microsoft.ContainerInstance

from azureml.core.webservice import AciWebservice

# Container sizing and metadata for the ACI deployment
aci_settings = dict(
    cpu_cores=1,
    memory_gb=1,
    tags=['MLADS'],
    description='Toxicity scoring',
)
aciconfig = AciWebservice.deploy_configuration(**aci_settings)

In [52]:
%%time
# Deployment takes roughly 5-10 minutes to finish.
# The "az container list" command also shows the ACI while it is being deployed.
deployment_args = dict(
    name='my-svc5',
    deploy_config=aciconfig,
    target=None,
    models=['attack_model.pkl'],
    runtime='python',
    conda_file='myenv.yml',
    driver='score.py',
)
service = ws.deploy_webservice(**deployment_args)

# Block until the deployment finishes, streaming progress to the cell output
service.wait_for_deployment(show_output=True)

Client-Request-ID=3ad7ede2-6e78-11e8-beb1-54ee75d60810 Retry policy did not allow for a retry: Server-Timestamp=Tue, 12 Jun 2018 19:38:50 GMT, Server-Request-ID=63b81047-b01e-0003-1684-021745000000, HTTP status code=409, Exception=The specified container already exists.ErrorCode: ContainerAlreadyExists<?xml version="1.0" encoding="utf-8"?><Error><Code>ContainerAlreadyExists</Code><Message>The specified container already exists.RequestId:63b81047-b01e-0003-1684-021745000000Time:2018-06-12T19:38:50.1864896Z</Message></Error>.


Registering model: attack_model.pkl


Client-Request-ID=3ccaf7c0-6e78-11e8-abfa-54ee75d60810 Retry policy did not allow for a retry: Server-Timestamp=Tue, 12 Jun 2018 19:38:53 GMT, Server-Request-ID=45197bd9-e01e-0088-7084-021328000000, HTTP status code=409, Exception=The specified container already exists.ErrorCode: ContainerAlreadyExists<?xml version="1.0" encoding="utf-8"?><Error><Code>ContainerAlreadyExists</Code><Message>The specified container already exists.RequestId:45197bd9-e01e-0088-7084-021328000000Time:2018-06-12T19:38:53.4348160Z</Message></Error>.
Client-Request-ID=3d0e07be-6e78-11e8-bc9f-54ee75d60810 Retry policy did not allow for a retry: Server-Timestamp=Tue, 12 Jun 2018 19:38:53 GMT, Server-Request-ID=35ed56f2-f01e-002d-6984-024552000000, HTTP status code=409, Exception=The specified container already exists.ErrorCode: ContainerAlreadyExists<?xml version="1.0" encoding="utf-8"?><Error><Code>ContainerAlreadyExists</Code><Message>The specified container already exists.RequestId:35ed56f2-f01e-002d-6984-02455

Creating image
Image creation operation finished for image my-svc5:1, operation "Succeeded"
Creating service
Running.................................................
SucceededACI service creation operation finished, operation "Succeeded"
Wall time: 5min 54s



## Test web service

In [53]:
# Show the HTTP endpoint that accepts scoring requests
scoring_uri = service.scoring_uri
print('web service is hosted in ACI:', scoring_uri)

web service is hosted in ACI: http://40.114.78.215:5001/score


Feed the 1000 featurized rows computed earlier to the web service to get predictions.

In [67]:
import json

# Serialize the feature vectors as a UTF-8 encoded JSON payload
test_samples = json.dumps({"data": vectors.tolist()}).encode('utf8')

# Call scoring service
service.run(input_data=test_samples)

'{"result": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 

## Delete ACI to clean up

Deleting ACI is super fast!

In [37]:
%%time 
# Delete the deployed web service referenced by `service`
service.delete()

Wall time: 1.6 s


In [51]:
# Handy cleanup cell to delete all existing services 
from azureml.core.webservice import Webservice

# Best-effort cleanup: try to delete every service in the workspace and keep
# going when one fails, but report the failure instead of hiding it.
services = Webservice.list(workspace=ws)
for svc in services:
    try:
        svc.delete()
    except Exception as err:
        # `except Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit still stop the cell.
        print('Could not delete service {}: {}'.format(getattr(svc, 'name', svc), err))