In [1]:
from azureml.core import Workspace
import azureml.core

# Check core SDK version number: print the installed azureml-core version so
# it can be compared with the version recorded in this cell's saved output.
print("SDK version:", azureml.core.VERSION)

SDK version: 0.1.0.1027341


In [26]:
# Obtain the workspace handle through Workspace.get_or_create.
ws = Workspace.get_or_create(
    name='mladsdemo2',
    location='eastus2',
    subscription_id='938fa533-eeb9-4121-b97f-05b31c6eb088',
    resource_group='mladstest',
)

# Echo the workspace coordinates, one per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

Workspace name: mladsdemo2
Azure region: eastus2
Subscription id: 938fa533-eeb9-4121-b97f-05b31c6eb088
Resource group: mladstest


In [27]:
# Fetch the text data archive from blob storage (only if it is not already
# present locally) and unzip it, then unzip the bundled word-vector archive.
from fit_and_store_pipeline import unzip_file_here
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` is loaded, which makes urlretrieve fail with
# AttributeError on a fresh kernel.
import urllib.request
import os

if not os.path.isfile('./text_data/attack_data.csv'):
    # Only download the archive when no local copy exists yet.
    if not os.path.isfile('./text_data.zip'):
        urllib.request.urlretrieve('https://activelearning.blob.core.windows.net/activelearningdemo/text_data.zip', 'text_data.zip')
    unzip_file_here('text_data.zip')

# The word-vector archive is never downloaded here, so it is expected to be
# available next to the notebook already.
if not os.path.isfile('miniglove_6B_50d_w2v.txt'):
    unzip_file_here('miniglove_6B_50d_w2v.zip')

print('Data files here')

Data files here


In [49]:
# skip training for demo, 'attack_model.pkl' is pre-trained in repo

# train the model on 'train_set_01.csv' and save it into 'attack_model.pkl'
# from fit_and_store_pipeline import *
# script_main()

In [28]:
# Featurize the data to be scored

import pandas as pd
import numpy as np
import gensim
import random
from gensim.models import KeyedVectors
# NOTE(review): GensimPreprocessor and AvgWordVectorFeaturizer used below are
# presumably provided by this star import -- consider importing them by name.
from pipeline_parts import *

test_set_file = "test_set_01.csv"
text_data_file = "text_data/attack_data.csv"
w2v_file = 'miniglove_6B_50d_w2v.txt' # convert glove file to w2v format using gensim.scripts.glove2word2vec
n_rows_to_score = 1000  # number of leading test-set comments to featurize

# Word vectors in word2vec text format.
word_vectors = KeyedVectors.load_word2vec_format(w2v_file, binary=False)

# Full comment table, indexed by revision id so the test set can be looked up.
text_data = pd.read_csv(text_data_file, encoding='windows-1252')
text_data = text_data.set_index("rev_id")

# Select the rows whose rev_id appears in the test-set file.
test_set_rev_ids = pd.read_csv(test_set_file).rev_id
test_data = text_data.loc[test_set_rev_ids]

preprocessor = GensimPreprocessor()
vectorizer = AvgWordVectorFeaturizer(word_vectors)

# The index holds rev_id labels, so take the first rows positionally and
# explicitly with .iloc rather than relying on integer-slice semantics.
tokens = preprocessor.transform(test_data.comment.iloc[:n_rows_to_score])
vectors = vectorizer.transform(tokens)


In [50]:
%%writefile myenv.yml
# Conda environment specification; passed as conda_file='myenv.yml' when the
# web service is deployed.
name: myenv
channels:
  - defaults
dependencies:
  - pip:
    # Imported by score.py (numpy, sklearn).
    - numpy
    - scikit-learn
    # Required packages for AzureML execution, history, and data preparation.
    - --extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1
    - azureml-core

Overwriting myenv.yml


### Create `score.py` file
The `%%writefile` cell magic is used to write the scoring function to a local file. 

In [43]:
%%writefile score.py
import pickle
import json
import numpy
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestClassifier
from azureml.assets.persistence.persistence import get_model_path

def init():
    """Load 'attack_model.pkl' into the module-level ``model`` global.

    The file location is resolved with ``get_model_path`` and the model is
    deserialized with ``joblib.load``; ``run`` reads the resulting global.
    """
    global model
    model = joblib.load(get_model_path('attack_model.pkl'))

# note you can pass in multiple rows for scoring
def run(raw_data):
    """Score one or more feature rows with the model loaded by ``init``.

    Args:
        raw_data: JSON text of the form ``{"data": [[...], ...]}``; each inner
            list is one row passed to ``model.predict``.

    Returns:
        A JSON string: ``{"result": [...]}`` with one prediction per row on
        success, or ``{"error": "<message>"}`` if parsing or prediction fails.
    """
    try:
        data = json.loads(raw_data)['data']
        data = numpy.array(data)
        result = model.predict(data)
    except Exception as e:
        # Return the failure to the caller. Previously the error string fell
        # through to `result.tolist()`, which raised AttributeError (str has
        # no tolist) and hid the real problem.
        return json.dumps({"error": str(e)})
    return json.dumps({"result": result.tolist()})

Overwriting score.py


In [37]:
# you may need to register the provider in your subscription
# az provider register -n Microsoft.ContainerInstance

from azureml.core.webservice import AciWebservice

# Settings handed to AciWebservice.deploy_configuration.
aci_settings = {
    'cpu_cores': 1,
    'memory_gb': 4,
    'tags': ['MLADS'],
    'description': 'Toxicity scoring',
}
aciconfig = AciWebservice.deploy_configuration(**aci_settings)

In [53]:
%%time
# this will take 5-10 minutes to finish
# you can also use "az container list" command to find the ACI being deployed
#
# Deploys 'attack_model.pkl' with score.py as the driver, myenv.yml as the
# conda environment and `aciconfig` as the deployment configuration, then
# waits for the deployment to finish.
# NOTE: re-running this cell while a service named 'my-svc3' already exists in
# the workspace fails with HTTP 409 (ServiceWithNameExists), as the saved
# output of this cell shows. Delete the existing service or choose a new name.
service = ws.deploy_webservice(name = 'my-svc3',
                               deploy_config = aciconfig,
                               target = None,
                               models = ['attack_model.pkl'],
                               runtime = 'python',
                               conda_file = 'myenv.yml',
                               driver = 'score.py')
service.wait_for_deployment(show_output = True)

Registering model attack_model.pkl


Client-Request-ID=39074762-6e9b-11e8-b96b-dc53600a4933 Retry policy did not allow for a retry: Server-Timestamp=Tue, 12 Jun 2018 23:49:19 GMT, Server-Request-ID=47ceeaa0-c01e-011c-40a7-026edf000000, HTTP status code=409, Exception=The specified container already exists.ErrorCode: ContainerAlreadyExists<?xml version="1.0" encoding="utf-8"?><Error><Code>ContainerAlreadyExists</Code><Message>The specified container already exists.RequestId:47ceeaa0-c01e-011c-40a7-026edf000000Time:2018-06-12T23:49:19.3903355Z</Message></Error>.
Client-Request-ID=3952ac3a-6e9b-11e8-985e-dc53600a4933 Retry policy did not allow for a retry: Server-Timestamp=Tue, 12 Jun 2018 23:49:19 GMT, Server-Request-ID=76e71f31-201e-009a-0ba7-027c58000000, HTTP status code=409, Exception=The specified container already exists.ErrorCode: ContainerAlreadyExists<?xml version="1.0" encoding="utf-8"?><Error><Code>ContainerAlreadyExists</Code><Message>The specified container already exists.RequestId:76e71f31-201e-009a-0ba7-027c5

Creating image
Image creation operation finished for image my-svc3:2, operation "Succeeded"
Creating service


WebserviceException: Received bad response from Model Management Service:
Response Code: 409
Headers: {'Date': 'Tue, 12 Jun 2018 23:51:31 GMT', 'Content-Type': 'application/json', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'api-supported-versions': '2018-03-01-preview', 'x-ms-client-request-id': '02d3f22b8c6a4aeeb6896ca74f5711c0', 'x-ms-client-session-id': '', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains; preload'}
Content: b'{"code":"Conflict","statusCode":409,"message":"Conflict","details":[{"code":"ServiceWithNameExists","message":"Workspace mladsdemo2 already contains a Service with name: my-svc3"}]}'


## Test web service

In [51]:
# Show the scoring endpoint URI of the deployed service.
print('web service is hosted in ACI:', service.scoring_uri)

web service is hosted in ACI: http://23.96.3.128:5001/score


Feed the 1000 featurized rows of test data to the service to get predictions.

In [52]:
import json

# Build the {"data": [...]} JSON payload that score.py's run() parses,
# encoded as UTF-8 bytes.
test_samples = json.dumps({"data": vectors.tolist()}).encode('utf8')

# Call scoring service
service.run(input_data = test_samples)

'{"result": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 

## Delete ACI to clean up

Deleting ACI is super fast!

In [11]:
%%time 
# Delete the deployed web service; %%time reports how long the call takes.
service.delete()

Wall time: 1.68 s


In [12]:
# Handy cleanup cell to delete all existing services
from azureml.core.webservice import Webservice

services = Webservice.list(workspace=ws)
for svc in services:
    try:
        svc.delete()
    except Exception as err:
        # Best-effort cleanup: keep deleting the remaining services, but
        # report the failure instead of hiding it. Catching Exception rather
        # than using a bare except lets Ctrl-C still interrupt the loop.
        print('Could not delete service', svc, '-', err)

In [48]:
# NOTE(review): on a top-to-bottom run `service` has already been deleted by
# the cells above, and this cell's execution count (48) predates the deploy
# cell (53) -- confirm get_logs() is still meaningful here, or move this cell
# before the deletion.
service.get_logs()

