In [1]:
import os
from azureml.core import Workspace
from azureml.core.conda_dependencies import CondaDependencies 
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.webservice import AciWebservice
from azureml.core.model import Model
from torchvision.datasets import CIFAR10
from torchvision import transforms
import random
import cv2
import numpy as np
import torch
import matplotlib.pyplot as plt
import json
import requests

In [2]:
# Obtain the Azure ML workspace handle via Workspace.from_config() and echo it
# as the cell output. `ws` is reused by the model lookup and deployment cells.
ws = Workspace.from_config()
ws

Workspace.create(name='shuit-ml-workspace', subscription_id='902f236f-44df-463a-a5cb-1516ab2a9cd2', resource_group='shuit-common')

In [11]:
%%writefile pred/score.py

# Write out score.py (the entry script used for inference)

import json
import numpy as np
import onnxruntime
import sys
import os
import time
import torch

from transformers import BertJapaneseTokenizer


def init():
    """Load the ONNX model and the tokenizer into module-level globals.

    Populates ``session`` (the ONNX Runtime inference session), the names of
    the first three model inputs (taken by position: input ids, attention
    mask, token type ids), the name of the first model output, and
    ``tokenizer``. Presumably invoked once by the Azure ML serving runtime
    before any call to ``run`` -- confirm against the Azure ML entry-script
    contract.
    """
    global session, input_ids_name, attention_mask_name, token_type_ids_name, output_name, tokenizer

    # AZUREML_MODEL_DIR is set during deployment and points at the model
    # folder (./azureml-models/$MODEL_NAME/$VERSION). When several models are
    # deployed it points at the folder that contains all of them
    # (./azureml-models).
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    session = onnxruntime.InferenceSession(os.path.join(model_dir, 'model.onnx'), None)

    # Input tensors are identified purely by their position in the graph.
    model_inputs = session.get_inputs()
    input_ids_name = model_inputs[0].name
    attention_mask_name = model_inputs[1].name
    token_type_ids_name = model_inputs[2].name

    model_outputs = session.get_outputs()
    output_name = model_outputs[0].name

    tokenizer = BertJapaneseTokenizer.from_pretrained(
        'cl-tohoku/bert-base-japanese-whole-word-masking')
    

def preprocess(input_data_json):
    """Turn the JSON request body into the three numpy arrays fed to the model.

    Args:
        input_data_json: JSON text whose top-level object has a ``'data'``
            entry holding the text(s) to classify.

    Returns:
        A tuple ``(input_ids, attention_mask, token_type_ids)`` of numpy
        arrays produced by the module-level ``tokenizer``.
    """
    texts = json.loads(input_data_json)['data']
    print(texts)

    # Tokenize with padding and truncation (max_length=512), asking for
    # PyTorch tensors.
    encoding = tokenizer(
        texts,
        padding=True,
        max_length=512,
        truncation=True,
        return_tensors="pt")

    # Convert each tensor to an independent numpy copy on the CPU.
    input_ids, attention_mask, token_type_ids = (
        encoding[field].to('cpu').detach().numpy().copy()
        for field in ("input_ids", "attention_mask", "token_type_ids")
    )
    print(input_ids)
    return (input_ids, attention_mask, token_type_ids)

def postprocess(result):
    """Pick the highest-scoring label for each sample in the model output.

    The previous implementation squeezed the output and took ``argmax`` over
    axis 0, which is only correct for a single sample: with several input
    sentences it reduced over the batch axis and then failed in ``int()``.
    The scores are now reduced over the last (label) axis instead.

    Args:
        result: Model output as returned by ``session.run`` (a list holding
            one array of scores), or any array-like whose last axis
            enumerates the labels.

    Returns:
        An ``int`` label when the output describes a single sample (same as
        before), otherwise a ``list`` of ``int`` labels, one per sample.

    Raises:
        ValueError: If the output contains no scores.
    """
    scores = np.atleast_1d(np.asarray(result))
    # Collapse every leading axis (the output-list wrapper and the batch axis)
    # so that each row holds the label scores of exactly one sample.
    per_sample = scores.reshape(-1, scores.shape[-1])
    labels = per_sample.argmax(axis=-1)
    if labels.size == 1:
        return int(labels[0])
    return [int(label) for label in labels]
    
def run(input_data):
    """Score one request and report the prediction with its inference time.

    Args:
        input_data: JSON request body, passed unchanged to ``preprocess``.

    Returns:
        ``{"result": ..., "time_in_sec": ...}`` on success, where the time
        covers the ONNX Runtime call (and the debug print of its output), or
        ``{"error": <message>}`` if any step raised an exception.
    """
    try:
        # Decode the request into the three model input arrays.
        input_ids, attention_mask, token_type_ids = preprocess(input_data)

        # Timer starts just before the session call.
        start = time.time()

        feed = {
            input_ids_name: input_ids,
            attention_mask_name: attention_mask,
            token_type_ids_name: token_type_ids,
        }
        outputs = session.run([output_name], feed)
        print(outputs)

        # Timer stops after the raw output has been printed.
        end = time.time()

        result_dict = {
            "result": postprocess(outputs),
            "time_in_sec": end - start,
        }
    except Exception as e:
        # Failures are reported to the caller as a payload, not raised.
        result_dict = {"error": str(e)}

    return result_dict

def choose_class(result_prob):
    """Return the index of the largest entry of ``result_prob`` as an int.

    The argmax is taken along axis 0.
    """
    best_index = np.argmax(result_prob, axis=0)
    return int(best_index)

Overwriting pred/score.py


In [12]:
# Generate the inference-environment definition file and build the inference config.

# pip packages installed into the scoring environment.
pip_requirements = [
    "numpy",
    "onnxruntime",
    "azureml-core",
    "azureml-defaults",
    "transformers",
    "fugashi",
    "ipadic",
    "torch",
]
myenv = CondaDependencies.create(pip_packages=pip_requirements)

env_file_path = os.path.join("pred", "environment.yml")
score_file_path = os.path.join("pred", "score.py")

# Persist the conda specification next to the scoring script.
conda_spec = myenv.serialize_to_string()
with open(env_file_path, "w") as spec_file:
    spec_file.write(conda_spec)

# Build the environment from the file just written and pair it with score.py.
env = Environment.from_conda_specification(name="onnx_env", file_path=env_file_path)
inference_config = InferenceConfig(entry_script=score_file_path, environment=env)

In [13]:
# ACI configuration: 1 CPU core and 1 GB of memory for the web service.
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={'framework': 'onnx'},
    description='bert fine-tuned for livedoor news corpus',
)

In [14]:
# Select the model to deploy: reference 'bert-livedoor-model' in the workspace.
model = Model(workspace=ws, name='bert-livedoor-model')

In [16]:
# Deploy the model as a web service and wait until the deployment finishes.
aci_service_name = 'bert-livedoor'
print("Service", aci_service_name)

aci_service = Model.deploy(
    workspace=ws,
    name=aci_service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aciconfig,
)

# Block until the deployment completes, streaming its progress log.
aci_service.wait_for_deployment(show_output=True)
print(aci_service.state)

Service bert-livedoor
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-05-19 07:10:43+00:00 Creating Container Registry if not exists.
2021-05-19 07:10:44+00:00 Registering the environment.
2021-05-19 07:10:44+00:00 Use the existing image.
2021-05-19 07:10:45+00:00 Generating deployment configuration.
2021-05-19 07:10:45+00:00 Submitting deployment to compute..
2021-05-19 07:10:52+00:00 Checking the status of deployment bert-livedoor..
2021-05-19 07:15:28+00:00 Checking the status of inference endpoint bert-livedoor.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [17]:
# Inference: POST one sample sentence to the deployed scoring endpoint.

# Upper bound (seconds) on the HTTP call. Without a timeout, requests waits
# indefinitely and the cell hangs if the endpoint stops responding.
REQUEST_TIMEOUT_SEC = 60

endpoint = aci_service.scoring_uri
input_data = json.dumps({'data': ["この副題はどこに分類される？"]})
res = requests.post(
    url=endpoint,
    data=input_data,
    headers={'Content-Type': 'application/json'},
    timeout=REQUEST_TIMEOUT_SEC,
)
res.json()

{'result': 7, 'time_in_sec': 0.07662081718444824}

In [18]:
# Show the predicted class index returned by the service.
print(f"予測値: {res.json()['result']}")
# NOTE(review): the ground-truth comparison is disabled; `target_class` is not
# defined in the visible cells, so the expected label cannot be printed here.

予測値: 7
