# 学習したモデルを保存する

In [1]:
# Fit a MinMaxScaler on the numeric columns and persist the fitted scaler
# with joblib so the same scaling can be re-applied later.
import os

import joblib
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv('./data/adultincome trunc.csv')
# Work on a copy so the raw frame `df` is left untouched.
data1 = df.copy()

# MinMaxScaler
scaler = MinMaxScaler()

# Get the numeric columns from data1
columns = data1.select_dtypes(include='number').columns

# Fit the data to the scaler object
scaler_fitted = scaler.fit(data1[columns])

# Transform the data using the fitted scaler object
data1[columns] = scaler_fitted.transform(data1[columns])

# Serialize the fitted scaler with joblib
obj_file = './outputs/scaler.pkl'
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before writing (needed on a fresh checkout).
os.makedirs(os.path.dirname(obj_file), exist_ok=True)
joblib.dump(value=scaler_fitted, filename=obj_file)

['./outputs/scaler.pkl']

In [7]:
# Deserialize: reload the persisted scaler and apply it to a fresh read of the data.
import joblib

obj_file = './outputs/scaler.pkl'
sc = joblib.load(obj_file)

# Read the CSV again so the reloaded scaler is applied to unscaled values.
data2 = pd.read_csv('./data/adultincome trunc.csv')

# `columns` holds the numeric column labels selected when the scaler was fitted.
scaled_values = sc.transform(data2[columns])
data2[columns] = scaled_values
data2.head()

Unnamed: 0,age,wc,education,marital status,race,gender,hours per week,IncomeClass
0,0.277778,Private,HS-grad,Divorced,White,Male,0.385417,<=50K
1,0.138889,Private,Bachelors,Married,Black,Female,0.385417,<=50K
2,0.263889,Private,Masters,Married,White,Female,0.385417,<=50K
3,0.180556,Private,Masters,Never-married,White,Female,0.489583,>50K
4,0.333333,Private,Bachelors,Married,White,Male,0.385417,>50K


In [8]:
# data1 (scaled with the in-memory scaler) and data2 (scaled with the reloaded
# scaler) should be identical, which shows the scaler was saved correctly.
# Assert this over the whole frame instead of eyeballing the first five rows.
pd.testing.assert_frame_equal(data1, data2)
data1.head()

Unnamed: 0,age,wc,education,marital status,race,gender,hours per week,IncomeClass
0,0.277778,Private,HS-grad,Divorced,White,Male,0.385417,<=50K
1,0.138889,Private,Bachelors,Married,Black,Female,0.385417,<=50K
2,0.263889,Private,Masters,Married,White,Female,0.385417,<=50K
3,0.180556,Private,Masters,Never-married,White,Female,0.489583,>50K
4,0.333333,Private,Bachelors,Married,White,Male,0.385417,>50K


# get_dummiesのような処理を保存する
* `pd.get_dummies` は関数であり、scalerやモデルのようにfit済みのインスタンスとして保存できないため、学習時の列名を保存しておき、本番データの列をそれに合わせる

In [11]:
import joblib
import pandas as pd

# Small training frame used to demonstrate dummy encoding
X_train = pd.DataFrame({
    'name': ['Jitesh', 'Rahul', 'John', 'Bill'],
    'income': [40, 50, 60, 70],
    'education': ['Grad', 'Grad', 'Post-Grad', 'Post-Grad'],
})

# Create the dummy variables and show the encoded frame
X_train_enc = pd.get_dummies(X_train)
display(X_train_enc)

# Keep the encoded column labels (a pandas Index object)
train_enc_cols = X_train_enc.columns

# Serialize the column labels so they can be reloaded at prediction time
obj_file = './outputs/columns.pkl'
joblib.dump(train_enc_cols, obj_file)

Unnamed: 0,income,name_Bill,name_Jitesh,name_John,name_Rahul,education_Grad,education_Post-Grad
0,40,0,1,0,0,1,0
1,50,0,0,0,1,1,0
2,60,0,0,1,0,0,1
3,70,1,0,0,0,0,1


['./outputs/columns.pkl']

In [12]:
# Pseudo production run: reload the column labels saved at training time
ref_cols = joblib.load(obj_file)

# Example input record for the web service
X_deploy = pd.DataFrame({
    'name': ['Jitesh'],
    'income': [70],
    'education': ['Post-Grad1'],
})

# Dummy-encode the production record and keep its column labels
X_deploy_enc = pd.get_dummies(X_deploy)
deploy_cols = X_deploy_enc.columns

# Columns present in training but absent from the production record
missing_cols = ref_cols.difference(deploy_cols)
print(missing_cols)

Index(['education_Grad', 'education_Post-Grad', 'name_Bill', 'name_John',
       'name_Rahul'],
      dtype='object')


In [14]:
# Align the production frame with the training layout in a single step:
# reindex adds the training columns that are missing (filled with 0), drops
# columns that were not seen in training, and applies the training column order.
X_deploy_enc = X_deploy_enc.reindex(columns=ref_cols, fill_value=0)

# Detect categories that exist only in the production data.
# deploy_cols was captured before the alignment, so it still lists the
# columns that the production record originally produced.
extra_cols = deploy_cols.difference(ref_cols)
if len(extra_cols):
    print("Extra category found")

Extra category found


# モデルと列名を保存する
* これまでの実践

In [17]:
# Train a RandomForest classifier inside an Azure ML run, log its accuracy and
# save the encoded column names together with the trained model.
import os

import joblib
import pandas as pd
from azureml.core import Workspace, Dataset, Experiment
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Access the Workspace and Datasets
print('Accessing the workspace....')
ws = Workspace.from_config("./config")

print('Accessing the dataset....')
az_dataset = Dataset.get_by_name(ws, 'adultincometrunc')

# Create/Access an experiment object
print('Accessing/Creating the experiment...')
experiment = Experiment(workspace=ws, name='Webservice-exp001')

# Run an experiment using start_logging method
print('Start Experiment using Start Logging method...')
new_run = experiment.start_logging()

# Everything between start_logging() and complete() is wrapped so that a
# failure marks the run as failed instead of leaving it in a running state.
try:
    # Load the registered dataset into a pandas dataframe
    print('Loading the dataset to pandas dataframe...')
    df = az_dataset.to_pandas_dataframe()

    # Create X and Y Variables (the last column is the target)
    X = df.iloc[:, :-1]
    Y = df.iloc[:, -1:]

    # Create dummy variables
    X = pd.get_dummies(X)

    # Extract column names including dummy variables
    train_enc_cols = X.columns

    # Transform Categorical columns in Y dataset to dummy and keep the last
    # dummy column as the binary target
    Y = pd.get_dummies(Y)
    Y = Y.iloc[:, -1]

    # Split Data - X and Y datasets are training and testing sets
    X_train, X_test, Y_train, Y_test = \
        train_test_split(X, Y, test_size=0.3, random_state=1234, stratify=Y)

    # Build the Random Forest model
    rfc = RandomForestClassifier(random_state=1234)

    # Fit the data to the Random Forest object - Train Model
    trained_model = rfc.fit(X_train, Y_train)

    # Predict the outcome using Test data - Score Model
    Y_predict = rfc.predict(X_test)

    # Get the probability score - Scored Probabilities
    Y_prob = rfc.predict_proba(X_test)[:, 1]

    # Get Confusion matrix and the accuracy/score - Evaluate
    cm = confusion_matrix(Y_test, Y_predict)
    score = rfc.score(X_test, Y_test)

    # Always log the primary metric
    new_run.log("accuracy", score)

    # Save all the transformations and models in one file
    model_file = './outputs/models.pkl'
    # joblib.dump does not create missing parent directories
    os.makedirs(os.path.dirname(model_file), exist_ok=True)

    joblib.dump(value=[train_enc_cols, trained_model],
                filename=model_file)
except Exception as exc:
    # Mark the run as failed so it does not stay open, then surface the error
    new_run.fail(error_details=str(exc))
    raise
else:
    # Complete the run
    new_run.complete()

Accessing the workspace....
Accessing the dataset....
Accessing/Creating the experiment...
Start Experiment using Start Logging method...
Loading the dataset to pandas dataframe...


# RunIDを使ってモデルを登録する

In [18]:
# List the runs recorded for the experiment so a run ID can be read from the output
experiment_runs = experiment.get_runs()
list(experiment_runs)

[Run(Experiment: Webservice-exp001,
 Id: d90a48f2-c1d9-465c-8679-276a2bbf7e66,
 Type: None,
 Status: Completed),
 Run(Experiment: Webservice-exp001,
 Id: 78588de1-c544-4dc7-9493-74fc1f8ff1e7,
 Type: None,
 Status: Completed)]

In [19]:
from azureml.core import Workspace, Model

# Access the workspace using config.json
ws = Workspace.from_config("./config")

# Fetch the run using the run ID obtained from the run listing
run_id = '78588de1-c544-4dc7-9493-74fc1f8ff1e7'
new_run = ws.get_run(run_id)

In [20]:
# Register the model in the workspace from the run's outputs
# (registered from the cloud, not from the local disk).
# Note: tags can be deleted later, but properties cannot.
run_accuracy = new_run.get_metrics()['accuracy']
new_run.register_model(
    model_path='outputs/models.pkl',
    model_name='AdultIncome_models',
    tags={'source':'SDK Run', 'algorithm':'RandomForest'},
    properties={'Accuracy': run_accuracy},
    description="Combined Models from Run",
)

Model(workspace=Workspace.create(name='Azureml-SDK-WS01', subscription_id='3467f739-a57b-4612-9de8-72a6616c01b3', resource_group='AzuremlSDKRG00'), name=AdultIncome_models, id=AdultIncome_models:1, version=1, tags={'source': 'SDK Run', 'algorithm': 'RandomForest'}, properties={'Accuracy': '0.79'})

# ローカルモデルを登録する

In [21]:
from azureml.core import Model

# Register the model file stored on the local disk in the workspace
local_model_tags = {'source':'SDK-Local', 'algorithm':'RandomForest'}
local_model_properties = {'Accuracy': 0.7866}

Model.register(
    workspace=ws,
    model_path='./outputs/models.pkl',  # local path
    model_name='AdultIncome_model_local',
    tags=local_model_tags,
    properties=local_model_properties,
    description='AdultIncome model from Local',
)

Registering model AdultIncome_model_local


Model(workspace=Workspace.create(name='Azureml-SDK-WS01', subscription_id='3467f739-a57b-4612-9de8-72a6616c01b3', resource_group='AzuremlSDKRG00'), name=AdultIncome_model_local, id=AdultIncome_model_local:1, version=1, tags={'source': 'SDK-Local', 'algorithm': 'RandomForest'}, properties={'Accuracy': '0.7866'})

# 登録してあるモデルを使用する

In [22]:
# Retrieve the models registered in the workspace and display them
registered_models = Model.list(ws)
registered_models

[Model(workspace=Workspace.create(name='Azureml-SDK-WS01', subscription_id='3467f739-a57b-4612-9de8-72a6616c01b3', resource_group='AzuremlSDKRG00'), name=AdultIncome_model_local, id=AdultIncome_model_local:1, version=1, tags={'source': 'SDK-Local', 'algorithm': 'RandomForest'}, properties={'Accuracy': '0.7866'}),
 Model(workspace=Workspace.create(name='Azureml-SDK-WS01', subscription_id='3467f739-a57b-4612-9de8-72a6616c01b3', resource_group='AzuremlSDKRG00'), name=AdultIncome_models, id=AdultIncome_models:1, version=1, tags={'source': 'SDK Run', 'algorithm': 'RandomForest'}, properties={'Accuracy': '0.79'})]

In [23]:
# Print name, version, originating run ID, properties and tags of every registered model
for registered in Model.list(ws):
    print('\n', registered.name, 'version:', registered.version)
    print('\t', 'Run_ID : ', registered.run_id)

    for prop_name, prop_value in registered.properties.items():
        print('\t', prop_name, ':', prop_value)

    for tag_name, tag_value in registered.tags.items():
        print('\t', tag_name, ':', tag_value)


 AdultIncome_model_local version: 1
	 Run_ID :  None
	 Accuracy : 0.7866
	 source : SDK-Local
	 algorithm : RandomForest

 AdultIncome_models version: 1
	 Run_ID :  78588de1-c544-4dc7-9493-74fc1f8ff1e7
	 Accuracy : 0.79
	 source : SDK Run
	 algorithm : RandomForest


# Inference Clusterを作成する

In [6]:
# Working with JSON
import json

import pandas as pd

# dict -> JSON string
d1 = {'name': ['Jitesh'], 'age': [40]}
d1_j = json.dumps(d1)
print(d1_j)

# JSON string -> dict
d2 = json.loads(d1_j)
print(d2)

# dict -> pandas DataFrame (each key becomes a column)
df1 = pd.DataFrame(d1)
df1.head()

{"name": ["Jitesh"], "age": [40]}
{'name': ['Jitesh'], 'age': [40]}


Unnamed: 0,name,age
0,Jitesh,40


In [7]:
# Import the Azure ML classes
from azureml.core import Environment, Workspace
from azureml.core.compute import AksCompute, ComputeTarget
from azureml.core.environment import CondaDependencies

# Access the workspace using config.json
print("Accessing the workspace from job....")
ws = Workspace.from_config("./config")

# Create the custom environment
myenv = Environment(name="MyEnvironment")

# Create the dependencies object and attach it to the environment
print("Creating dependencies....")
myenv_dep = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pip', 'pandas'],
    pip_packages=['azureml-defaults'],
)
myenv.python.conda_dependencies = myenv_dep

# Register the environment in the workspace
print("Registering the environment...")
myenv.register(ws)

# Create an Azure Kubernetes cluster, or reuse the one that already exists
cluster_name = 'aks-cluster-001'

if cluster_name in ws.compute_targets:
    print(cluster_name, "exists. Using it...")
    production_cluster = ws.compute_targets[cluster_name]
else:
    print(cluster_name, "does not exist. Creating a new one...")
    print('Creating provisioniong config for Aks cluster....')

    aks_config = AksCompute.provisioning_configuration(
        location='eastus',
        vm_size='STANDARD_D11_V2',
        agent_count=1,
        cluster_purpose='DevTest',
    )

    print("Creating the AKS Cluster...")
    production_cluster = ComputeTarget.create(ws, cluster_name, aks_config)
    production_cluster.wait_for_completion(show_output=True)

Accessing the workspace from job....


If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Creating dependencies....
Registering the environment...
aks-cluster-001 exists. Using it...


# Deploy

In [24]:
# Build the inference configuration: scoring script location plus environment
from azureml.core.model import InferenceConfig

print("Creating Inference Configuration...")
# Caution: Japanese text inside scoring_script.py causes a Unicode Decode Error
inference_config = InferenceConfig(
    source_directory='./script',
    entry_script='scoring_script.py',
    environment=myenv,
)

Creating Inference Configuration...


In [25]:
# Build the deployment configuration for the AKS web service
from azureml.core.webservice import AksWebservice

print('Creating the Deployment configuration for webservice...')
# Resources requested for the service: cpu_cores=1 and memory_gb=1
resource_request = {'cpu_cores': 1, 'memory_gb': 1}
deploy_config = AksWebservice.deploy_configuration(**resource_request)

Creating the Deployment configuration for webservice...


In [26]:
# Deploy the registered model as a web service on the AKS cluster
from azureml.core.model import Model

# Look up the registered model by name in the workspace
model = ws.models['AdultIncome_models']
service_name = 'adultincome-service'

print('Deploying the web service....')
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=deploy_config,
    deployment_target=production_cluster,
)
# Block until the deployment finishes, streaming its progress
service.wait_for_deployment(show_output=True)

Deploying the web service....


To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoint-sdk-v2 /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration. 
  service = Model.deploy(workspace=ws,


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-10-06 23:55:41+09:00 Creating Container Registry if not exists.
2022-10-06 23:55:42+09:00 Registering the environment.
2022-10-06 23:55:44+09:00 Use the existing image.
2022-10-06 23:55:46+09:00 Creating resources in AKS..
2022-10-06 23:55:53+09:00 Submitting deployment to compute.
2022-10-06 23:55:54+09:00 Checking the status of deployment adultincome-service..
2022-10-07 00:00:09+09:00 Checking the status of inference endpoint adultincome-service.
Succeeded
AKS service creation operation finished, operation "Succeeded"


# Web Serviceを実行

In [27]:
# Call the deployed web service through the workspace (SDK access)
import json

from azureml.core import Workspace

# Access the workspace using config file
print("Accessing the workspace....")
ws = Workspace.from_config("./config")

# Look up the deployed service by name
print("Accessing the service end-points")
service = ws.webservices['adultincome-service']

# One input record, expressed as column name -> list of values
x_new = {
    'age': [46],
    'wc': ['Private'],
    'education': ['Masters'],
    'marital status': ['Married'],
    'race': ['White'],
    'gender': ['Male'],
    'hours per week': [60],
}

# Wrap the record under the "data" key and serialize it to a JSON string
payload = {"data": x_new}
json_data = json.dumps(payload)

# Call the web service
print("Calling the service...")
response = service.run(input_data=json_data)

# Decode the JSON response into a Python object
print("Printing the predicted class...")
predicted_classes = json.loads(response)

print('\n', predicted_classes)

Accessing the workspace....
Accessing the service end-points
Calling the service...
Printing the predicted class...

 ['Greater Than 50K']


# REST APIで実行

In [1]:
# Call the deployed web service through its REST endpoint
import getpass
import json
import os

import requests

# Set the URI for the web service
scoring_uri = 'http://52.142.38.209:80/api/v1/service/adultincome-service/score'

# Prepare the input data: one record, column name -> list of values
x_new = {'age':[21],
         'wc':['Private'],
         'education':['HS-grad'],
         'marital status':['Never-married'],
         'race':['White'],
         'gender':['Male'],
         'hours per week':[30]}

# Convert the input data to a serialized JSON
json_data = json.dumps({"data": x_new})

# Never embed the service key in the notebook: read it from the environment,
# or prompt for it when the variable is not set.
# NOTE(review): a key that has been committed to a notebook should be
# regenerated on the service.
key = os.environ.get('ADULTINCOME_SERVICE_KEY') or getpass.getpass('Service key: ')

# Set the content type and authorization
headers = {'Content-Type': 'application/json',
           'Authorization' : f'Bearer {key}'}

# Make the request using POST method; the timeout keeps the cell from hanging
# forever if the endpoint is unreachable.
response = requests.post(scoring_uri, data=json_data, headers=headers, timeout=30)
# Fail loudly on HTTP errors (e.g. 401 for a bad key) instead of trying to
# parse an error body as a prediction.
response.raise_for_status()

# The response body is decoded twice: response.json() yields a string that is
# itself JSON, which json.loads turns into the list of predicted classes.
predicted_classes = json.loads(response.json())

print('\n', predicted_classes)


 ['Less Than 50K']
