# Notebook to deploy the backbreak's predictions as a web service

In [1]:
from azureml.core import Workspace, Dataset
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration
from azureml.core.webservice import AciWebservice
from azureml.data.tabular_dataset import TabularDataset

import joblib
from sklearn.ensemble import RandomForestClassifier
import numpy as np 
import pandas as pd 
import sklearn

In [2]:
df = pd.read_csv('BACK_DEF.csv')
df.set_index("Unnamed: 0", inplace=True)

X_df = df.drop('Percentil_Rango', axis=1)
y_df = df['Percentil_Rango']
X_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25000 entries, 0 to 24999
Data columns (total 8 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Burden (t)                       25000 non-null  float64
 1   Hole diameter (m)                25000 non-null  float64
 2   Spacing (m)                      25000 non-null  float64
 3   Stemming (m^3)                   25000 non-null  float64
 4   Hole depth (m)                   25000 non-null  float64
 5   Specific charge (t)              25000 non-null  float64
 6   Number of rows (explosive load)  25000 non-null  float64
 7   Backbreak (index)                25000 non-null  float64
dtypes: float64(8)
memory usage: 1.7 MB


In [3]:
# Defining model

model = RandomForestClassifier(n_estimators=100, max_depth=80, min_samples_split=0.5, min_samples_leaf=1).fit(X_df,y_df)
joblib.dump(model,'sklearn_RF_model.pkl')

['sklearn_RF_model.pkl']

In [4]:
# Connect to the Workspace
ws = Workspace.from_config()

# The default datastore is a blob storage container where datasets are stored
datastore = ws.get_default_datastore()

# Register the dataset
ds = Dataset.Tabular.register_pandas_dataframe(
        dataframe=df, 
        name='Backbreak dataset', 
        description='Backbreak',
        target=datastore
    )

X = ds.drop_columns("Percentil_Rango")
y = ds.keep_columns("Percentil_Rango")

# Display information about the dataset
print(ds.name + " v" + str(ds.version) + ' (ID: ' + ds.id + ")")

Validating arguments.
Arguments validated.
Validating arguments.
Arguments validated.
'overwrite' is set to True. Any file already present in the target will be overwritten.
Uploading files from 'C:/Users/alexm/AppData/Local/Temp/tmpe44t3c07' to 'managed-dataset/1ce00887-6c6a-471c-83e7-e8539587caa6/'
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.
Backbreak dataset v10 (ID: a0b0a5f6-4799-4964-81f1-8b32123e004d)


In [5]:
# Registering model

model = Model.register(workspace=ws,
                       model_name='RF-model',                # Name of the registered model in your workspace.
                       model_path='./sklearn_RF_model.pkl',  # Local file to upload and register as a model.
                       model_framework=Model.Framework.SCIKITLEARN,  # Framework used to create the model.
                       model_framework_version=sklearn.__version__,  # Version of scikit-learn used to create the model.
                       sample_input_dataset=X,
                       sample_output_dataset=y,
                       resource_configuration=ResourceConfiguration(cpu=2, memory_in_gb=4),
                       description='RF clasification model to predict backbreak.',
                       tags={'area': 'backbreak', 'type': 'clasification'})

print('Name:', model.name)
print('Version:', model.version)

Registering model RF-model
Name: RF-model
Version: 6


In [6]:
%%writefile score_back.py

import json
import pickle
import numpy as np
import pandas as pd
import os
import joblib
from azureml.core.model import Model

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType


def init():
    global model
    # Replace filename if needed.
    path = os.getenv('AZUREML_MODEL_DIR') 
    model_path = os.path.join(path, 'sklearn_RF_model.pkl')
    # Deserialize the model file back into a sklearn model.
    model = joblib.load(model_path)


input_sample = pd.DataFrame(data=[{
    "Burden (t)": 5.62,
    "Hole diameter (m)": 6.74,
    "Spacing (m)": 6.54,
    "Stemming (m^3)": 2.75,
    "Hole depth (m)": 9.3,
    "Specific charge (t)": 0.42,
    "Number of rows (explosive load)": 5.12,
    "Backbreak (index)": 4.73,
}])

# This is an integer type sample. Use the data type that reflects the expected result.
output_sample = np.array([0])

# To indicate that we support a variable length of data input,
# set enforce_shape=False
@input_schema('data', PandasParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data):
    try:
        print("input_data....")
        print(data.columns)
        print(type(data))
        result = model.predict(data)
        print("result.....")
        print(result)
    # You can return any data type, as long as it can be serialized by JSON.
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error

Overwriting score_back.py


In [7]:
from azureml.core.model import InferenceConfig
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

environment = Environment('my-sklearn-environment')
environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'pandas',
    'scikit-learn=={}'.format(sklearn.__version__)
])

inference_config = InferenceConfig(entry_script='./score_back.py',environment=environment)

In [8]:
# Deploying the service

aci_config = AciWebservice.deploy_configuration(cpu_cores = 1,
                                                memory_gb = 1,
                                                auth_enabled=True)

service_name = 'my-backbreak-model'

service = Model.deploy(ws, service_name, [model], inference_config, deployment_config=aci_config, overwrite=True)

service.wait_for_deployment(show_output=True)

To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration 
  service = Model.deploy(ws, service_name, [model], inference_config, deployment_config=aci_config, overwrite=True)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2023-12-10 19:41:28+01:00 Creating Container Registry if not exists.
2023-12-10 19:41:28+01:00 Registering the environment.
2023-12-10 19:41:30+01:00 Use the existing image.
2023-12-10 19:41:30+01:00 Generating deployment configuration.
2023-12-10 19:41:30+01:00 Submitting deployment to compute.
2023-12-10 19:41:32+01:00 Checking the status of deployment my-backbreak-model..
2023-12-10 19:44:04+01:00 Checking the status of inference endpoint my-backbreak-model.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [9]:
import json

input_payload = json.dumps({
    'data': X_df[0:2].values.tolist()
})

output = service.run(input_payload)

print(output)

['5-10%', '0-5%']


In [10]:
print(service.scoring_uri)
print(service.swagger_uri)

http://82dc32d0-e323-4cf6-ae96-b32b892c9e2b.francecentral.azurecontainer.io/score
http://82dc32d0-e323-4cf6-ae96-b32b892c9e2b.francecentral.azurecontainer.io/swagger.json


In [11]:
primary, secondary = service.get_keys()
print(primary)

Tlj5Tar7gPGdBpVM6rX8p6eNHZdQqfzG


In [3]:
import requests
import json

# URL for the web service
scoring_uri = 'http://82dc32d0-e323-4cf6-ae96-b32b892c9e2b.francecentral.azurecontainer.io/score'
# If the service is authenticated, set the key or token
key = 'Tlj5Tar7gPGdBpVM6rX8p6eNHZdQqfzG'

set2 = [
                5.313270987393447,
                7.541981086635956,
                9.702078178995714,
                4.182776584238159,
                9.354004597971354,
                0.407808473556946,
                5.619456740301016,
                4.934506657692589]
 
# Two sets of data to score, so we get two results back
data = {"data": [set2]}

# Convert to JSON string
input_data = json.dumps(data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {key}'

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.text)

["85-90%"]
