# Notebook to deploy the air-overpressure prediction web service

In [1]:
from azureml.core import Workspace, Dataset
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration
from azureml.core.webservice import AciWebservice
from azureml.data.tabular_dataset import TabularDataset

import joblib
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np 
import pandas as pd 
import sklearn

In [6]:
# Read the training data and use the "Unnamed: 0" column as the row index.
df = pd.read_csv('AIR_DEF.csv').set_index("Unnamed: 0")

# Separate the target column from the predictor columns.
target_column = 'Percentil_Rango'
y_df = df[target_column]
X_df = df.drop(columns=target_column)
X_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25000 entries, 0 to 24999
Data columns (total 8 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Impulse (I)                      25000 non-null  float64
 1   Air density (pa)                 25000 non-null  float64
 2   Positive Phase Duration (tp)     25000 non-null  float64
 3   Peak Overpressure (pa)           25000 non-null  float64
 4   Spacing (m)                      25000 non-null  float64
 5   Stemming (m^3)                   25000 non-null  float64
 6   Number of rows (explosive load)  25000 non-null  float64
 7   Air-overpressure (index)         25000 non-null  float64
dtypes: float64(8)
memory usage: 1.7 MB


In [7]:
# Train the gradient-boosting classifier on the full dataset.
# random_state is fixed so that re-running the notebook produces the same
# fitted model. The estimator is kept under its own name because `model` is
# reassigned further down to the registered Azure ML model.
gb_classifier = GradientBoostingClassifier(
    learning_rate=0.1,
    n_estimators=20,
    min_samples_split=15,
    random_state=42,
)
gb_classifier.fit(X_df, y_df)

# Serialize the fitted estimator; this is the file that is registered later.
joblib.dump(gb_classifier, 'sklearn_GB_model.pkl')

['sklearn_GB_model.pkl']

In [8]:
# Open the Azure ML workspace from the local configuration.
ws = Workspace.from_config()

# Datasets are uploaded to the workspace's default datastore.
datastore = ws.get_default_datastore()

# Upload the whole dataframe and register it as a tabular dataset.
ds = Dataset.Tabular.register_pandas_dataframe(
    dataframe=df,
    name='Air dataset',
    description='Air-Overpresure',
    target=datastore,
)

# Dataset views without / with only the target column.
y = ds.keep_columns("Percentil_Rango")
X = ds.drop_columns("Percentil_Rango")

# Report which dataset version was registered.
print(f"{ds.name} v{ds.version} (ID: {ds.id})")

Validating arguments.
Arguments validated.
Validating arguments.
Arguments validated.
'overwrite' is set to True. Any file already present in the target will be overwritten.
Uploading files from 'C:/Users/alexm/AppData/Local/Temp/tmpadfryv2e' to 'managed-dataset/e7e3e12b-e9b4-45ca-8e2c-66d60604f735/'
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.
Air dataset v1 (ID: f11f2086-e9dd-48d3-905f-86764c30811e)


In [9]:
# Register the pickled estimator in the workspace together with its sample
# datasets and the resources requested for serving it.
registration_args = dict(
    workspace=ws,
    model_name='GB-model',                        # name of the registered model in the workspace
    model_path='./sklearn_GB_model.pkl',          # local file that is uploaded and registered
    model_framework=Model.Framework.SCIKITLEARN,  # framework the model was created with
    model_framework_version=sklearn.__version__,  # scikit-learn version used for training
    sample_input_dataset=X,
    sample_output_dataset=y,
    resource_configuration=ResourceConfiguration(cpu=2, memory_in_gb=4),
    description='Gradient Boosting clasification model to predict air-overpressure.',
    tags={'area': 'air-overpressure', 'type': 'clasification'},
)
model = Model.register(**registration_args)

print('Name:', model.name)
print('Version:', model.version)

Registering model GB-model
Name: GB-model
Version: 1


In [10]:
# Display the dataframe's column labels (the eight features plus the
# 'Percentil_Rango' target column).
df.columns

Index(['Impulse (I)', 'Air density (pa)', 'Positive Phase Duration (tp)',
       'Peak Overpressure (pa)', 'Spacing (m)', 'Stemming (m^3)',
       'Number of rows (explosive load)', 'Air-overpressure (index)',
       'Percentil_Rango'],
      dtype='object')

In [12]:
%%writefile score_air.py

import json
import pickle
import numpy as np
import pandas as pd
import os
import joblib
from azureml.core.model import Model

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType


def init():
    """Load the serialized model into the module-level ``model`` variable.

    The model directory is read from the ``AZUREML_MODEL_DIR`` environment
    variable and ``sklearn_GB_model.pkl`` inside it is deserialized with
    joblib.

    Raises:
        RuntimeError: if ``AZUREML_MODEL_DIR`` is not set, instead of the
            opaque ``TypeError`` that ``os.path.join(None, ...)`` would raise.
    """
    global model
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    if model_dir is None:
        raise RuntimeError(
            "AZUREML_MODEL_DIR is not set; cannot locate 'sklearn_GB_model.pkl'."
        )
    # Replace filename if needed.
    model_path = os.path.join(model_dir, 'sklearn_GB_model.pkl')
    # Deserialize the model file back into a sklearn model.
    model = joblib.load(model_path)


# Example request row: one value for each feature column.
# It is passed to the input schema decorator below.
_sample_features = {
    "Impulse (I)": 3.61,
    "Air density (pa)": 6.75,
    "Positive Phase Duration (tp)": 2.78,
    "Peak Overpressure (pa)": 6.23,
    "Spacing (m)": 1.92,
    "Stemming (m^3)": 4.21,
    "Number of rows (explosive load)": 5.12,
    "Air-overpressure (index)": 4.85,
}
input_sample = pd.DataFrame(data=[_sample_features])

# Example response: an integer array. Use the data type that reflects the
# expected result.
output_sample = np.array([0])

# The decorators describe the request ('data', shaped like input_sample) and
# the response (shaped like output_sample).
# NOTE: enforce_shape is left at its default here; pass enforce_shape=False to
# PandasParameterType if variable-shaped input should be accepted.
@input_schema('data', PandasParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data):
    """Score the rows in ``data`` with the model loaded by ``init()``.

    Args:
        data: pandas DataFrame built from the request payload; it must contain
            every column present in ``input_sample``.

    Returns:
        A list with one prediction per input row, or the exception message as
        a string if anything fails while scoring.
    """
    try:
        print("input_data....")
        print(data.columns)
        print(type(data))
        # Put the columns in the order of input_sample before predicting, so a
        # payload that lists the same features in a different order is not
        # rejected or scored against the wrong features.
        features = data[list(input_sample.columns)]
        result = model.predict(features)
        print("result.....")
        print(result)
        # You can return any data type, as long as it can be serialized by JSON.
        return result.tolist()
    except Exception as e:
        # Kept as a plain string so existing clients see the same response shape.
        error = str(e)
        return error

Writing score_air.py


In [13]:
from azureml.core.model import InferenceConfig
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# pip packages installed in the scoring environment; scikit-learn is pinned
# to the version available in this notebook's kernel.
pip_requirements = [
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'pandas',
    f'scikit-learn=={sklearn.__version__}',
]

environment = Environment('my-sklearn-environment')
environment.python.conda_dependencies = CondaDependencies.create(
    pip_packages=pip_requirements
)

# Tie the scoring script to the environment it runs in.
inference_config = InferenceConfig(
    entry_script='./score_air.py',
    environment=environment,
)

In [14]:
service_name = 'my-air-model'

# Container sizing for the ACI web service; key-based authentication is on.
aci_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True,
)

# Deploy the registered model, replacing any existing service with this name.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)

# Block until the deployment finishes, streaming its progress.
service.wait_for_deployment(show_output=True)

To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration 
  service = Model.deploy(ws, service_name, [model], inference_config, deployment_config=aci_config, overwrite=True)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2023-12-09 20:12:20+01:00 Creating Container Registry if not exists.
2023-12-09 20:12:20+01:00 Registering the environment.
2023-12-09 20:12:23+01:00 Use the existing image.
2023-12-09 20:12:23+01:00 Generating deployment configuration.
2023-12-09 20:12:24+01:00 Submitting deployment to compute.
2023-12-09 20:12:27+01:00 Checking the status of deployment my-air-model..
2023-12-09 20:13:49+01:00 Checking the status of inference endpoint my-air-model.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [16]:
# Print the service's scoring URI, then its Swagger URI.
for uri_attribute in ('scoring_uri', 'swagger_uri'):
    print(getattr(service, uri_attribute))

http://eb43ed3f-7ca1-4be9-9e49-d08f52425abf.francecentral.azurecontainer.io/score
http://eb43ed3f-7ca1-4be9-9e49-d08f52425abf.francecentral.azurecontainer.io/swagger.json


In [17]:
# Retrieve the service's authentication keys for use by client code.
# The key value is deliberately not printed: cell outputs are saved with the
# notebook, so echoing it would leak the credential to anyone with the file.
primary, secondary = service.get_keys()
print('Primary key retrieved ({} characters); value not displayed.'.format(len(primary)))

[primary key redacted: it was saved in this notebook's output, so regenerate the service keys]
