# Import Data

In [39]:
import pandas as pd
import io
import requests
from azureml.core import Dataset, Datastore
from azureml.data.datapath import DataPath

url="https://raw.githubusercontent.com/ColdStart-Challenge/ColdStart-Challenge-2021/main/challenges/challenge5/starter/data/coldstart-historical-sales.csv"
s=requests.get(url).content
df=pd.read_csv(io.StringIO(s.decode('utf-8')))

df_tab = Dataset.Tabular.from_delimited_files(url)

X_tab = df_tab.drop_columns("count")
y_tab = df_tab.keep_columns("count")

df.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,count
0,5/22/2019 0:00,1,0,0,1,9.84,14.395,81,0.0,16
1,5/22/2019 1:00,1,0,0,1,9.02,13.635,80,0.0,40
2,5/22/2019 2:00,1,0,0,1,9.02,13.635,80,0.0,32
3,5/22/2019 3:00,1,0,0,1,9.84,14.395,75,0.0,13
4,5/22/2019 4:00,1,0,0,1,9.84,14.395,75,0.0,1


# Transform Data

In [42]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

df = df[["season", "holiday", "workingday", "weather", "temp", "atemp", "humidity", "windspeed", "count"]]

#Count Missing Values
print(" \nCount missing values: ",
       df.isnull().sum().sum())

#No Missing Values, so no cleaning needed

#Set categorical variables as categorical

df["season"] = df["season"].astype("category")
df["weather"] = df["weather"].astype("category")

#Shuffle the data:
df = df.sample(frac=1)

#Set feature and target variables
X = df.iloc[:, [0,1,2,3,4,5,6,7]]
y = df.iloc[:, 8]

#Normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)


 
Count missing values:  0


# K-Fold CV and Training

In [44]:
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import pandas as pd

param_grid = {'max_depth': [2, 5, 10],
              'min_samples_split': [2, 5, 10]}

base_estimator = RandomForestRegressor(random_state=0)

sh = GridSearchCV(base_estimator, param_grid, cv=10, scoring='r2').fit(X, y)
best_model = sh.best_estimator_
best_score = sh.best_score_
print('Best Model: ', best_model, '\n with score: ', best_score)


Best Model:  RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=10, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False) 
 with score:  0.3664615637254064


In [45]:
import joblib
joblib.dump(sh, 'sklearn_regression_model.pkl')

['sklearn_regression_model.pkl']

# Registering the Model

In [46]:
import sklearn

from azureml.core import Workspace
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

ws = Workspace.from_config()

model = Model.register(workspace=ws,
                       model_name='my-sklearn-model',                # Name of the registered model in your workspace.
                       model_path='./sklearn_regression_model.pkl',  # Local file to upload and register as a model.
                       model_framework=Model.Framework.SCIKITLEARN,  # Framework used to create the model.
                       model_framework_version=sklearn.__version__,  # Version of scikit-learn used to create the model.
                       sample_input_dataset=X_tab,
                       sample_output_dataset=y_tab,
                       resource_configuration=ResourceConfiguration(cpu=2, memory_in_gb=4),
                       description='Regression model to predict Ice Cream Sales.')

print('Name:', model.name)
print('Version:', model.version)

Registering model my-sklearn-model
Name: my-sklearn-model
Version: 2


# Define Scoring Script

In [47]:
%%writefile score.py

import json
import pickle
import numpy as np
import pandas as pd
import os
import joblib
from azureml.core.model import Model

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType


def init():
    global model
    # Replace filename if needed.
    path = os.getenv('AZUREML_MODEL_DIR') 
    model_path = os.path.join(path, 'sklearn_regression_model.pkl')
    # Deserialize the model file back into a sklearn model.
    model = joblib.load(model_path)


input_sample = pd.DataFrame(data=[{
    "season": 1,
    "holiday": 0,
    "workingday": 0,
    "weater": 1,
    "temp": 9.84,
    "atemp": 10.254,
    "humidity": 81,
    "windspeed": 0.0
}])

# This is an integer type sample. Use the data type that reflects the expected result.
output_sample = np.array([0])

# To indicate that we support a variable length of data input,
# set enforce_shape=False
@input_schema('data', PandasParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data):
    try:
        print("input_data....")
        print(data.columns)
        print(type(data))
        result = model.predict(data)
        print("result.....")
        print(result)
    # You can return any data type, as long as it can be serialized by JSON.
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error

Overwriting score.py


# Define custom Environment

In [48]:
from azureml.core.model import InferenceConfig
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

environment = Environment('my-sklearn-environment')
environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'pandas',
    'scikit-learn=={}'.format(sklearn.__version__)
])

inference_config = InferenceConfig(entry_script='./score.py',environment=environment)

# Deploy Model

In [49]:
service_name = 'my-coldstart-model'

service = Model.deploy(ws, service_name, [model], inference_config, overwrite=True)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-05-21 17:23:32+00:00 Creating Container Registry if not exists.
2021-05-21 17:23:32+00:00 Registering the environment.
2021-05-21 17:23:33+00:00 Use the existing image.
2021-05-21 17:23:33+00:00 Generating deployment configuration.
2021-05-21 17:23:34+00:00 Submitting deployment to compute.
2021-05-21 17:23:37+00:00 Checking the status of deployment my-coldstart-model..
2021-05-21 17:23:48+00:00 Checking the status of inference endpoint my-coldstart-model.
Succeeded
ACI service creation operation finished, operation "Succeeded"


# JSON

In [50]:
import json

input_payload = json.dumps({
    'data':df.iloc[[0, 1, 2],[0,1,2,3,4,5,6,7]].values.tolist()
})

output = service.run(input_payload)

print(output)


[407.0327391774891, 346.33451515151506, 336.798755952381]


# Redeploy to ACI

In [51]:
from azureml.core.webservice import AciWebservice

deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=0.5, memory_gb=1, auth_enabled=True
)

In [52]:
service = Model.deploy(
    ws,
    "myservice",
    [model],
    inference_config,
    deployment_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-05-21 17:29:07+00:00 Creating Container Registry if not exists.
2021-05-21 17:29:07+00:00 Registering the environment.
2021-05-21 17:29:08+00:00 Use the existing image.
2021-05-21 17:29:08+00:00 Generating deployment configuration.
2021-05-21 17:29:09+00:00 Submitting deployment to compute..
2021-05-21 17:29:32+00:00 Checking the status of deployment myservice..
2021-05-21 17:31:24+00:00 Checking the status of inference endpoint myservice.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [53]:
print(service.get_logs())

2021-05-21T17:31:13,627134839+00:00 - rsyslog/run 
2021-05-21T17:31:13,629610549+00:00 - iot-server/run 
2021-05-21T17:31:13,634830170+00:00 - gunicorn/run 
2021-05-21T17:31:13,629025847+00:00 - nginx/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-05-21T17:31:13,863191275+00:00 - iot-server/finish 1 0
2021-05-21T17:31:13,864127579+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (18)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 41
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2021-05-21 17:31:16,583 | root | INFO | Starting up app insights client
2021-05-21 17:31:16,584 | root | INFO | Starting up request id generator
2021-05-21 17:31:16,584 | root | INFO | Starting up app insight hooks
2021-05-21 17:31:16,584 | root | INFO | Invoking user's init function
2021-05-21 17:31:17,282 | root | INFO | Users's init has completed suc

In [54]:
import requests
import json
from azureml.core import Webservice

service = Webservice(workspace=ws, name="myservice")
scoring_uri = service.scoring_uri

# If the service is authenticated, set the key or token
key, _ = service.get_keys()

# Set the appropriate headers
headers = {"Content-Type": "application/json"}
headers["Authorization"] = f"Bearer {key}"

# Make the request and display the response and logs
data = json.dumps({
    'data':df.iloc[[0, 1, 2],[0,1,2,3,4,5,6,7]].values.tolist()
})
resp = requests.post(scoring_uri, data=data, headers=headers)
print(resp.text)

[407.0327391774891, 346.33451515151506, 336.798755952381]
