In [16]:
from azureml.core import Workspace, Datastore, Experiment, Environment
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline

In [17]:
pip install azureml-pipeline

Note: you may need to restart the kernel to use updated packages.


In [18]:
# Initialize workspace
ws = Workspace.from_config("Users/mypersonall3099/Online_retail/config.json")

# Define compute target
compute_name = "OnlineRetail"
compute_target = ws.compute_targets[compute_name]

In [33]:
# Define environment
env = Environment.from_conda_specification(name="myenv", file_path="environment.yaml")
env.register(workspace=ws)


{
    "assetId": "azureml://locations/eastus2/workspaces/7ce76f32-f5fe-4dd3-b900-96fb92b58ebb/environments/myenv/versions/4",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240709.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "myenv",
    "python": {
       

In [34]:
from azureml.core import Environment

env = Environment.get(workspace=ws, name="myenv")
print(env.python.conda_dependencies.serialize_to_string())


name: myenv
channels:
- defaults
dependencies:
- python=3.7
- scikit-learn
- statsmodels
- pip
- pip:
  - pmdarima
  - azureml-sdk
  - azureml-dataset-runtime[fuse, pandas]
  - azureml-defaults
  - joblib
  - scikit-learn



In [35]:
# Define run configuration
aml_config = RunConfiguration()
aml_config.target = compute_target
aml_config.environment = env

In [22]:
#Pipeline
read_data = 'data_wrangling.py'
prep = 'preprocessing.py'
model = 'modelling.py'

source_directory = ""

#Script initialization
py_script_run_read = PythonScriptStep(
                name='Data Wrangling Step',
                script_name = read_data,
                compute_target = compute_target,
                runconfig = aml_config,
                allow_reuse=False)

py_script_run_prep = PythonScriptStep(
                script_name = prep,
                compute_target=compute_target,
                source_directory=source_directory,
                runconfig = aml_config,
                allow_reuse=False)

py_script_run_model = PythonScriptStep(
                script_name = model,
                compute_target=compute_target,
                source_directory=source_directory,
                runconfig = aml_config,
                allow_reuse=False)

pipeline_steps = [py_script_run_read, py_script_run_prep, py_script_run_model]
pipeline_1 = Pipeline(workspace=ws, steps = pipeline_steps)

In [23]:
# Experiment 1

experiment_name = "Run4"
experiment = Experiment(ws, experiment_name)
pipeline_run = experiment.submit(pipeline_1)
pipeline_run.wait_for_completion(show_output=True)

Created step Data Wrangling Step [836a4389][89577514-d19b-4359-a119-b71f2e68aebf], (This step will run and generate new outputs)
Created step preprocessing.py [63640a78][c13f897d-3a38-4a43-ad6c-de7d2c97f746], (This step will run and generate new outputs)
Created step modelling.py [ebf0da1d][c0296db0-33e2-48d6-b2c3-4c7c0f5a184e], (This step will run and generate new outputs)
Submitted PipelineRun d45f4227-0619-4a08-92c8-4158af65b345
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/d45f4227-0619-4a08-92c8-4158af65b345?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
PipelineRunId: d45f4227-0619-4a08-92c8-4158af65b345
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/d45f4227-0619-4a08-92c8-4158af65b345?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
Pipeline

'Finished'

In [13]:
# Experiment 2

pipeline_run = Experiment(ws, "Exp2").submit(pipeline_1)
pipeline_run.wait_for_completion(show_output=True)

Submitted PipelineRun 07e6bc8b-875d-485b-978c-d438a1c8b471
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/07e6bc8b-875d-485b-978c-d438a1c8b471?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
PipelineRunId: 07e6bc8b-875d-485b-978c-d438a1c8b471
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/07e6bc8b-875d-485b-978c-d438a1c8b471?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 1048dfa3-19f2-4b9d-90dc-ab945944ec81
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/1048dfa3-19f2-4b9d-90dc-ab945944ec81?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
StepRun( Data Wrangling 

'Finished'

In [24]:
pipeline_run

Experiment,Id,Type,Status,Details Page,Docs Page
Run4,d45f4227-0619-4a08-92c8-4158af65b345,azureml.PipelineRun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [26]:
datastore = Datastore.get(ws, 'workspaceblobstore')
datastore.download(target_path="model", prefix="best_autoarima_model", overwrite=False, show_progress=True)

0

In [27]:
# Retrieve the datastore
datastore = Datastore.get(ws, 'workspaceblobstore')

# Download the model file from the models folder
datastore.download(target_path="model", 
                   prefix="models/best_autoarima_model.pkl",
                   overwrite=True,  # Set to True to overwrite existing files
                   show_progress=True)


Downloading models/best_autoarima_model.pkl
Downloaded models/best_autoarima_model.pkl, 1 files out of an estimated total of 1


1

In [28]:
# Registering the model

from azureml.core import Datastore, Workspace, Model

model = Model.register(workspace=ws,
                       model_name='best_autoarima_model',  # Name of the model in the Azure registry
                       model_path="model/models/best_autoarima_model.pkl",  # Path to the downloaded model file
                       description="Best Auto-ARIMA model for sales forecasting",
                       tags={"model_type": "Auto-ARIMA", "framework": "pmdarima"})

print(f"Model registered: {model.name} with version {model.version}")


Registering model best_autoarima_model
Model registered: best_autoarima_model with version 1


In [29]:
# Load the model
from azureml.core.model import Model
model = Model(ws, name='best_autoarima_model')
print("Loaded model version:", model.version)

Loaded model version: 1


# Deployment


In [31]:
%%writefile score.py
import joblib
import json
import numpy as np
from azureml.core.model import Model

def init():
    global model_3
    model_3_path = Model.get_model_path(model_name='best_autoarima_model')
    model_3 = joblib.load(model_3_path)

def run(raw_data):
    try:
        data = json.loads(raw_data)['data']
        data = np.array(data)
        result_1 = model_3.predict(data)
        return {"prediction1": result_1.tolist()}
    except Exception as e:
        result = str(e)
        return result


Writing score.py


In [36]:
from azureml.core.model import InferenceConfig

inference_config = InferenceConfig(entry_script="score.py", environment=env)

In [37]:
from azureml.core.webservice import AciWebservice

aci_service_name = "aciservice-modelforecasting"

deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service = Model.deploy(ws, aci_service_name, [model], inference_config, deployment_config, overwrite=True)
service.wait_for_deployment(True)

print(service.state)

To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration 
  service = Model.deploy(ws, aci_service_name, [model], inference_config, deployment_config, overwrite=True)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2024-08-18 15:46:02+00:00 Registering the environment.
2024-08-18 15:46:05+00:00 Building image..
2024-08-18 15:55:33+00:00 Generating deployment configuration.
2024-08-18 15:55:35+00:00 Submitting deployment to compute.
2024-08-18 15:55:42+00:00 Checking the status of deployment aciservice-modelforecasting..
2024-08-18 15:57:08+00:00 Checking the status of inference endpoint aciservice-modelforecasting.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [40]:
from azureml.core import Workspace, Dataset

datastore = Datastore.get(ws, 'workspaceblobstore')
df = Dataset.Tabular.from_delimited_files(path=[(datastore, "incremental_data/incremental_data.csv")]).to_pandas_dataframe()
df.head()



{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}


Unnamed: 0,InvoiceDate,Sales
0,2011-10-26,31192.55
1,2011-01-03,0.0
2,2010-12-16,36715.36
3,2011-10-22,0.0
4,2011-01-27,13370.32


In [44]:
import json
import numpy as np

df_new['InvoiceDate'] = df_new['InvoiceDate'].astype(str)
df_new = np.array(df)

test_sample = json.dumps({'data': df_new.tolist()})
predictions = service.run(test_sample)
predictions

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [45]:
import json
import numpy as np
import pandas as pd

# Assuming 'df' is your original DataFrame
df_new = df.copy()

# Convert Timestamp columns to strings (to be JSON serializable)
if 'InvoiceDate' in df_new.columns:
    df_new['InvoiceDate'] = df_new['InvoiceDate'].astype(str)

# Convert DataFrame to NumPy array
df_new_array = df_new.to_numpy()

# Serialize the data
test_sample = json.dumps({'data': df_new_array.tolist()})
predictions = service.run(test_sample)
print(predictions)


n_periods must be an int


In [46]:
predictions

'n_periods must be an int'