# Train and register model

### Connect to workspace

In [12]:
# Connect to workspace
from azureml.core import Workspace

ws = Workspace.from_config()
print(ws.name, "Workspace loaded")

zpdsnfws02 Workspace loaded


### Upload data to datastores

In [13]:
# Get default datastores
default_ds = ws.get_default_datastore()

# Show all datastores
for ds_name in ws.datastores:
    print(ds_name, "- Default =", ds_name == default_ds.name)

azureml_globaldatasets - Default = False
workspaceartifactstore - Default = False
workspaceblobstore - Default = True
workspacefilestore - Default = False


In [14]:
# Upload data to a datastore
default_ds.upload_files(files = ["./datasets/Marketing_Request_20210312.csv"], \
                        target_path = "marketing-request-20210312-data/", \
                        overwrite = True, show_progress = True)

Uploading an estimated of 1 files
Uploading ./datasets/Marketing_Request_20210312.csv
Uploaded ./datasets/Marketing_Request_20210312.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_0ba8fcd87d38467b8f7d9bb2b07aa7fb

### Train model

In [15]:
# Train model from datastore
data_ref = default_ds.path("marketing-request-20210312-data").as_download()
print(data_ref)

$AZUREML_DATAREFERENCE_f07ae46425ee4bd19c111d52e1cd53f0


In [16]:
# Create folder for experiment files
import os

experiment_folder = "marketing_request_20210312_from_datastore"
os.makedirs(experiment_folder, exist_ok = True)
print(experiment_folder, "folder created")

marketing_request_20210312_from_datastore folder created


#### Write training script

In [17]:
%%writefile $experiment_folder/marketing_request_20210312_training.py
# Import libraries
import os 
import argparse
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Parameterised training script
# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument("--data-folder", type = str, dest = "data_folder", help = "data folder reference")
args = parser.parse_args()

# Get experiment run context
run = Run.get_context()

# Load data from data reference
data_folder = args.data_folder
print("Loading data from", data_folder)
# Load all files and concatenate into a single dataframe
all_files = os.listdir(data_folder)
marketing_request = pd.concat(pd.read_csv(os.path.join(data_folder, csv_file)) for csv_file in all_files)

# Get features and labels
X, y = marketing_request[["ACCESS_COUNT", "VOLUME"]].values, marketing_request["USERS"].values

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 123)

# Train linear regression model
print("Training a linear regression model with n_jobs None")
run.log("data_folder", data_folder)
model = LinearRegression().fit(X_train, y_train)

# Calculate accuracy
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print("Accuracy:", acc)
run.log("Accuracy", np.float(acc))

# Calculate mean squared error
y_scores = model.predict(X_test)
mse = mean_squared_error(y_test, y_scores)
run.log("MSE", np.float(mse))

# Store outputs
os.makedirs("outputs", exist_ok = True)
joblib.dump(value = model, filename = "outputs/marketing_request_20210312_model.pkl")

run.complete()

Overwriting marketing_request_20210312_from_datastore/marketing_request_20210312_training.py


#### Run training script

In [18]:
# Create estimator to run script
from azureml.train.sklearn import SKLearn
from azureml.core import Experiment
from azureml.widgets import RunDetails
import pandas as pd

data_ref = default_ds.path("marketing-request-20210312-data").as_download()

# Set script parameters
script_params = {
    "--data-folder": data_ref
}

# Create estimator
estimator = SKLearn(source_directory = experiment_folder,
                   entry_script = "marketing_request_20210312_training.py",
                   script_params = script_params,
                   compute_target = "local",
                    conda_packages = ["pandas", "pyspark"]
                   )

# Create experiment
experiment_name = "marketing_request_20210312_training"
experiment = Experiment(workspace = ws, name = experiment_name)

# Run experiment
run = experiment.submit(config = estimator)

# Show run details
RunDetails(run).show()
run.wait_for_completion()



_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'marketing_request_20210312_training_1632981655_e95fa4d1',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2021-09-30T06:00:58.402748Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '706cf15a-9958-4c5a-bf3e-031e5f14afd4',
  'azureml.git.repository_uri': 'https://github.com/AkbarAzad/akbar_stock_analyser93.git',
  'mlflow.source.git.repoURL': 'https://github.com/AkbarAzad/akbar_stock_analyser93.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'marketing_request_20210312_training.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--data-folder',
   '$AZUREML_DATAREFERENCE_f41e063e1190420e85c4e42c400cdbbc'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {'f41e063e1190420e85c4e42c400cd

### Register model

In [31]:
model = run.register_model(model_path = "outputs/marketing_request_20210312_model.pkl", \
                   model_name = "marketing_request_20210312_model",\
                  tags = {"Training context": "Inline Training"},\
                  properties = {"MSE": run.get_metrics()["MSE"], "Accuracy": run.get_metrics()["Accuracy"]})
print("Model trained and registered.")

Model trained and registered.


In [32]:
print(model.name, model.id, model.version, sep='\t')

marketing_request_20210312_model	marketing_request_20210312_model:5	5


In [34]:
print(model.get_model_path("marketing_request_20210312_model"))

azureml-models\marketing_request_20210312_model\1\marketing_request_20210312_model.pkl


In [36]:
model.id

'marketing_request_20210312_model:5'