In [2]:
import numpy as np
import pandas as pd

In [3]:
from azureml.core import Workspace, Dataset, Datastore

In [4]:
# Connect to the Azure ML workspace via Workspace.from_config(); every later
# cell that talks to the service goes through this `ws` handle.
ws = Workspace.from_config()

In [5]:
# Look up the dataset registered in the workspace under the name "wine-quality".
dataset = Dataset.get_by_name(name = "wine-quality", workspace=ws)

In [6]:
# Materialise the registered dataset as a pandas DataFrame for local exploration.
df = dataset.to_pandas_dataframe()

In [17]:
# Keep only the wines scored 5 or 6 and recode the label as binary
# (6 -> 1, 5 -> 0).  The explicit .copy() detaches the filtered frame from the
# one it was sliced from, so the column assignment below writes to an
# independent object instead of raising SettingWithCopyWarning.
# NOTE: this cell overwrites `df`; re-run the loading cell before re-running
# this one, otherwise the filter finds no 5/6 scores and the frame is empty.
df = df.loc[df["quality"].isin([5, 6])].copy()
df["quality"] = np.where(df["quality"] == 6, 1, 0)
# Share of positive (score 6) examples among the retained rows.
df["quality"].mean()

0.6013679890560876

In [20]:
import os
# Folder that receives the training script and the conda spec written by the
# following cells; exist_ok=True keeps the cell safe to re-run.
os.makedirs("training_experiment", exist_ok=True)

In [80]:
%%writefile training_experiment/train_args.py
from azureml.core import Run, Workspace, Dataset, Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, precision_score, accuracy_score
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd
import pickle
import argparse
import os

# Handle on the run this script executes in; used further down for metric logging.
run = Run.get_context()

# Model inputs, in the column order handed to the scaler and the classifier.
FEATURES = [
    "fixed acidity",
    "volatile acidity",
    "citric acid",
    "residual sugar",
    "chlorides",
    "free sulfur dioxide",
    "total sulfur dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
]
# Target column.
LABEL = "quality"
# Workspace owning the current experiment (not referenced again in this script).
WS = run.experiment.workspace

# Command line: a single optional float, --reg-rate, defaulting to 0.01.
parser = argparse.ArgumentParser()
parser.add_argument("--reg-rate", dest="reg_rate", type=float, default=0.01)
args = parser.parse_args()
reg = args.reg_rate

# The pickled scaler and model are written below this folder.
os.makedirs("outputs", exist_ok=True)
def read_data(path="winequality-white.csv"):
    """Load the wine-quality CSV and return a stratified train/test split.

    Only wines scored 5 or 6 are kept and the label is recoded to
    1 (score 6) / 0 (score 5).  This mirrors the notebook's exploratory
    preparation; without the filter every score other than 6 (3, 4, 5, 7,
    8, 9) would be lumped into class 0 and the script would train on a
    different target than the one explored.

    Parameters
    ----------
    path : str, optional
        Location of the semicolon-delimited CSV file.  Defaults to
        "winequality-white.csv" in the current working directory.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Train and test frames.  Both are independent copies, so callers can
        assign to their columns without SettingWithCopyWarning.
    """
    df = pd.read_csv(path, delimiter=";")
    # Restrict to the two classes of interest before binarising the label.
    df = df.loc[df["quality"].isin([5, 6])].copy()
    df["quality"] = np.where(df["quality"] == 6, 1, 0)
    # Fixed random_state keeps the split reproducible; stratify preserves the
    # class balance in both parts.
    df_train, df_test = train_test_split(df, stratify=df["quality"], random_state=9)
    return df_train.copy(), df_test.copy()

def save_as_pickle(path, obj):
    """Serialise ``obj`` with pickle into the file at ``path``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(path, mode="wb") as sink:
        pickle.Pickler(sink).dump(obj)

def _classification_metrics(y_true, y_pred, c_value):
    """Build the metric row (C, accuracy, recall, precision) for one data split."""
    return {"C": c_value,
            "accuracy": accuracy_score(y_true, y_pred),
            "recall": recall_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred)}


def train():
    """Fit a min-max scaler and a logistic regression, log metrics, save both.

    Steps:
      1. Load the train/test split via ``read_data()``.
      2. Fit ``MinMaxScaler`` on the training features only and apply it to
         both splits (the test split never influences the scaling).
      3. Fit ``LogisticRegression(C=reg)`` on the scaled training data.
      4. Log accuracy, recall and precision for both splits to the run.
      5. Pickle the scaler and the model under ``outputs/``.

    Returns
    -------
    None
    """
    train_df, test_df = read_data()
    # Work on independent copies so the column assignments below do not write
    # into slices of the frame produced by the split (SettingWithCopyWarning).
    train_df, test_df = train_df.copy(), test_df.copy()

    scaler = MinMaxScaler()
    train_df[FEATURES] = scaler.fit_transform(train_df[FEATURES])
    test_df[FEATURES] = scaler.transform(test_df[FEATURES])

    save_as_pickle(path="outputs/scaler.pkl", obj=scaler)

    # NOTE(review): --reg-rate is passed straight through as C, which in
    # scikit-learn is the *inverse* regularisation strength - confirm intended.
    lr = LogisticRegression(C=reg)
    lr.fit(train_df[FEATURES], train_df[LABEL])

    # predict() already returns the 0/1 class labels, so no extra
    # thresholding at 0.5 is required before scoring.
    train_metrics = _classification_metrics(train_df[LABEL],
                                            lr.predict(train_df[FEATURES]),
                                            reg)
    test_metrics = _classification_metrics(test_df[LABEL],
                                           lr.predict(test_df[FEATURES]),
                                           reg)

    run.log_table("train_metrics", train_metrics)
    run.log_table("test_metrics", test_metrics)
    save_as_pickle(path="outputs/model.pkl", obj=lr)

# Script entry point: run the whole training pipeline once.
train()
# Signal to the run object that this script has finished.
run.complete()

Overwriting training_experiment/train_args.py


In [81]:
from azureml.core import Environment, Experiment, ScriptRunConfig, Workspace
from azureml.core.environment import CondaDependencies

# Conda specification for the training environment.  argparse is part of the
# Python standard library, so it is not listed as a pip dependency.
env_config = CondaDependencies.create(python_version="3.8",
                                      pip_packages=["scikit-learn",
                                                    "numpy", "pandas",
                                                    "azureml-core",
                                                    "azureml-defaults"])
env_config.save_to_file(base_directory="training_experiment", conda_file_path="env.yaml")

ws = Workspace.from_config()
experiment = Experiment(name="train-wine-quality", workspace=ws)
env = Environment.from_conda_specification(name="training_env",
                                           file_path="training_experiment/env.yaml")

# Run train_args.py from the training_experiment folder with --reg-rate 10.
src = ScriptRunConfig(source_directory="training_experiment",
                      script="train_args.py",
                      arguments=["--reg-rate", 10],
                      environment=env)
run = experiment.submit(src)

# Block until the submitted script has finished.  The cells below list and
# download files from outputs/, which only exist once training is done;
# run.complete() would not wait for the script.
run.wait_for_completion(show_output=False)

In [82]:
from azureml.widgets import RunDetails
# Show the notebook widget for the submitted run.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [84]:
# Print every file recorded for the run, one name per line.
run_file_names = run.get_file_names()
for file in run_file_names:
    print(file)

azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/22313_azureml.log
outputs/model.pkl
outputs/scaler.pkl


In [85]:
# Fetch the pickled scaler and model produced by the run.
# The local folder is created first and each artifact gets an explicit target
# file path, so the result does not depend on whether "outputs" already exists
# locally (a bare "outputs" target is only safe when that directory exists).
os.makedirs("outputs", exist_ok=True)
run.download_file("outputs/scaler.pkl", output_file_path="outputs/scaler.pkl")
run.download_file("outputs/model.pkl", output_file_path="outputs/model.pkl")

In [87]:
# Model is not imported by any earlier cell of the notebook, so import it here;
# otherwise this cell raises NameError on a fresh kernel.
from azureml.core import Model

# Register the downloaded scaler and classifier as workspace models.
Model.register(ws, model_name="scaler-wine-quality", model_path="outputs/scaler.pkl")
Model.register(ws, model_name="lr-model-wine-quality", model_path="outputs/model.pkl")

Registering model scaler-wine-quality
Registering model lr-model-wine-quality


Model(workspace=Workspace.create(name='learn-mlops-ws', subscription_id='fcd1fe46-718c-472d-9814-211fa6d32599', resource_group='learn-mlops-rg'), name=lr-model-wine-quality, id=lr-model-wine-quality:1, version=1, tags={}, properties={})

In [89]:
from azureml.core import Model

# Print the name of each entry returned by Model.list for this workspace
# (a name can appear more than once).
registered_models = Model.list(ws)
for model in registered_models:
    print(model.name)

lr-model-wine-quality
scaler-wine-quality
iris_app_logistic_model
iris_app_label_encoder
iris_app_scaler
iris_app_label_encoder
iris_app_scaler
iris_lr_model_onnx
iris_lr_model_onnx
iris_lr_model
iris_lr_model
iris_lr_model
standard-scaler
