In [1]:
import os

# create a folder for the script files
script_folder = 'src'
os.makedirs(script_folder, exist_ok=True)
print(script_folder, 'folder created')

src folder created


In [5]:

import pandas as pd
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient.from_config(credential=DefaultAzureCredential())
data_asset = ml_client.data.get("diabates_pre_processed", version="1")

df = pd.read_csv(data_asset.path)
df

Found the config file in: /config.json


Unnamed: 0,age,hypertension,heart_disease,bmi,HbA1c_level,blood_glucose_level,diabetes,gender_Male,gender_Other,smoking_history_current,smoking_history_ever,smoking_history_former,smoking_history_never,smoking_history_not current
0,1.692704,0,1,-0.321056,1.001706,0.047704,0,0,0,0,0,0,1,0
1,0.538006,0,0,-0.000116,1.001706,-1.426210,0,0,0,0,0,0,0,0
2,-0.616691,0,0,-0.000116,0.161108,0.489878,0,1,0,0,0,0,1,0
3,-0.261399,0,0,-0.583232,-0.492690,0.416183,0,0,0,1,0,0,0,0
4,1.515058,1,1,-1.081970,-0.679490,0.416183,0,1,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1.692704,0,0,-0.000116,0.628107,-1.180558,0,0,0,0,0,0,0,0
99996,-1.771388,0,0,-1.499343,0.908306,-0.934905,0,0,0,0,0,0,0,0
99997,1.070944,0,0,0.076729,0.161108,0.416183,0,1,0,0,0,1,0,0
99998,-0.794336,0,0,1.220361,-1.426688,-0.934905,0,0,0,0,0,0,1,0


In [7]:
%%writefile $script_folder/train-model-mlflow.py
# import libraries
import mlflow
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

def main(args):
    # read data
    df = get_data(args.training_data)

    # feature selection and split data
    feature_list = ['age','hypertension','heart_disease','bmi','HbA1c_level','blood_glucose_level']
    X_train, X_test, y_train, y_test = split_data(df, feature_list)

    # train model
    model = train_model(args.reg_rate, X_train, X_test, y_train, y_test)

    # evaluate model
    eval_model(model, X_test, y_test)

   

def get_data(path):
    print("Reading data...")
    df = pd.read_csv(path)
    return df

def split_data(df, feature_list):
    print("Splitting data...")
    X, y = df[feature_list].values, df['diabetes'].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)
    return X_train, X_test, y_train, y_test

def train_model(reg_rate, X_train, X_test, y_train, y_test):
    mlflow.log_param("Regularization rate", reg_rate)
    print("Training model...")
    model = LogisticRegression(C=1/reg_rate, solver="liblinear").fit(X_train, y_train)
    mlflow.sklearn.save_model(model, args.model_output)
    return model

def eval_model(model, X_test, y_test):
    y_hat = model.predict(X_test)
    acc = np.average(y_hat == y_test)
    print('Accuracy:', acc)
    mlflow.log_metric("Accuracy", acc)

    y_scores = model.predict_proba(X_test)
    auc = roc_auc_score(y_test, y_scores[:,1])
    print('AUC: ' + str(auc))
    mlflow.log_metric("AUC", auc)

    # plot ROC curve and log as an artifact
    plot_roc_curve(y_test, y_scores)

def plot_roc_curve(y_test, y_scores):
    fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])
    fig = plt.figure(figsize=(6, 4))
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr, tpr)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    roc_curve_path = "ROC-Curve.png"
    plt.savefig(roc_curve_path)
    mlflow.log_artifact(roc_curve_path)
    plt.close(fig)

def log_model(model):
    mlflow.sklearn.log_model(model, "model")

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--training_data", dest='training_data', type=str)
    parser.add_argument("--reg_rate", dest='reg_rate', type=float, default=0.01)
    parser.add_argument("--model_output", dest='model_output',
                        type=str)

    args = parser.parse_args()
    return args

if __name__ == "__main__":
    print("\n\n")
    print("*" * 60)
    args = parse_args()
    main(args)
    print("*" * 60)
    print("\n\n")


Overwriting src/train-model-mlflow.py


In [8]:
from azure.ai.ml import MLClient, command, Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.identity import DefaultAzureCredential

# Initialize MLClient using DefaultAzureCredential
ml_client = MLClient.from_config(credential=DefaultAzureCredential())

# Get the dataset asset from Azure ML workspace
data_asset = ml_client.data.get("diabates_pre_processed", version="1")

# configure job

job = command(
    code ="./src",# Path where the preprocess.py script is located
    command='python train-model-mlflow.py --training_data "${{inputs.data}}" --model_output "${{outputs.model_output}}"',
    inputs={
        "data": Input(
            path=data_asset.id,
            type=AssetTypes.URI_FILE,
            mode=InputOutputModes.RO_MOUNT
        )
    },
    outputs={
        "model_output": Output(
            type=AssetTypes.MLFLOW_MODEL
        )
    },

    
    environment="test-env-azureml:1",# Ensure this environment has all required dependencies
    compute="test-compute-1-mlstudio",)
      


# submit job
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor your job at", aml_url)

Found the config file in: /config.json
Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.

Monitor your job at https://ml.azure.com/runs/loyal_queen_dfkryhzlk7?wsid=/subscriptions/3b7a65ed-df6d-4020-9010-5585f2149752/resourcegroups/rg-test-1/workspaces/mlstudio-test-1&tid=dc0b52a3-68c5-44f7-881d-9383d8850b96
