In [None]:
import os
import shutil

import mlflow
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv

# Load settings from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with a readable message when a required setting is absent or empty.
# With os.environ.get() a missing value would be passed on to MLClient as None.
_required_env_vars = ("SUBSCRIPTION_ID", "RESOURCE_GROUP", "AML_WORKSPACE_NAME")
_missing_env_vars = [name for name in _required_env_vars if not os.environ.get(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

subscription_id = os.environ["SUBSCRIPTION_ID"]
resource_group = os.environ["RESOURCE_GROUP"]
workspace = os.environ["AML_WORKSPACE_NAME"]

# Client handle for the Azure ML workspace identified by the three settings above.
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# Look up the workspace's MLflow tracking URI and point the MLflow client at it,
# so everything logged below goes to that tracking server.
azureml_mlflow_uri = ml_client.workspaces.get(
    ml_client.workspace_name
).mlflow_tracking_uri

mlflow.set_tracking_uri(azureml_mlflow_uri)

# Import packages

In [None]:
import pandas as pd
import numpy as np
import pickle

from sklearn.metrics import mean_squared_error

from sklearn.linear_model import LinearRegression

# Load train/validation data

In [None]:
# Read the training and validation splits from CSV files in the data folder.
train_csv_path = "../data/nyc_taxi_dataset_train.csv"
valid_csv_path = "../data/nyc_taxi_dataset_valid.csv"

df_train = pd.read_csv(train_csv_path)
df_valid = pd.read_csv(valid_csv_path)

# Preview the first rows of the training frame.
df_train.head()

# Separate features and target

In [None]:
# Name of the column the model is trained to predict; every other column is
# used as an input feature.
col_target = "totalAmount"

In [None]:
# Targets: the target column of each split as a flat NumPy array.
y_train = df_train[col_target].to_numpy().ravel()
y_valid = df_valid[col_target].to_numpy().ravel()

# Features: every remaining column once the target column is removed.
X_train = df_train.drop(columns=col_target)
X_valid = df_valid.drop(columns=col_target)

# Build the model

In [None]:
# Select the MLflow experiment that the run started below will belong to.
# NOTE(review): "mlow" looks like a typo for "mlflow"; kept unchanged because a
# different name would select a different experiment - confirm before renaming.
experiment_name = "mlow_nyc_taxi_regression_notebook"
mlflow.set_experiment(experiment_name=experiment_name)

In [None]:
# mlflow.start_run() raises when a run is already active, which is the case if
# this cell is executed again or if an earlier pass stopped before reaching
# mlflow.end_run(). End any leftover run first so the cell can be re-run.
if mlflow.active_run() is not None:
    mlflow.end_run()

# Open the run that collects the metrics and model logged in the cells below.
run = mlflow.start_run()

In [None]:
# Fit an ordinary linear regression on the training split.
# fit() returns the fitted estimator, which is kept as `model`.
regressor = LinearRegression()
model = regressor.fit(X_train, y_train)

In [None]:
def rmse(validation, target):
    """Return the root-mean-squared error between two sets of values.

    Both arguments are forwarded, in the given order, to
    ``sklearn.metrics.mean_squared_error``; the square root of that result
    is returned.
    """
    mse = mean_squared_error(validation, target)
    return np.sqrt(mse)

In [None]:
# Score the fitted model on the training split and record the error in MLflow.
train_actual = df_train[col_target]
preds = model.predict(X_train)
train_rmse = rmse(train_actual, preds)
print(train_rmse)
mlflow.log_metric(key="train_RMSE", value=train_rmse)

In [None]:
# Score the fitted model on the validation split and record the error in MLflow.
valid_actual = df_valid[col_target]
preds = model.predict(X_valid)
valid_rmse = rmse(valid_actual, preds)
print(valid_rmse)
mlflow.log_metric(key="valid_RMSE", value=valid_rmse)

# Save the model

In [None]:
model_name = "sklearn-model"
local_path = f"./{model_name}"

# mlflow.sklearn.save_model refuses to write into an existing, non-empty
# directory, so running this cell a second time would fail. Remove the export
# left by a previous execution before saving again.
if os.path.isdir(local_path):
    shutil.rmtree(local_path)

# Write the fitted model to the local directory in MLflow's sklearn format.
mlflow.sklearn.save_model(sk_model=model, path=local_path)

In [None]:
# Log the fitted model to the active run and register it; the same name is
# used for the artifact path and for the registered model.
mlflow.sklearn.log_model(
    artifact_path=model_name,
    registered_model_name=model_name,
    sk_model=model,
)

In [None]:
# Close the run opened by mlflow.start_run() above.
mlflow.end_run()