# 1 model using reguralisation

Now we'll create a model using reguralisation to increase it's performance

Let's load our data and packages first

In [1]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential, AzureCliCredential
from azure.ai.ml import MLClient

In [2]:
import yaml

# Load config from YAML file
with open("config.yaml", "r") as f:
    config = yaml.safe_load(f)

# Extract individual variables
subscription_id = config["subscription_id"]
resource_group = config["resource_group"]
workspace_name = config["workspace_name"]

print(workspace_name)
print(subscription_id)
print(resource_group)
type(resource_group)

mlops-project-ml
88f330cf-5648-423b-aaa3-bc9705075a61
mlops-project


str

In [3]:
credential = DefaultAzureCredential()

In [4]:
ml_client = MLClient(
    credential, subscription_id, resource_group, workspace_name
)
print("DEBUG:", subscription_id, resource_group, workspace_name)


DEBUG: 88f330cf-5648-423b-aaa3-bc9705075a61 mlops-project mlops-project-ml


In [5]:
from azure.ai.ml.entities import Environment
import os

custom_env_name = "aml-training"


pipeline_job_project_env = Environment(
    name="training-env",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file=os.path.join("components", "training", "conda.yaml"),
)
pipeline_job_project_env = ml_client.environments.create_or_update(pipeline_job_project_env)

print(
    f"Environment with name {pipeline_job_project_env.name} is registered to workspace, the environment version is {pipeline_job_project_env.version}"
)
print("DEBUG:", subscription_id, resource_group, workspace_name)

Environment with name training-env is registered to workspace, the environment version is 1
DEBUG: 88f330cf-5648-423b-aaa3-bc9705075a61 mlops-project mlops-project-ml


In [8]:
from azure.ai.ml import command
from azure.ai.ml import Input, Output
import os

from azure.ai.ml import command, Input, Output
from azure.ai.ml.entities import Component

# Create the component using the Component class
training_component = command(
    name="training-1",
    display_name="Training an AI model",
    description="Trains an AI model by inputting training data CSV, augmenting it, and saving a model.",
    inputs={
        "training_data": Input(type="uri_folder"),
        "epochs": Input(type="number", default=100),
        "noise_level": Input(type="number", default=0.01),
    },
    outputs={
        "model_output": Output(type="uri_folder", mode="rw_mount"),
    },
    code=".",
    command="python train.py --training_data ${{inputs.training_data}} --epochs ${{inputs.epochs}} --noise_level ${{inputs.noise_level}} --output_folder ${{outputs.model_output}}",
    environment="training-env:1",
)

In [9]:
from azure.ai.ml import dsl, Input, Output

@dsl.pipeline(
    compute="project-ml-compute",
    description="training pipeline"
)


def housing_training_pipeline(
    epochs: int,
    input_version: str = "1",
    output_version: str = "1"
):
    training_job = training_component(
        training_data= Input(
            type = "uri_folder",
            path = "azureml:data_cleaned:1"
        ),
        epochs=epochs,
        noise_level=0.01
    )

    output_path = (
        f"azureml://subscriptions/{subscription_id}/"
        f"resourcegroups/{resource_group}/"
        f"workspaces/{workspace_name}/"
        f"datastores/workspaceblobstore/paths/trained_model/"
    )

    training_job.outputs.model_output = Output(
        type="uri_folder",
        path=output_path,
        name="trained_model",
        mode="rw_mount"
    )

    return {
        "output_data": training_job.outputs.model_output,
    }

# Create the pipeline instance
training_pipeline = housing_training_pipeline(
    epochs=100
)

# Submit the pipeline directly
training_pipeline_job = ml_client.jobs.create_or_update(
    training_pipeline,
    experiment_name="training_pipeline",
)

print(f"Pipeline submitted: {training_pipeline_job.studio_url}")



[32mUploading mlops-project (0.11 MBs): 100%|██████████| 114348/114348 [00:00<00:00, 216160.14it/s]
[39m

pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFileJobOutput'> and will be ignored


Pipeline submitted: https://ml.azure.com/runs/polite_match_r12fhkt0pw?wsid=/subscriptions/88f330cf-5648-423b-aaa3-bc9705075a61/resourcegroups/mlops-project/workspaces/mlops-project-ml&tid=4ded4bb1-6bff-42b3-aed7-6a36a503bf7a
