## Model training components

In [1]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import load_component
from azure.keyvault.secrets import SecretClient

### Configure key-vault to get secrets

In [2]:
# Key Vault that stores the subscription id, resource group and workspace name.
keyVaultName = "mobilepricinga6945442583"
KVUri = f"https://{keyVaultName}.vault.azure.net"

# Credential used to authenticate against the vault.
credential = DefaultAzureCredential()

# Secret client bound to the vault URL built above.
client = SecretClient(
    vault_url=KVUri,
    credential=credential,
)

### Authenticate

In [3]:
# NOTE(review): `credential` was already created in the key-vault cell above;
# this re-assignment builds a second DefaultAzureCredential under the same name.
credential = DefaultAzureCredential()

### Get Secrets from key-vault

In [4]:
# Read the three workspace coordinates from the vault, in a fixed order:
# subscription id, resource group name, workspace name.
subs_id, rg_name, ws_name = (
    client.get_secret(secret_name).value
    for secret_name in (
        "subscription-id",
        "ml-resource-group",
        "ml-workspace-name",
    )
)

### Create Client

In [5]:
# Workspace handle used by the later cells to register and fetch components.
# The workspace is identified by the three values read from the key vault.
ml_client = MLClient(
    credential,
    subscription_id=subs_id,
    resource_group_name=rg_name,
    workspace_name=ws_name,
)

### Define data preparation component

Azure ML supports MLflow-style pipeline components that are defined using a YAML file.

The structure of the YAML file is given below:

```yaml
name: data_prep_mobile_pricing
display_name: Prepare data for mobile pricing model
# version: 1 # Not specifying a version will automatically update the version
type: command
inputs:
  train_data: 
    type: uri_file
outputs:
  train:
    type: uri_folder
  test:
    type: uri_folder
code: .
environment:
  # for this step, we'll use an AzureML curated environment
  azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1
command: >-
  python run.py 
  --train_data ${{inputs.train_data}} --train ${{outputs.train}} --test ${{outputs.test}}
```
The `run.py` script, located at `./components/data_prep/run.py`, is defined as given below:

```python
import pandas as pd 
from sklearn.model_selection import train_test_split
import numpy as np
from argparse import ArgumentParser
import os


def main():
    """Split the raw mobile-pricing CSV into train/test arrays and save them.

    The script is driven by named command-line flags, matching the component
    command ``python run.py --train_data ... --train ... --test ...``:

        --train_data  Path to the input CSV; it must contain a
                      ``price_range`` column, which is used as the target.
        --train       Output folder that receives ``X_train.npy`` and
                      ``y_train.npy``.
        --test        Output folder that receives ``X_test.npy`` and
                      ``y_test.npy``.

    20% of the rows are held out for the test split (``test_size=0.2``);
    no ``random_state`` is passed to ``train_test_split``.
    """
    parser = ArgumentParser()
    # Declared as "--" options: the component command passes named flags,
    # which positional declarations would reject as unrecognized arguments.
    parser.add_argument("--train_data", required=True, help="path to train data")
    parser.add_argument("--train", required=True, help="path of train data")
    parser.add_argument("--test", required=True, help="path of test data")
    args = parser.parse_args()

    train_df = pd.read_csv(args.train_data)
    y = train_df['price_range'].values
    X = train_df.drop('price_range', axis=1).values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # np.save does not create missing directories, so make sure both output
    # folders exist before writing into them.
    for out_dir in (args.train, args.test):
        os.makedirs(out_dir, exist_ok=True)

    np.save(os.path.join(args.train, "X_train.npy"), X_train)
    np.save(os.path.join(args.test, "X_test.npy"), X_test)
    np.save(os.path.join(args.train, "y_train.npy"), y_train)
    np.save(os.path.join(args.test, "y_test.npy"), y_test)
if __name__=="__main__":
    main()
```



### Loading the component

In [6]:
# Build the data-preparation component object from its YAML definition on disk.
data_prep_component = load_component(source="./components/data_prep/run.yaml")

### Register the component

In [7]:
# Now we register the component to the workspace.
# The name is rebound to the object returned by create_or_update.
data_prep_component = ml_client.create_or_update(data_prep_component)

[32mUploading data_prep (0.0 MBs): 100%|███████████| 3053/3053 [00:01<00:00, 2553.58it/s][0m
[39m



### Component in the workspace

![](../../../../images/pipeline_component_1.PNG)

In [8]:
# Fetch the component from the workspace by name; no version is passed here.
comp = ml_client.components.get("data_prep_mobile_pricing")

In [9]:
# Bare expression: the notebook shows the component's repr as the cell output.
comp

CommandComponent({'intellectual_property': None, 'auto_increment_version': False, 'source': 'REMOTE.WORKSPACE.COMPONENT', 'is_anonymous': False, 'auto_delete_setting': None, 'name': 'data_prep_mobile_pricing', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/0aa1c63a-7a46-403c-91e4-8ec91264bc42/resourceGroups/rg-mobile-pricing-dev/providers/Microsoft.MachineLearningServices/workspaces/mobile-pricing-azml-dev/components/data_prep_mobile_pricing/labels/default', 'Resource__source_path': None, 'base_path': '/Users/gunnvantsaini/Library/CloudStorage/OneDrive-Personal/consulting/setu/mlops_azure/content/01_Mlops_Using_Cloud_Tools/components_azure_ml/notebooks', 'creation_context': <azure.ai.ml._restclient.v2022_10_01.models._models_py3.SystemData object at 0x1324b6260>, 'serialize': <msrest.serialization.Serializer object at 0x1324b5720>, 'command': 'python run.py  --train_data ${{inputs.train_data}}  --train ${{outputs.train}}  --test ${{outpu

### Define training component

```yaml
name: model_training_mobile_pricing
display_name: Training classifier on mobile pricing data
version: 1 # Not specifying a version will automatically update the version
type: command
inputs:
  train: 
    type: uri_folder
  test:
    type: uri_folder
  model_name:
    type: string
outputs:
  model:
    type: uri_folder
code: .
environment:
  # for this step, we'll use an AzureML curated environment
  azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1
command: >-
  python run.py 
  --train ${{inputs.train}} --test ${{inputs.test}} --model_name ${{inputs.model_name}} --model ${{outputs.model}}
```

```python
import numpy as np 
import mlflow
from sklearn.linear_model import LogisticRegression
import os
from argparse import ArgumentParser

# Start an MLflow run at module level; main() closes it with mlflow.end_run().
mlflow.start_run()

# Enable MLflow autologging for scikit-learn.
mlflow.sklearn.autolog()

# Make sure a local ./outputs directory exists (no error if it already does).
os.makedirs("./outputs", exist_ok=True)

def main():
    """Train a logistic-regression classifier and log/save it with MLflow.

    The script is driven by named command-line flags, matching the component
    command ``python run.py --train ... --test ... --model_name ... --model ...``:

        --train       Folder containing ``X_train.npy`` and ``y_train.npy``.
        --test        Folder containing ``X_test.npy`` and ``y_test.npy``.
        --model_name  Name used both as the registered model name and as the
                      artifact path when logging the model.
        --model       Output folder; the model is saved under
                      ``<model>/trained_model``.

    The MLflow run started at module level is ended at the end of this function.
    """
    parser = ArgumentParser()
    # Declared as "--" options: the component command passes named flags,
    # which positional declarations would reject as unrecognized arguments.
    parser.add_argument("--train", required=True, help="Path to train data")
    parser.add_argument("--test", required=True, help="Path to test data")
    parser.add_argument("--model_name", required=True, help="Name of the model")
    parser.add_argument("--model", required=True, help="Path to trained model")
    args = parser.parse_args()

    X_train = np.load(os.path.join(args.train, "X_train.npy"))
    X_test = np.load(os.path.join(args.test, "X_test.npy"))
    y_train = np.load(os.path.join(args.train, "y_train.npy"))
    y_test = np.load(os.path.join(args.test, "y_test.npy"))

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    # Predictions and y_test are computed/loaded but not used further here.
    y_pred = clf.predict(X_test)

    # Log the model to the active run and register it under the given name.
    mlflow.sklearn.log_model(
        sk_model=clf,
        registered_model_name=args.model_name,
        artifact_path=args.model_name,
    )

    # Saving the model to a file
    mlflow.sklearn.save_model(
        sk_model=clf,
        path=os.path.join(args.model, "trained_model"),
    )

    # Stop Logging
    mlflow.end_run()

# Run the training entry point only when the file is executed as a script.
if __name__=="__main__":
    main()
```

### Loading the component

In [10]:
# Build the model-training component object from its YAML definition on disk.
model_training_component = load_component(source="./components/model_training/run.yaml")

### Register Component

In [11]:
# Register the training component in the workspace; the name is rebound to
# the object returned by create_or_update.
model_training_component = ml_client.create_or_update(model_training_component)

[32mUploading model_training (0.0 MBs): 100%|██████| 1946/1946 [00:01<00:00, 1610.85it/s][0m
[39m

