## Conectar ao Workspace

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Authenticate using the default credential provided by azure-identity.
credential = DefaultAzureCredential()

# Workspace coordinates are read from environment variables so that no
# subscription or workspace identifiers are hardcoded in the notebook.
# A missing variable raises KeyError here instead of failing later inside the SDK.
import os

SUBSCRIPTION = os.environ["AZURE_SUBSCRIPTION_ID"]
RESOURCE_GROUP = os.environ["AZURE_RESOURCE_GROUP"]
WS_NAME = os.environ["AZURE_ML_WORKSPACE"]

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WS_NAME,
)

# Fetch the workspace once to confirm that the handle actually resolves.
ws = ml_client.workspaces.get(WS_NAME)
print(ws.location, ":", ws.resource_group)

brazilsouth : dio_exam


## Criar o ambiente do job

### Cria o diretório de dependências

In [10]:
import os

# Local folder that holds the environment specification files.
dependencies_dir = "./dependencies"
os.makedirs(name=dependencies_dir, exist_ok=True)  # no error if it already exists

### Cria o arquivo de dependências do ambiente conda

In [11]:
%%writefile {dependencies_dir}/conda.yaml
# Conda environment specification for the training job.
name: model-env
channels:
  - conda-forge
# Packages resolved by conda from the channels listed above.
dependencies:
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=1.0.2
  - scipy=1.7.1
  - pandas>=1.1,<1.2
  # Packages installed with pip inside the conda environment.
  - pip:
    - inference-schema[numpy-support]==1.3.0
    - mlflow==2.8.0
    - mlflow-skinny==2.8.0
    - azureml-mlflow==1.51.0
    - psutil>=5.8,<5.9
    - tqdm>=4.59,<4.60
    - ipykernel~=6.0
    # NOTE(review): matplotlib is the only unpinned package - consider pinning it.
    - matplotlib

Overwriting ./dependencies/conda.yaml


In [12]:
from azure.ai.ml.entities import Environment

custom_env_name = "sklearn-basic"

# Collect the environment settings first: a base Docker image plus the conda
# specification written to the dependencies folder.
env_settings = {
    "name": custom_env_name,
    "description": "Custom environment for gelato regresion model",
    "tags": {"scikit-learn": "1.0.2"},
    "conda_file": os.path.join(dependencies_dir, "conda.yaml"),
    "image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
}

# Build the local definition, then replace it with the entity returned by the
# workspace, which carries the assigned version.
custom_job_env = Environment(**env_settings)
custom_job_env = ml_client.environments.create_or_update(custom_job_env)

print(
    f"Environment with name {custom_job_env.name} is registered to workspace, "
    f"the environment version is {custom_job_env.version}"
)


Environment with name sklearn-basic is registered to workspace, the environment version is 4


## Criando o script do modelo

In [None]:
import os

# Local folder that receives the training script uploaded with the job.
train_src_dir = "./src"
os.makedirs(name=train_src_dir, exist_ok=True)  # no error if it already exists

In [14]:
%%writefile {train_src_dir}/main.py
import os
import argparse
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

def main():
    """Train a linear regression on the input CSV and register it via MLflow.

    Command-line arguments:
        --data: path to the input CSV file (required).
        --test_train_ratio: fraction of the rows held out for testing
            (optional, defaults to 0.25).
        --registered_model_name: name under which the model is registered;
            also used as the MLflow artifact path and as the parent directory
            of the locally saved model (required).

    The CSV must contain an ``Ice Cream Profits`` column, which is the
    regression target; every other column is passed to the model as a feature.
    """

    # input and output arguments
    parser = argparse.ArgumentParser()
    # Both paths/names below are used unconditionally, so fail fast with a
    # clear argparse message instead of a late error on a None value.
    parser.add_argument("--data", type=str, required=True, help="path to input data")
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
    parser.add_argument("--registered_model_name", type=str, required=True, help="model name")
    args = parser.parse_args()

    target_column = "Ice Cream Profits"

    # Start logging. The context manager ends the run on exit, including when
    # one of the steps below raises, so the run is never left open.
    with mlflow.start_run():
        # enable autologging
        mlflow.sklearn.autolog()

        ###################
        #<prepare the data>
        ###################
        gelato_df = pd.read_csv(args.data)

        mlflow.log_metric("num_samples", gelato_df.shape[0])
        # One column is the target, hence the "- 1".
        mlflow.log_metric("num_features", gelato_df.shape[1] - 1)

        # Split train and test datasets
        X = gelato_df.drop(target_column, axis=1)
        y = gelato_df[target_column]

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=args.test_train_ratio, random_state=42
        )
        ####################
        #</prepare the data>
        ####################

        ##################
        #<train the model>
        ##################
        lr = LinearRegression()

        # Train the model
        lr.fit(X_train, y_train)

        # Evaluate on the held-out split. The return value is unused;
        # NOTE(review): presumably kept so MLflow autologging records the
        # score as a post-training metric - confirm before removing.
        lr.score(X_test, y_test)

        y_pred = lr.predict(X_test)

        print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
        # This line was previously labelled "MAE" although it prints R^2.
        print(f'R2: {r2_score(y_test, y_pred)}')
        ###################
        #</train the model>
        ###################

        ##########################
        #<save and register model>
        ##########################
        # Registering the model to the workspace
        print("Registering the model via MLFlow")
        mlflow.sklearn.log_model(
            sk_model=lr,
            registered_model_name=args.registered_model_name,
            artifact_path=args.registered_model_name,
        )

        # Saving the model to a file
        mlflow.sklearn.save_model(
            sk_model=lr,
            path=os.path.join(args.registered_model_name, "trained_model"),
        )
        ###########################
        #</save and register model>
        ###########################

if __name__ == "__main__":
    main()

Overwriting ./src/main.py


## Configura o comando

In [17]:
from azure.ai.ml import command
from azure.ai.ml import Input

registered_model_name = "gelato_regression_model_nt"

# Command job that runs src/main.py inside the registered "sklearn-basic" environment.
job = command(
    inputs=dict(
        data=Input(
            type="uri_file",
            # The previous value was an Azure ML Studio file-explorer page URL,
            # which is a UI link rather than a data location, and it embedded
            # subscription and tenant identifiers. Point at the CSV itself.
            # NOTE(review): assumes the notebook runs from the folder that
            # contains the CSV (Users/iaanenomoto2/gelato-magico-ml) - confirm,
            # or switch to an azureml:// datastore URI.
            path="./Ice Cream Sales - temperatures.csv",
        ),
        test_train_ratio=0.2,
        registered_model_name=registered_model_name,
    ),
    code="./src/",  # location of source code
    # The data path is quoted because the file name contains spaces; unquoted,
    # the shell could split it into several arguments.
    command='python main.py --data "${{inputs.data}}" --test_train_ratio ${{inputs.test_train_ratio}} --registered_model_name ${{inputs.registered_model_name}}',
    environment="sklearn-basic@latest",
    display_name="gelato_regression_prediction_nt",
)

## Chama o job

In [18]:
# Submit the configured command job through the workspace client; the returned
# job object is the last expression, so it is rendered as the cell output.
ml_client.create_or_update(job)

Experiment,Name,Type,Status,Details Page
gelato-magico-ml,yellow_cart_cyb6ktndq4,command,Starting,Link to Azure Machine Learning studio
