In [2]:
# Create a handler
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Authenticate using DefaultAzureCredential
credential = DefaultAzureCredential()

# Workspace coordinates -- replace each quoted placeholder with your own value.
# (They are kept as strings so the cell stays valid Python.)
SUBSCRIPTION = "<subscription id>"
RESOURCE_GROUP = "<name of resource group>"
WS_NAME = "<Name of Workspace>"

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WS_NAME,
)

In [3]:
# Sanity check: fetch the workspace through the client and show where it lives
ws = ml_client.workspaces.get(WS_NAME)
print(f"{ws.location} : {ws.resource_group}")

northeurope : from_model_to_production


In [4]:
# Create the local folder that will hold the environment definition
import os

dependencies_dir = "./dependencies"
os.makedirs(name=dependencies_dir, exist_ok=True)

In [5]:
%%writefile {dependencies_dir}/conda.yaml
# Conda specification referenced by the custom Azure ML environment
name: model-env
channels:
  - conda-forge
dependencies:
  # Packages resolved by conda
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=1.0.2
  - scipy=1.7.1
  - pandas>=1.1,<1.2
  # Packages installed through pip
  - pip:
    - inference-schema[numpy-support]==1.3.0
    - mlflow==2.8.0
    - mlflow-skinny==2.8.0
    - azureml-mlflow==1.51.0
    - psutil>=5.8,<5.9
    - tqdm>=4.59,<4.60
    - ipykernel~=6.0
    - matplotlib
    # Imported by the training script as cv2 and PIL
    - opencv-python-headless
    - pillow

Overwriting ./dependencies/conda.yaml


In [6]:
from azure.ai.ml.entities import Environment

custom_env_name = "aml-scikit-learn"

# Describe the environment: a base Docker image plus the conda specification file
env_spec = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file=os.path.join(dependencies_dir, "conda.yaml"),
    name=custom_env_name,
    description="Custom environment for image classification",
    tags={"scikit-learn": "1.0.2"},
)

# Register the environment in the workspace and keep the returned, versioned object
custom_job_env = ml_client.environments.create_or_update(env_spec)

registration_msg = f"Environment with name {custom_job_env.name} is registered to workspace, the environment version is {custom_job_env.version}"
print(registration_msg)

Environment with name aml-scikit-learn is registered to workspace, the environment version is 24


In [7]:
# Create training script

In [8]:
import os

# Local folder that will receive the training script
train_src_dir = "./src"
os.makedirs(name=train_src_dir, exist_ok=True)

In [9]:
%%writefile {train_src_dir}/main.py
import os
import argparse
import pandas as pd
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature


from PIL import Image
import cv2
import io

import numpy as np

from sklearn.svm import SVC

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score




def _load_images(data_dir, size=(64, 64), extensions=(".jpg", ".jpeg", ".png")):
    """Load the labelled images below ``data_dir`` as flattened pixel vectors.

    Every immediate sub-directory of ``data_dir`` is treated as one class; the
    directory name becomes the label of each image file found directly in it.

    Args:
        data_dir: Root folder containing one sub-folder per class.
        size: ``(width, height)`` every image is resized to.
        extensions: Lower-case file suffixes that count as images; matching is
            case-insensitive.

    Returns:
        Tuple ``(images, labels)`` of numpy arrays: one flattened BGR pixel
        vector per image and the matching class name per image.
    """
    images = []
    labels = []

    # Sorted listings keep the sample order stable between runs, which the
    # seeded train/test split needs in order to be reproducible.
    for class_name in sorted(os.listdir(data_dir)):
        folder = os.path.join(data_dir, class_name)
        if not os.path.isdir(folder):
            continue

        for file_name in sorted(os.listdir(folder)):
            if not file_name.lower().endswith(extensions):
                continue

            # The context manager closes the file handle; convert("RGB") gives
            # a 3-channel array even for grayscale, palette or RGBA files.
            with Image.open(os.path.join(folder, file_name)) as img:
                rgb = np.array(img.convert("RGB"))

            bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
            images.append(cv2.resize(bgr, size).flatten())
            labels.append(class_name)

    return np.array(images), np.array(labels)


def main():
    """Train a linear SVM image classifier and register it through MLflow.

    Command-line arguments:
        --data: Path to the folder holding one sub-folder of images per class.
        --registered_model_name: Name under which the model is registered; also
            used as the MLflow artifact path and the local save directory.

    Raises:
        ValueError: If no image is found below ``--data``.
    """

    # input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--registered_model_name", type=str, required=True, help="model name")
    parser.add_argument("--data", type=str, required=True, help="path to input data")
    args = parser.parse_args()

    # Start logging; the context manager ends the run even when training fails.
    with mlflow.start_run():
        # enable autologging
        mlflow.sklearn.autolog()

        ###################
        #<prepare the data>
        ###################
        images, labels = _load_images(args.data)

        mlflow.log_metric("num_samples", len(images))
        mlflow.log_metric("num_features", 1)
        mlflow.log_metric("num_features_per_image", len(images[0]) if images.size > 0 else 0)

        if len(images) == 0:
            raise ValueError(
                f"No .jpg/.jpeg/.png images found in the class sub-folders of {args.data!r}"
            )

        # Split train and test datasets
        X_train, X_test, y_train, y_test = train_test_split(
            images, labels, test_size=0.2, train_size=0.8, random_state=42
        )
        ####################
        #</prepare the data>
        ####################

        ##################
        #<train the model>
        ##################
        # probability=True keeps predict_proba available on the registered model
        svm = SVC(kernel='linear', probability=True)
        svm.fit(X_train, y_train)

        # Predict on the test set
        y_pred = svm.predict(X_test)

        # Log performance metrics manually
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)

        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F1 Score:", f1)

        print(classification_report(y_test, y_pred))
        ###################
        #</train the model>
        ###################

        ##########################
        #<save and register model>
        ##########################
        signature = infer_signature(X_train, y_train)

        # Registering the model to the workspace
        print("Registering the model via MLFlow")
        mlflow.sklearn.log_model(
            sk_model=svm,
            registered_model_name=args.registered_model_name,
            artifact_path=args.registered_model_name,
            signature=signature,
        )

        # Saving the model to a file
        mlflow.sklearn.save_model(
            sk_model=svm,
            path=os.path.join(args.registered_model_name, "trained_model"),
        )
        ###########################
        #</save and register model>
        ###########################

if __name__ == "__main__":
    main()

Overwriting ./src/main.py


In [10]:
from azure.ai.ml import command
from azure.ai.ml import Input

registered_model_name = "image_classification_model"

# Training data location -- replace the quoted placeholder with the URI of your
# image folder. (Kept as a string so the cell stays valid Python.)
data_path = "<path to blob store of animal images>"

# Command job: runs main.py from ./src/ in the registered environment on the named compute
job = command(
    inputs={
        "data": Input(type="uri_folder", path=data_path),
        "registered_model_name": registered_model_name,
    },
    code="./src/",
    command="python main.py --data ${{inputs.data}} --registered_model_name ${{inputs.registered_model_name}}",
    environment="aml-scikit-learn@latest",
    display_name="pet_classification",
    compute="MLprodComputeCheap",
)

In [11]:
# Submit the command job through the workspace client; the returned job is shown as the cell output
ml_client.create_or_update(job)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading src (0.0 MBs): 100%|███

Experiment,Name,Type,Status,Details Page
janphilipp,frosty_lettuce_tjll2rymr4,command,Starting,Link to Azure Machine Learning studio
