# Login Azure Cloud Plateform

In [1]:
################################### Azure ####################

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml.entities import Environment
from azure.ai.ml import command
from azure.ai.ml import dsl,Input, Output
import mlflow
import logging
import webbrowser

############################# Data Analysis & Others ############################

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import argparse
import os

################################# Stastics & Machine Learning #####################

from scipy.stats import skew, kstest
from scipy import stats

from sklearn.feature_selection import SelectKBest, chi2, f_classif
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    credential = InteractiveBrowserCredential()

# Access the Resource Group and Work Space

In [3]:
ml_client = MLClient(
    credential=credential,
    subscription_id="xxxxxxxx-xxxx-40c6-xxxx-xxxxxxxxxxxx",
    resource_group_name="your_resource_group",
    workspace_name="your_workspace_name",
)

# Retrieve Url From Azure Blob

In [4]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

web_path = "https://your_resource_group.blob.core.windows.net/datasets/train.csv"

# Load the data to 'Data' in Azure ML Work Space

In [5]:
data = Data(name="MobileClassifications", path=web_path, type=AssetTypes.URI_FILE,
            description="Dataset for mobile", 
            tags={"source_type": "web", "source": "AzureML examples blob"},
            version="1.0.3")

data = ml_client.data.create_or_update(data)
print(f"{data.name} dataset was registered to workspace")

MobileClassifications dataset was registered to workspace


# Create Cluster Compute

In [6]:
from azure.ai.ml.entities import AmlCompute

cpu_compute = "cpu-cluster123"

try:
    cpu_cluster = ml_client.compute.get(cpu_compute)
    print(f"You already have a cluster named {cpu_compute}")
except Exception:
    cpu_cluster = AmlCompute(name = "cpu-cluster123",
                             type = "amlcompute",
                             size = "STANDARD_DS3_V2",
                             min_instances = 0,
                             max_instances=4,
                             idle_time_before_scale_down=120,
                             tier = "Dedicated",
                            )
    cpu_cluster = ml_client.begin_create_or_update(cpu_cluster)

You already have a cluster named cpu-cluster123


# Create Custom Environment

In [7]:
dependencies_dir = "./dependencies"
os.makedirs(dependencies_dir, exist_ok=True)

In [8]:
%%writefile {dependencies_dir}/conda.yaml
name: model-env
channels:
  - conda-forge
dependencies:
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=0.24.2
  - scipy=1.7.1
  - pandas>=1.1,<1.2  
  - matplotlib=3.4.3  # Add this line for matplotlib
  - pip:
    - inference-schema[numpy-support]==1.3.0
    - xlrd==2.0.1
    - azureml-mlflow==1.42.0

Overwriting ./dependencies/conda.yaml


In [9]:
custom_env_name = "data_science_env"

pipeline_job_env = Environment(
    name=custom_env_name,
    description="custom environment ",
    tags={"scikit-learn": "0.24.2"},
    conda_file=os.path.join(dependencies_dir, "conda.yaml"),
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    version="0.1.1",
)
pipeline_job_env = ml_client.environments.create_or_update(pipeline_job_env)

print(f"Environment with name {pipeline_job_env.name} is registered to workspace, the environment version is {pipeline_job_env.version}")

Environment with name data_science_env is registered to workspace, the environment version is 0.1.1


# Creating Piplines

## Data Preprocessing

In [10]:
data_preprocessing_dir = "./components/data_preprocessing"

os.makedirs(data_preprocessing_dir, exist_ok=True)

In [11]:
%%writefile {data_preprocessing_dir}/data_preprocessing.py

import os
import argparse
import pandas as pd
import logging
import mlflow


def main():

    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, help="path to input data")
    parser.add_argument("--clean_data", type=str, help="path to save cleaned data")
    args = parser.parse_args()

    mlflow.start_run()

    logging.info("Input data: %s", args.data)
    print("Input data:", args.data)

    df = pd.read_csv(args.data)

    mlflow.log_metric("num_samples", df.shape[0])
    mlflow.log_metric("num_features", df.shape[1])

    missing_values = df.isnull().sum()
    missing_values_dict = missing_values[missing_values > 0].to_dict()
    for feature, count in missing_values_dict.items():
        mlflow.log_metric(f"missing_values_{feature}", count)


    duplicate_values = df.duplicated().sum()
    mlflow.log_metric("duplicate_values", duplicate_values)

    df.dropna(inplace=True)
    df.drop_duplicates(inplace=True)

    output_path = os.path.join(args.clean_data, "cleaned_data.csv")
    df.to_csv(output_path, index=False)

    logging.info("Cleaned data saved to: %s", output_path)


    mlflow.end_run()


if __name__ == "__main__":
    main()


Overwriting ./components/data_preprocessing/data_preprocessing.py


In [12]:
data_preprocessing_component = command(name="data_preprocessing",
                                        display_name="Data Preprocessing",
                                        description="Clean data",
                                        inputs={"data": Input(type="uri_folder"),},
                                        outputs=dict(
                                            clean_data=Output(type="uri_folder", mode="rw_mount"),),
                                        code=data_preprocessing_dir,
                                        command="""python data_preprocessing.py \
                                                --data ${{inputs.data}} --clean_data ${{outputs.clean_data}} """,
                                        environment=f"{pipeline_job_env.name}:{pipeline_job_env.version}")

data_preprocessing_component = ml_client.create_or_update(data_preprocessing_component.component)
print(f"Component {data_preprocessing_component.name} with Version {data_preprocessing_component.version} is registered")

Component data_preprocessing with Version 2024-10-08-08-06-08-6306347 is registered


## Feature Engineering

In [13]:
feature_eng_dir = "./components/feature_eng"
os.makedirs(feature_eng_dir, exist_ok=True)

In [14]:
%%writefile {feature_eng_dir}/feature_eng.py

import os
import argparse
import pandas as pd
import logging
import mlflow
import matplotlib.pyplot as plt
from scipy.stats import boxcox, skew
from sklearn.preprocessing import MinMaxScaler
from joblib import dump
from sklearn.feature_selection import SelectKBest, chi2, f_classif


def select_first_file(path):
    files = os.listdir(path)
    if not files:
        logging.error("No files found in the specified directory.")
        raise FileNotFoundError("No files found in the specified directory.")
    return os.path.join(path, files[0])


def main():
  
    # Set up logging
    logging.basicConfig(level=logging.INFO)

    # Input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, help="path to input data")
    parser.add_argument("--display_dir", type=str, help="directory to save plots and results")
    parser.add_argument("--output_dir", type=str, help="directory to save plots and results")
    args = parser.parse_args()

    # Start MLflow run
    mlflow.start_run()

    input_file = select_first_file(args.data)
    logging.info(f"Loading data from {input_file}")
    
    df = pd.read_csv(input_file)

    numerical = ['battery_power', 'clock_speed', 'fc',  'int_memory',  'mobile_wt',  
                 'pc', 'px_height','px_width', 'ram', 'sc_h', 'sc_w', 'talk_time']

    cat = ['blue','dual_sim','four_g','m_dep','n_cores', 'three_g','touch_screen', 'wifi']

    # Before Skewness
    skewness_val = {col: skew(df[col]) for col in numerical}
    skewness_before_df = pd.DataFrame(list(skewness_val.items()), columns=['Feature', 'Skewness']).sort_values(by='Skewness', ascending=False)

    skewness_before_csv = os.path.join(args.display_dir, "skew_before_df.csv")
    skewness_before_df.to_csv(skewness_before_csv, index=False)
    logging.info(f"Skewness Before saved to {skewness_before_csv}")

    mlflow.log_artifact(skewness_before_csv)

    # After Skewness
    for col in ['px_height', 'fc', 'sc_w', 'clock_speed']:
        df[col] += 1  
        transformed_data, _ = boxcox(df[col])
        df[col] = transformed_data

    skewness_val = {col: skew(df[col]) for col in numerical}
    skewness_after_df = pd.DataFrame(list(skewness_val.items()), columns=['Feature', 'Skewness']).sort_values(by='Skewness', ascending=False)

    skewness_after_csv = os.path.join(args.display_dir, "skew_after_df.csv")
    skewness_after_df.to_csv(skewness_after_csv, index=False)
    logging.info(f"Skewness After saved to {skewness_after_csv}")

    mlflow.log_artifact(skewness_after_csv)

    # ANOVA
    selector = SelectKBest(f_classif, k=12)
    X_anova = selector.fit_transform(df[numerical], df['price_range'])

    anova_scores = selector.scores_
    anova_p_values = selector.pvalues_

    anova_df = pd.DataFrame({'Feature': numerical, 'P-Value': anova_p_values, 'Scores': anova_scores})
    anova_df.sort_values(by='P-Value', ascending=True, inplace=True)

    plt.figure(figsize=(12, 8))
    plt.bar(anova_df['Feature'], anova_df['P-Value'], color='tab:red', alpha=0.6)
    plt.xticks(rotation=45)
    plt.xlabel('Features')
    plt.ylabel('P-Value')
    plt.title('P-Values of Features from ANOVA')
    plt.axhline(y=0.05, color='gray', linestyle='--', label='Significance Level (0.05)')
    plt.legend()

    anova_chart = os.path.join(args.display_dir, "anova_p_values.png")
    plt.savefig(anova_chart)
    plt.close()

    mlflow.log_artifact(anova_chart)

    # Save ANOVA DataFrame as a CSV
    anova_csv = os.path.join(args.display_dir, "anova_df.csv")
    anova_df.to_csv(anova_csv, index=False)
    logging.info(f"ANOVA DataFrame saved to {anova_csv}")

    mlflow.log_artifact(anova_csv)

    # Chi Square
    chi_scores = chi2(df[cat].astype(int), df['price_range'])
    cat_df = pd.DataFrame({'Feature': cat, 'P-Value': chi_scores[1],})
    cat_df.sort_values(by='P-Value', ascending=True, inplace=True)

    plt.figure(figsize=(15, 8))
    plt.bar(cat_df['Feature'], cat_df['P-Value'], color='tab:blue', alpha=0.6)
    plt.xlabel('Features')
    plt.ylabel('P-Value')
    plt.title('P-Values of Features from Chi-Squared Test')
    plt.axhline(y=0.05, color='gray', linestyle='--', label='Significance Level (0.05)')
    plt.xticks(rotation=45)

    chi_square_chart = os.path.join(args.display_dir, "chi_square_p_values.png")
    plt.savefig(chi_square_chart)
    plt.close()

    mlflow.log_artifact(chi_square_chart)

    # Save Chi-Square DataFrame as a CSV
    chi_square_csv = os.path.join(args.display_dir, "chi_square_df.csv")
    cat_df.to_csv(chi_square_csv, index=False)
    logging.info(f"Chi-Square DataFrame saved to {chi_square_csv}")

    mlflow.log_artifact(chi_square_csv)

    # Feature Selection
    num_features = list(anova_df[anova_df['P-Value'] <= 0.05]['Feature'])
    cat_features = list(cat_df[cat_df['P-Value'] <= 0.05]['Feature'])

    #df[cat_features] = df[cat_features].astype(str)

    output_data = pd.concat([df[num_features].add_suffix('_num'), 
                          df[cat_features].add_suffix('_cat'), 
                          df['price_range']], 
                         axis=1)

    output_data_csv = os.path.join(args.output_dir, "output_data.csv")
    output_data.to_csv(output_data_csv, index=False)
    
    logging.info(f"Output data saved to {output_data_csv}")
    mlflow.log_artifact(output_data_csv)

 

    mlflow.end_run()   


if __name__ == "__main__":
    main()


Overwriting ./components/feature_eng/feature_eng.py


In [15]:
feature_eng_component = command(
                        name="feature_eng",
                        display_name="Feature Engineering",
                        description="Feature engineering component for ANOVA analysis",
                        inputs={
                            "data": Input(type="uri_folder"),   
                        },
                        outputs=dict(
                            output_dir=Output(type="uri_folder", mode="rw_mount"),
                            display_dir=Output(type="uri_folder", mode="rw_mount")
                        ),
                        code=feature_eng_dir,
                        command="""python feature_eng.py \
                                    --data ${{inputs.data}} \
                                    --output_dir ${{outputs.output_dir}}  \
                                    --display_dir ${{outputs.display_dir}} 
                                """,
                        environment=f"{pipeline_job_env.name}:{pipeline_job_env.version}",
                    )

# Register the component
feature_eng_component= ml_client.create_or_update(feature_eng_component.component)
print(f"Component {feature_eng_component.name} with Version {feature_eng_component.version} is registered")

Component feature_eng with Version 2024-10-08-08-06-13-9334982 is registered


## Model Training

In [16]:
train_src_dir = "./components/train"
os.makedirs(train_src_dir, exist_ok=True)

In [17]:
%%writefile {train_src_dir}/train.py

import argparse
import os
import pandas as pd
import mlflow
import numpy as np

from joblib import dump
from joblib import load

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC

from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

from sklearn.metrics import f1_score
import mltable

def select_first_file(path):
    files = os.listdir(path)
    return os.path.join(path, files[0])
    
def evaluate_model(scaler, X_train, y_train, num_features, cat_features, model, cv):
    transformer = ColumnTransformer(transformers=[('num', scaler, num_features), ('cat', OneHotEncoder(), cat_features)])
    pipeline = Pipeline([('preprocessor', transformer), ('model', model)])

    ave_precision = np.mean(cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='precision_weighted'))
    ave_recall = np.mean(cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='recall_weighted'))
    ave_f1 = np.mean(cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='f1_weighted'))
    ave_accuracy = np.mean(cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='accuracy'))
    
    return ave_precision, ave_recall, ave_f1, ave_accuracy

# Start Logging
mlflow.start_run()

# Enable autologging
mlflow.sklearn.autolog()

os.makedirs("./outputs", exist_ok=True)


def main():
    """Main function for training the model."""

    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, help="Path to train data")
    parser.add_argument("--cv", type=int, default = 5, help="Path to train data")
    parser.add_argument("--top_models", type=str, help="Path to train data")
    parser.add_argument("--train_data", type=str, help="Path to train data")
    parser.add_argument("--test_data", type=str, help="Path to train data")
    args = parser.parse_args()
    
    num_features,cat_features = [], []
    # Load and prepare training data
    df = pd.read_csv(select_first_file(args.data))

    for col in df.columns:
        if col.endswith('_num'):
            num_features.append(col)
        if col.endswith('_cat'):
            cat_features.append(col)
            
    X = df.drop(columns = "price_range")
    y = df['price_range']

    print(f"Training with data of shape {X.shape}")

    random_state=42

    scalers = [MinMaxScaler(), StandardScaler(), RobustScaler()]
    
    svc_model = SVC(random_state = random_state) 
    gbc_model = GradientBoostingClassifier(random_state = random_state)
    rf_model = RandomForestClassifier(random_state = random_state)
    knn_model = KNeighborsClassifier()
    xgb_model = XGBClassifier(random_state = random_state)
    lgb_model = LGBMClassifier(random_state = random_state, verbose= -1)
    cat_model = CatBoostClassifier(random_state = random_state, logging_level='Silent')
    
    models = ('SVM', svc_model), ('GB', gbc_model), ('RF', rf_model), ('KNN', knn_model), ('XGB', xgb_model), ('LGB', lgb_model), ('CAT', cat_model)

    results = []

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    for scaler in scalers:
        for model_name, model in models:
            ave_precision, ave_recall, ave_f1, ave_acc = evaluate_model(scaler, X_train, y_train, num_features, cat_features, model, args.cv)
    
            results.append({
                'Scaler': scaler,
                'Model': model_name,
                'Precision': ave_precision,
                'Recall': ave_recall,
                'F1 Score': ave_f1,
                'Accuracy': ave_acc
                
            })

    df_model = pd.DataFrame(results).sort_values(by='F1 Score', ascending=False).reset_index(drop=True)
    print(df_model)

    df_model_sorted = df_model.sort_values(by='F1 Score', ascending=False)
    top_unique_models = df_model_sorted.drop_duplicates(subset='Model')
    top_models_df = top_unique_models.head(3)

    print(f"Top 3 models:\n{top_models_df}")

    top_3_csv = os.path.join(args.top_models, "top_3_models.csv")
    top_models_df.to_csv(top_3_csv, index=False)

    train_df = pd.concat([X_train.reset_index(drop=True),y_train.reset_index(drop=True)], axis = 1)
    train_data_csv = os.path.join(args.train_data, "train_data_data.csv")
    train_df.to_csv(train_data_csv, index=False)
    
    test_df = pd.concat([X_test.reset_index(drop=True),y_test.reset_index(drop=True)], axis = 1)
    test_data_csv = os.path.join(args.test_data, "test_data_data.csv")
    test_df.to_csv(test_data_csv, index=False)

    mlflow.log_artifact(top_3_csv)         
    mlflow.end_run()


if __name__ == "__main__":
    main()

Overwriting ./components/train/train.py


In [18]:
%%writefile {dependencies_dir}/train_env.yaml

name: sklearn-1.5
channels:
- conda-forge
- anaconda
dependencies:
- python=3.10
- pip=21.3.1
- pandas~=1.5.3
- scipy~=1.10.0
- numpy~=1.22.0
- pip:
  - scikit-learn-intelex==2024.7.0
  - azureml-core==1.57.0.post1
  - azureml-defaults==1.57.0.post1
  - azureml-mlflow==1.57.0.post1
  - azureml-telemetry==1.57.0
  - scikit-learn~=1.5.0
  - joblib~=1.2.0
  - xgboost~=1.7.0
  - catboost~=1.2.0  
  - lightgbm~=3.3.5  
  - optuna~=3.2.0
  - azure-ai-ml==1.9.0
  - mltable
  # azureml-automl-common-tools packages
  - py-spy==0.3.12
  - debugpy~=1.6.3
  - ipykernel~=6.0
  - tensorboard
  - psutil~=5.8.0
  - matplotlib~=3.5.0
  - tqdm~=4.66.3
  - py-cpuinfo==5.0.0
  - torch-tb-profiler~=0.4.0



Overwriting ./dependencies/train_env.yaml


In [19]:
custom_env_name = "train_env"

pipeline_job_env_train = Environment(
    name=custom_env_name,
    description="custom environment ",
    tags={"scikit-learn": "0.24.2"},
    conda_file=os.path.join(dependencies_dir, "train_env.yaml"),
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    version="0.1.6",
)
pipeline_job_env_train = ml_client.environments.create_or_update(pipeline_job_env_train)

print(
    f"Environment with name {pipeline_job_env_train.name} is registered to workspace, the environment version is {pipeline_job_env_train.version}"
)

Environment with name train_env is registered to workspace, the environment version is 0.1.6


In [20]:
train_component = command(
                        name="train_model",
                        display_name="Train Model",
                        description="",
                        inputs={
                            "data": Input(type="uri_folder"), "cv":Input(type="number") 
                        },
                        outputs=dict(
                            top_models=Output(type="uri_folder", mode="rw_mount"),
                            train_data = Output(type="uri_folder", mode="rw_mount"),
                            test_data = Output(type="uri_folder", mode="rw_mount"),
                    
                        ),
                        code=train_src_dir,
                        command="""python train.py \
                                    --data ${{inputs.data}} \
                                    --cv ${{inputs.cv}}  \
                                    --top_models ${{outputs.top_models}}  \
                                    --train_data ${{outputs.train_data}} \
                                    --test_data ${{outputs.test_data}}
                                """,
                        environment=f"{pipeline_job_env_train.name}:{pipeline_job_env_train.version}",
                    )

# Register the component
train_component= ml_client.create_or_update(train_component.component)
print(f"Component {train_component.name} with Version {train_component.version} is registered")

Component train_model with Version 2024-10-08-08-06-16-9483462 is registered


## Model FineTuning using Optuna

In [21]:
optuna_dir = "./components/finetune"
os.makedirs(optuna_dir, exist_ok=True)

In [22]:
%%writefile {optuna_dir}/utils.py

import argparse
import os
import pandas as pd
import mlflow
import numpy as np
from joblib import dump, load

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve, auc
import optuna

def select_first_file(path):
    files = os.listdir(path)
    return os.path.join(path, files[0])
    
def load_data(data_path):
    df = pd.read_csv(data_path)
    num_features = [col for col in df.columns if col.endswith('_num')]
    cat_features = [col for col in df.columns if col.endswith('_cat')]
    
    X_train = df.drop(columns="price_range")
    y_train = df['price_range']
    return X_train, y_train, num_features, cat_features

def is_scaler(scaler):
    if scaler == 'RobustScaler()':
        scalers = RobustScaler()

    elif scaler == 'StandardScaler()':
        scalers = StandardScaler()

    elif scaler == 'MinMaxScaler()':
        scalers = MinMaxScaler()
    else:
        scalers = None
        print(f"Scaler '{scaler}' is not recognized. Returning None.")
    return scalers

def objective(trial, scaler, model_type, X_train, y_train, num_features, cat_features, cv):


    if model_type == 'CAT':
        learning_rate = trial.suggest_float('learning_rate', 0.01, 0.1)
        max_depth = trial.suggest_int('max_depth', 3, 7)
        n_estimators = trial.suggest_int('n_estimators', 2000, 3000)

        model = CatBoostClassifier(
            max_depth=max_depth,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            logging_level='Silent',
            random_state=42
        )

    elif model_type == 'SVM':
        C = trial.suggest_int('C', 1, 20)
        kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf'])
        degree = trial.suggest_int('degree', 1, 10)

        model = SVC(C=C, kernel=kernel, degree=degree, random_state=42)

    elif model_type == 'LGB':
        num_leaves = trial.suggest_int('num_leaves', 150, 500)
        learning_rate = trial.suggest_float('learning_rate', 0.05, 0.08)
        min_child_samples = trial.suggest_int('min_child_samples', 25, 50)
        n_estimators = trial.suggest_int('n_estimators', 1000, 5000)

        model = LGBMClassifier(
            num_leaves=num_leaves, 
            learning_rate=learning_rate, 
            min_child_samples=min_child_samples,
            n_estimators=n_estimators,
            random_state=42,
            verbose=-1
        )
    else:
        raise ValueError("Unsupported model type")

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', is_scaler(scaler), num_features), 
            ('cat', OneHotEncoder(), cat_features)  
        ]
    )

    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('model', model)
    ])

    cv = KFold(n_splits=5, shuffle=True, random_state=42)
    
    accuracy_scores = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='accuracy')
    precision_scores = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='precision_weighted')
    recall_scores = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='recall_weighted')
    f1_scores = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='f1_weighted')
    
    mlflow.log_metric('average_accuracy', np.mean(accuracy_scores))
    mlflow.log_metric('average_precision', np.mean(precision_scores))
    mlflow.log_metric('average_recall', np.mean(recall_scores))
    mlflow.log_metric('average_f1_score', np.mean(f1_scores))

    return np.mean(f1_scores) , np.mean(accuracy_scores), np.mean(precision_scores), np.mean(recall_scores)

def train_model(model_type, scaler, X_train, y_train,num_features,  cat_features, best_trial):
    best_params = best_trial.params
    if model_type == 'CAT':
        model = CatBoostClassifier(**best_params)
    
    elif model_type == 'SVM':
        model = SVC(**best_params)

    elif model_type == 'LGB':
        model = LGBMClassifier(**best_params)

    else:
        raise ValueError("Unsupported model type")
        
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', is_scaler(scaler), num_features), 
            ('cat', OneHotEncoder(), cat_features)  
        ]
    )
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('model', model)
    ])

    return pipeline

Overwriting ./components/finetune/utils.py


## FineTune Model 1

In [24]:
%%writefile {optuna_dir}/finetune_model1.py
from utils import load_data, select_first_file, objective, train_model
import argparse
import os
import pandas as pd
import mlflow
import optuna
from joblib import dump, load
from sklearn.metrics import accuracy_score, precision_score, recall_score

mlflow.start_run()

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data", type=str, required=True, help="Path to train data directory")
    parser.add_argument("--top_model_1", type=str, help="Type of model to optimize")
    parser.add_argument("--n_trials", type=int, help="Type of model to optimize")
    parser.add_argument("--model_1", type=str, help="Type of model to optimize")
    args = parser.parse_args()

    train_data = select_first_file(args.train_data)
    X_train, y_train, num_features, cat_features = load_data(train_data)

    top_models_df = pd.read_csv(select_first_file(args.top_model_1))
    top_model = top_models_df.iloc[0]
    scaler = top_model['Scaler']
    model_type = top_model['Model']
   
    study = optuna.create_study(directions=['maximize', 'maximize', 'maximize', 'maximize'])
    study.optimize(lambda trial: objective(trial, scaler, model_type, X_train, y_train, num_features, cat_features, 5), n_trials=args.n_trials)

    best_trial = study.best_trials[0]

    best_accuracy, best_precision, best_recall, best_f1_score = best_trial.values[0], best_trial.values[1], best_trial.values[2], best_trial.values[3]
    
    print("Best Accuracy:", best_accuracy)
    print("Best Precision:", best_precision)
    print("Best Recall:", best_recall)
    print("Best F1:", best_f1_score)
    print("Best trial parameters:", best_trial.params)

    mlflow.log_metric('best_f1_score', best_f1_score)

    pipeline = train_model(model_type, scaler, X_train, y_train,num_features,  cat_features, best_trial)
    pipeline.fit(X_train, y_train)

    results_df = pd.DataFrame({
        'Model': [model_type],
        'Best_F1_Score': [best_f1_score],
        'Best_Accuracy': [best_accuracy],
        'Best_Precision': [best_precision],
        'Best_Recall': [best_recall],
        'Best Parameter': [best_trial.params],
        
    })


    results_path = os.path.join(args.model_1, "Model_best_1.csv")
    results_df.to_csv(results_path, index=False)
    mlflow.log_artifact(results_path) 

    model_saved_path = os.path.join(args.model_1, "trained_model.pkl")  
    dump(pipeline, model_saved_path)  
    mlflow.log_artifact(model_saved_path)

    mlflow.end_run()

if __name__ == "__main__":
    main()

Overwriting ./components/finetune/finetune_model1.py


In [25]:
finetune_model1_component = command(
                        name="finetune_model1",
                        display_name="Finetune Model 1",
                        description="",
                        inputs={
                            'top_model_1': Input(type="uri_folder"),
                            "train_data": Input(type="uri_folder"), 'n_trials':Input(type="number")
                        },
                        outputs=dict(
                            model_1 =Output(type="uri_folder", mode="rw_mount"),
                    
                        ),
                        code=optuna_dir,
                        command="""python finetune_model1.py \
                                    --train_data ${{inputs.train_data}} \
                                    --n_trials ${{inputs.n_trials}} \
                                    --top_model_1 ${{inputs.top_model_1}} \
                                    --model_1 ${{outputs.model_1}} 
                                """,
                        environment=f"{pipeline_job_env_train.name}:{pipeline_job_env_train.version}",
                    )

# Register the component
finetune_model1_component= ml_client.create_or_update(finetune_model1_component.component)
print(f"Component {finetune_model1_component.name} with Version {finetune_model1_component.version} is registered")

Component finetune_model1 with Version 2024-10-08-08-06-19-5382955 is registered


# FineTune Model 2

In [26]:
%%writefile {optuna_dir}/finetune_model2.py
from utils import load_data, select_first_file, objective, train_model
import argparse
import os
import pandas as pd
import mlflow
import optuna
from joblib import dump, load
from sklearn.metrics import accuracy_score, precision_score, recall_score

mlflow.start_run()

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data", type=str, required=True, help="Path to train data directory")
    parser.add_argument("--top_model_2", type=str, help="Type of model to optimize")
    parser.add_argument("--n_trials", type=int, help="Type of model to optimize")
    parser.add_argument("--model_2", type=str, help="Type of model to optimize")
    args = parser.parse_args()

    train_data = select_first_file(args.train_data)
    X_train, y_train, num_features, cat_features = load_data(train_data)

    top_models_df = pd.read_csv(select_first_file(args.top_model_2))
    top_model = top_models_df.iloc[1]
    scaler = top_model['Scaler']
    model_type = top_model['Model']
   
    study = optuna.create_study(directions=['maximize', 'maximize', 'maximize', 'maximize'])
    study.optimize(lambda trial: objective(trial, scaler, model_type, X_train, y_train, num_features, cat_features, 5), n_trials=args.n_trials)
    
    best_trial = study.best_trials[0]

    best_accuracy, best_precision, best_recall, best_f1_score = best_trial.values[0], best_trial.values[1], best_trial.values[2], best_trial.values[3]
    
    print("Best Accuracy:", best_accuracy)
    print("Best Precision:", best_precision)
    print("Best Recall:", best_recall)
    print("Best F1:", best_f1_score)
    print("Best trial parameters:", best_trial.params)

    mlflow.log_metric('best_f1_score', best_f1_score)

    pipeline = train_model(model_type, scaler, X_train, y_train,num_features,  cat_features, best_trial)
    pipeline.fit(X_train, y_train)


    results_df = pd.DataFrame({
        'Model': [model_type],
        'Best_F1_Score': [best_f1_score],
        'Best_Accuracy': [best_accuracy],
        'Best_Precision': [best_precision],
        'Best_Recall': [best_recall],
        'Best Parameter': [best_trial.params],
        
    })

    results_path = os.path.join(args.model_2, "Model_best_2.csv")
    results_df.to_csv(results_path, index=False)
    mlflow.log_artifact(results_path) 

    model_saved_path = os.path.join(args.model_2, "trained_mode2.pkl")  
    dump(pipeline, model_saved_path)  
    mlflow.log_artifact(model_saved_path)

    mlflow.end_run()

if __name__ == "__main__":
    main()

Overwriting ./components/finetune/finetune_model2.py


In [27]:
finetune_model2_component = command(
                        name="finetune_model2",
                        display_name="Finetune Model 2",
                        description="",
                        inputs={
                            'top_model_2': Input(type="uri_folder"),
                            "train_data": Input(type="uri_folder"), 'n_trials':Input(type="number")
                        },
                        outputs=dict(
                            model_2 =Output(type="uri_folder", mode="rw_mount"),
                    
                        ),
                        code=optuna_dir,
                        command="""python finetune_model2.py \
                                    --train_data ${{inputs.train_data}} \
                                    --n_trials ${{inputs.n_trials}} \
                                    --top_model_2 ${{inputs.top_model_2}} \
                                    --model_2 ${{outputs.model_2}} 
                                """,
                        environment=f"{pipeline_job_env_train.name}:{pipeline_job_env_train.version}",
                    )

# Register the component
finetune_model2_component= ml_client.create_or_update(finetune_model2_component.component)
print(f"Component {finetune_model2_component.name} with Version {finetune_model2_component.version} is registered")

Component finetune_model2 with Version 2024-10-08-08-06-22-2153979 is registered


# FineTune Model 3

In [28]:
%%writefile {optuna_dir}/finetune_model3.py
from utils import load_data, select_first_file, objective, train_model
import argparse
import os
import pandas as pd
import mlflow
import optuna
from joblib import dump, load
from sklearn.metrics import accuracy_score, precision_score, recall_score
mlflow.start_run()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data", type=str, required=True, help="Path to train data directory")
    parser.add_argument("--top_model_3", type=str, help="Type of model to optimize")
    parser.add_argument("--n_trials", type=int, help="Type of model to optimize")
    parser.add_argument("--model_3", type=str, help="Type of model to optimize")
    args = parser.parse_args()

    train_data = select_first_file(args.train_data)
    X_train, y_train, num_features, cat_features = load_data(train_data)

    top_models_df = pd.read_csv(select_first_file(args.top_model_3))
    top_model = top_models_df.iloc[2]
    scaler = top_model['Scaler']
    model_type = top_model['Model']
   
    study = optuna.create_study(directions=['maximize', 'maximize', 'maximize', 'maximize'])
    study.optimize(lambda trial: objective(trial, scaler, model_type, X_train, y_train, num_features, cat_features, 5), n_trials=args.n_trials)
    

    best_trial = study.best_trials[0]

    best_accuracy, best_precision, best_recall, best_f1_score = best_trial.values[0], best_trial.values[1], best_trial.values[2], best_trial.values[3]
    
    print("Best Accuracy:", best_accuracy)
    print("Best Precision:", best_precision)
    print("Best Recall:", best_recall)
    print("Best F1:", best_f1_score)
    print("Best trial parameters:", best_trial.params)

    mlflow.log_metric('best_f1_score', best_f1_score)


    pipeline = train_model(model_type, scaler, X_train, y_train,num_features,  cat_features, best_trial)
    pipeline.fit(X_train, y_train)
 
    results_df = pd.DataFrame({
        'Model': [model_type],
        'Best_F1_Score': [best_f1_score],
        'Best_Accuracy': [best_accuracy],
        'Best_Precision': [best_precision],
        'Best_Recall': [best_recall],
        'Best Parameter': [best_trial.params],
        
    })

    results_path = os.path.join(args.model_3, "Model_best_3.csv")
    results_df.to_csv(results_path, index=False)
    mlflow.log_artifact(results_path) 

    model_saved_path = os.path.join(args.model_3, "trained_mode3.pkl")  
    dump(pipeline, model_saved_path)  
    mlflow.log_artifact(model_saved_path)

    mlflow.end_run()

if __name__ == "__main__":
    main()

Overwriting ./components/finetune/finetune_model3.py


In [29]:
finetune_model3_component = command(
                        name="finetune_model2",
                        display_name="Finetune Model 3",
                        description="",
                        inputs={
                            'top_model_3': Input(type="uri_folder"),
                            "train_data": Input(type="uri_folder"), 'n_trials':Input(type="number")
                        },
                        outputs=dict(
                            model_3  =Output(type="uri_folder", mode="rw_mount"),
                    
                        ),
                        code=optuna_dir,
                        command="""python finetune_model3.py \
                                    --train_data ${{inputs.train_data}} \
                                    --n_trials ${{inputs.n_trials}} \
                                    --top_model_3 ${{inputs.top_model_3}} \
                                    --model_3 ${{outputs.model_3}} 
                                """,
                        environment=f"{pipeline_job_env_train.name}:{pipeline_job_env_train.version}",
                    )

# Register the component
finetune_model3_component= ml_client.create_or_update(finetune_model3_component.component)
print(f"Component {finetune_model3_component.name} with Version {finetune_model3_component.version} is registered")

Component finetune_model2 with Version 2024-10-08-08-06-25-2930373 is registered


# Evaluate

In [30]:
%%writefile {optuna_dir}/evaluate.py

import argparse
import os
import pandas as pd
import mlflow
import numpy as np
from joblib import dump, load
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
import optuna

def select_first_file(path, num):
    files = os.listdir(path)
    return os.path.join(path, files[num])

def score(model, X_test, y_test):
    y_pred = model.predict(X_test)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    return precision,recall,f1
    
    
mlflow.sklearn.autolog()

os.makedirs("./outputs", exist_ok=True)

def main():
    """Main function for training the model."""
    
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_data", type=str, help="Path to test data")
    parser.add_argument("--model_1", type=str, help="Path to train data")
    parser.add_argument("--model_2", type=str, help="Path to train data")
    parser.add_argument("--model_3", type=str, help="Path to train data")
    parser.add_argument("--registered_model_name", type=str, help="Model name")
    parser.add_argument("--best_model", type=str, help="path to model file")
    args = parser.parse_args()
    

    model1 = load(select_first_file(args.model_1,1))
    model2 = load(select_first_file(args.model_2,1))
    model3 = load(select_first_file(args.model_3,1))

    test_df = pd.read_csv(select_first_file(args.test_data,0))

    X_test = test_df.drop(columns="price_range")
    y_test = test_df['price_range']

    precision_1,recall_1,f1_1 = score(model1, X_test, y_test)
    precision_2,recall_2,f1_2 = score(model2, X_test, y_test)
    precision_3,recall_3,f1_3 = score(model3, X_test, y_test)
    
    scores_df = pd.DataFrame({'model': ['model1', 'model2', 'model3'],
                                'precision': [precision_1, precision_2, precision_3],
                                'recall': [recall_1, recall_2, recall_3],
                                'f1': [f1_1, f1_2, f1_3]})

    sorted_scores_df = scores_df.sort_values(by='f1', ascending=False)

    Best_model = sorted_scores_df.iloc[0]['model']
    best_f1score = sorted_scores_df.iloc[0]['f1']

    if Best_model == 'model1':
        Best_model = model1
    elif Best_model == 'model2':
        Best_model = model2
    else:
        Best_model = model3

    results_path = os.path.join(args.best_model, "test_result.csv")
    sorted_scores_df.to_csv(results_path, index=False)
    mlflow.log_artifact(results_path) 

    if mlflow.active_run():
        mlflow.end_run()
        
    with mlflow.start_run(run_name=args.registered_model_name):
        print("Registering the model via MLFlow")
        mlflow.sklearn.log_model(
            sk_model=Best_model,
            registered_model_name=args.registered_model_name,
            artifact_path='best_model',  
        )

    mlflow.end_run()


if __name__ == "__main__":
    main()

Overwriting ./components/finetune/evaluate.py


In [31]:
evaluate_component = command(
                        name="evaluation",
                        display_name="Evaluation",
                        description="",
                        inputs={
                            'model_1': Input(type="uri_folder"),
                            "model_2": Input(type="uri_folder"), 'model_3':Input(type="uri_folder"),
                            'test_data': Input(type="uri_folder"), 'registered_model_name':Input(type="string")
                        },
                        outputs=dict(
                            best_model  =Output(type="uri_folder", mode="rw_mount"),
                    
                        ),
                        code=optuna_dir,
                        command="""python evaluate.py \
                                    --model_1 ${{inputs.model_1}} \
                                    --model_2 ${{inputs.model_2}} \
                                    --model_3 ${{inputs.model_3}} \
                                    --test_data ${{inputs.test_data}} \
                                    --registered_model_name ${{inputs.registered_model_name}} \
                                    --best_model ${{outputs.best_model}}
    
                                """,
                        environment=f"{pipeline_job_env_train.name}:{pipeline_job_env_train.version}",
                    )

# Register the component
evaluate_component= ml_client.create_or_update(evaluate_component.component)
print(f"Component {evaluate_component.name} with Version {evaluate_component.version} is registered")

Component evaluation with Version 2024-10-08-08-06-28-2126087 is registered


# Connect Pipeline

In [34]:
@dsl.pipeline(
    compute="serverless",
    description="train pipeline",
)
def train_pipeline(pipeline_job_data_input, pipeline_job_cv, 
                   pipeline_job_n_trials,pipeline_job_registered_model_name): 

    data_preprocessing_job = data_preprocessing_component(
        data = pipeline_job_data_input
    )
    feature_eng_job = feature_eng_component(
        data = data_preprocessing_job.outputs.clean_data
    )   
    train_job = train_component(
        data = feature_eng_job.outputs.output_dir,
        cv = pipeline_job_cv
         
    ) 

    finetune_model1_job = finetune_model1_component(
            train_data = train_job.outputs.train_data,
            n_trials = pipeline_job_n_trials,
            top_model_1 = train_job.outputs.top_models
    )
    finetune_model2_job = finetune_model2_component(
            train_data = train_job.outputs.train_data, 
            n_trials = pipeline_job_n_trials,
            top_model_2 = train_job.outputs.top_models
    )

    finetune_model3_job = finetune_model3_component(
        train_data = train_job.outputs.train_data,
        n_trials = pipeline_job_n_trials,
        top_model_3 = train_job.outputs.top_models
    ) 

    evaluate_job = evaluate_component(
        model_1 = finetune_model1_job.outputs.model_1,
        model_2 = finetune_model2_job.outputs.model_2,
        model_3 = finetune_model3_job.outputs.model_3,
        test_data = train_job.outputs.test_data,
        registered_model_name = pipeline_job_registered_model_name,
    ) 
    
    return

In [35]:
registered_model_name = "train_model"


# Let's instantiate the pipeline with the parameters of our choice
pipeline = train_pipeline(pipeline_job_data_input=Input(type="uri_file", path=data.path),
                          pipeline_job_cv = 5,
                          pipeline_job_n_trials = 50,
                          pipeline_job_registered_model_name=registered_model_name,
                        
                          )

In [36]:
# submit the pipeline job
pipeline_job = ml_client.jobs.create_or_update(
    pipeline,
    # Project's name
    experiment_name="Mobile_train_pipeline",
)
# open the pipeline in web browser
webbrowser.open(pipeline_job.studio_url)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


False

# Create Name for end point

In [37]:
import uuid

online_endpoint_name = "mobile-endpoint1-" + str(uuid.uuid4())[:8]

If you encountered this error : 'HttpResponseError: (SubscriptionNotRegistered) Resource provider [N/A] isn't registered with Subscription [N/A].' 

Do the following

Subscriptions > Azure subscription 1 > Resource providers > search for 'Microsoft.Cdn'  
and  'Microsoft.PolicyInsights'register

In [38]:
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
)

endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is an online endpoint",
    auth_mode="key",
    tags={
        "training_dataset": "mobile_defaults",
        "model_type": "sklearn.RandomForestClassifier",
    },
)

endpoint_result = ml_client.begin_create_or_update(endpoint).result()

print(
    f"Endpint {endpoint_result.name} provisioning state: {endpoint_result.provisioning_state}"
)


endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

print( f'Endpint "{endpoint.name}" with provisioning state "{endpoint.provisioning_state}" is retrieved')

Endpint mobile-endpoint1-f51bfc69 provisioning state: Succeeded
Endpint "mobile-endpoint1-f51bfc69" with provisioning state "Succeeded" is retrieved


# Select lastest Model

In [41]:
latest_model_version = max(
    [int(m.version) for m in ml_client.models.list(name=registered_model_name)]
)
model = ml_client.models.get(name=registered_model_name, version=latest_model_version)

In [42]:
%%writefile {dependencies_dir}/env.yaml
name: custom_env_name
channels:
  - defaults
  - anaconda
  - conda-forge
dependencies:
  - python=3.8.0
  - pip:
      - azureml-mlflow==1.42.0
      - azureml-sdk==1.38.0
      - scikit-learn==0.24.2
      - pandas>=1.1,<1.2
      - numpy=1.21.2
      - azureml-inference-server-http 
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector


Overwriting ./dependencies/env.yaml


In [43]:
environment = Environment(
    name=custom_env_name,
    conda_file=os.path.join(dependencies_dir, "env.yaml"),
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    
)
environment = ml_client.environments.create_or_update(environment)

# Score Script for Inference

In [44]:
scoring_dir = "./components/scoring_script"
os.makedirs(scoring_dir, exist_ok=True)

In [45]:
%%writefile {scoring_dir}/scoring_script.py

import joblib
import json
import os
import pandas as pd

def init():
    global model
    model_dir = os.environ["AZUREML_MODEL_DIR"]
    model_path = os.path.join(model_dir, "best_model/model.pkl")
    model = joblib.load(model_path)

def run(data):
    try:
        input_data = json.loads(data)  
        columns = ["ram_num", "battery_power_num", "px_width_num", "px_height_num", 
                   "mobile_wt_num", "int_memory_num", "n_cores_cat"]
                   
        df = pd.DataFrame(input_data["input_data"]["data"], columns=columns)
        
        prediction = model.predict(df)
        
        return prediction.tolist() 
    
    except Exception as e:
        return {"error": str(e)}



Overwriting ./components/scoring_script/scoring_script.py


In [46]:
from azure.ai.ml.entities import ManagedOnlineDeployment, CodeConfiguration

code_config = CodeConfiguration(
    code="./components/scoring_script",  
    scoring_script="scoring_script.py")

mobile_deployment = ManagedOnlineDeployment(
    name="mobile",
    endpoint_name=online_endpoint_name,
    model= model,
    environment= 'azureml://registries/azureml/environments/sklearn-1.5/labels/latest',
    code_configuration= code_config,
    instance_type="Standard_DS1_v2", #Standard_DS2_v2
    instance_count=1
   
)

mobile_deployment_results = ml_client.online_deployments.begin_create_or_update(
    mobile_deployment
).result()

print(
    f"Deployment {mobile_deployment_results.name} provisioning state: {mobile_deployment_results.provisioning_state}"
)


Instance type Standard_DS1_v2 may be too small for compute resources. Minimum recommended compute SKU is Standard_DS3_v2 for general purpose endpoints. Learn more about SKUs here: https://learn.microsoft.com/en-us/azure/machine-learning/referencemanaged-online-endpoints-vm-sku-list
Check: endpoint mobile-endpoint1-f51bfc69 exists


................................................................................Deployment mobile provisioning state: Succeeded


In [47]:
# Ensure that the endpoint is in the "Succeeded" state before allocating traffic
if endpoint.provisioning_state == "Succeeded":
    print(f'Endpoint "{endpoint.name}" with provisioning state "{endpoint.provisioning_state}" is retrieved')

    # Retrieve all deployments associated with this endpoint
    deployments = ml_client.online_deployments.list(endpoint.name)
    
    # Find the specific deployment (e.g., "mobile") and check its state
    deployment = next((d for d in deployments if d.name == "mobile"), None)

    if deployment:
        deployment_status = deployment.provisioning_state
        print(f"Deployment 'mobile' state: {deployment_status}")

        if deployment_status == "Succeeded":
            # Assign 100% traffic to the 'mobile' deployment
            endpoint.traffic = {
                "mobile": 100  # 100% traffic to the 'mobile' deployment
            }
            # Apply the traffic allocation
            ml_client.online_endpoints.begin_create_or_update(endpoint).result()
            print(f"Traffic for endpoint {endpoint.name} is now routed to the 'mobile' deployment")
        else:
            print(f"Deployment 'mobile' is not in a valid state. Current state: {deployment_status}")
    else:
        print("Deployment 'mobile' not found.")
else:
    print(f"Endpoint is not in a 'Succeeded' state. Current state: {endpoint.provisioning_state}")




Endpoint "mobile-endpoint1-f51bfc69" with provisioning state "Succeeded" is retrieved
Deployment 'mobile' state: Succeeded
Traffic for endpoint mobile-endpoint1-f51bfc69 is now routed to the 'mobile' deployment


Readonly attribute principal_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>
Readonly attribute tenant_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>


# Inference

In [48]:
deploy_dir = "./deploy"
os.makedirs(deploy_dir, exist_ok=True)

In [49]:
%%writefile {deploy_dir}/sample-request1234.json

{
    "input_data": {
        "data": [
            [2549, 842, 756, 26.696685, 188, 7, 2],
            [2863, 826, 786, 21.834586, 88, 58, 4],
            [5000, 1000, 800, 24.34, 120, 58, 4]
        ]
    }
}



Overwriting ./deploy/sample-request1234.json


In [50]:
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file="./deploy/sample-request1234.json",
    deployment_name="mobile",
)

'[1, 2, 3]'