## Understanding the data

The data comes from the Kaggle Playground Series competition (Season 3, Episode 14): https://www.kaggle.com/c/playground-series-s3e14

## Using the Azure SDK 

In [2]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Authenticate with DefaultAzureCredential, then build the workspace client
# from the local config file discovered by MLClient.from_config.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

Found the config file in: .\config.json


In [3]:
import mltable

# Describe the training CSV as an MLTable.
paths = [{"file": "./data/train.csv"}]
train_table = mltable.from_delimited_files(paths=paths)

# Write the MLTable definition to ./data/train; left as the last expression
# so the notebook displays the saved table definition.
train_table.save('./data/train')

paths:
- file: file://c:\Users\leopu\OneDrive\Programming\Python\azure\azure-automl-webinar\data\train.csv
transformations:
- read_delimited:
    delimiter: ','
    empty_as_string: false
    encoding: utf8
    header: all_files_same_headers
    include_path_column: false
    infer_column_types: true
    partition_size: 20971520
    path_column: Path
    support_multi_line: false
type: mltable

In [4]:
# Azure CLI command for creating compute from compute.yml.
# Left commented out, so running this cell does nothing.
#!az ml compute create -f compute.yml

In [None]:
# Compute Instances need to have a unique name across the region.
# A fixed name is used here (no datetime suffix is appended), so change
# `compute_name` if the name is already taken in your region.
from azure.ai.ml.entities import ComputeInstance, AmlCompute
import datetime

compute_name = "automl-webinar"
ci_basic = ComputeInstance(
    name=compute_name,
    size="STANDARD_D2AS_V4",  # 2 cores, 8GB RAM, 16GB Storage
    idle_time_before_shutdown_minutes=30,  # integer number of minutes, not a string
)
# .result() waits for the create/update operation to finish and returns its result.
ml_client.begin_create_or_update(ci_basic).result()

In [None]:
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import automl, Input

# note that this is a code snippet -- you might have to modify the variable values to run it successfully

# Make an Input object for the training data.
# The path is the folder the MLTable definition was saved to earlier in this
# notebook via train_table.save('./data/train').
training_data_input = Input(
    type=AssetTypes.MLTABLE, path="./data/train"
)

# Configure the AutoML regression job that predicts the "yield" column.
# NOTE: the variable keeps the name `classification_job` so other cells that
# reference it continue to work, but the task created here is regression.
classification_job = automl.regression(
    compute=compute_name,
    experiment_name="automl-webinar-blueberry-prediction",
    training_data=training_data_input,
    target_column_name="yield",
    # "mae" is not an accepted primary metric name for regression jobs; the
    # MAE-based metric AutoML exposes is the normalized mean absolute error.
    primary_metric="normalized_mean_absolute_error",
    n_cross_validations=5,
    enable_model_explainability=True,
    tags={"my_custom_tag": "My custom value"}
)

# Optional job limits: trial count, per-trial timeout and overall timeout
# (both in minutes), plus early termination.
classification_job.set_limits(
    max_trials=5,
    trial_timeout_minutes=20,
    timeout_minutes=600,
    enable_early_termination=True,
)

# Training properties are optional.
# No algorithms are blocked here: logistic regression is a classification
# algorithm, so it is not a valid blocked_training_algorithms entry for a
# regression job. To exclude algorithms, pass regression model names
# (see the RegressionModels enum in the SDK -- confirm exact names there).
classification_job.set_training(
    enable_onnx_compatible_models=True,
)

## Alternatives to Azure AutoML