<a href="https://colab.research.google.com/github/Jushef/Azure-AutoML/blob/main/MMSA/02_AutoML_Training_Pipeline_My_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from azureml.core import Workspace, Datastore, Dataset
from azureml.core import Experiment
from azureml.core.compute import ComputeTarget
import pandas as pd
import os

from azureml.contrib.automl.pipeline.steps import AutoMLPipelineBuilder
from azureml.pipeline.core import Pipeline
from scripts.helper import get_training_output
import logging


In [None]:
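# Uncomment to install the package that provides AutoMLPipelineBuilder
# (PyPI distribution name; the import path is azureml.contrib.automl.pipeline.steps).
#!pip install azureml-contrib-automl-pipeline-steps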

In [None]:
# Workspace handle built from the SDK's saved configuration; every later
# cell (datastore, experiment, dataset, compute, pipeline) hangs off it.
ws = Workspace.from_config()

In [None]:
# The workspace's default datastore; passed to the train-step builder as
# the output_datastore.
dstore = ws.get_default_datastore()

In [None]:
# Experiment that the training pipeline is built for and submitted under.
experiment = Experiment(
    workspace=ws,
    name='manymodels-training-pipeline-pandas-data',
)

In [None]:
# Fetch the dataset by its name in the workspace ...
filedst_10_models = Dataset.get_by_name(workspace=ws, name='MMSA_Sample_train')

# ... and give it a named-input wrapper, which is what the train-step
# builder receives as train_data.
filedst_10_models_input = filedst_10_models.as_named_input(
    'MMSA_Sample_train'
)

In [None]:
# Compute target named 'compute-cluster' in this workspace; the training
# steps are configured to use it.
compute = ComputeTarget(workspace=ws, name='compute-cluster')

In [None]:
# Column(s) the training data is partitioned by. Shared by automl_settings
# and the train-step builder so both agree on the partitioning.
partition_column_names = [
    'Store',
]

In [None]:
# Settings dictionary handed to the many-models train-step builder.
#
# NOTE(review): newer Azure ML SDK documentation lists `forecast_horizon`
# and `time_series_id_column_names` as the replacements for `max_horizon`
# and `grain_column_names`. Confirm which spelling the installed
# azureml-contrib-automl-pipeline-steps version reads before renaming.
automl_settings = {
    # Task and optimisation target.
    "task": 'forecasting',
    "primary_metric": 'normalized_root_mean_squared_error',
    # Time / iteration budgets.
    "iteration_timeout_minutes": 10,
    "iterations": 15,
    "experiment_timeout_hours": 1,
    # Target column and validation.
    "label_column_name": 'Sales',
    "n_cross_validations": 3,
    # Logging.
    "verbosity": logging.INFO,
    "debug_log": 'automl_pandas_debug.txt',
    # Time-series configuration.
    "time_column_name": 'Date',
    "max_horizon": 31,
    "track_child_runs": False,
    # Partitioning: same list object that is passed to the step builder.
    "partition_column_names": partition_column_names,
    "grain_column_names": ['Store'],
    "pipeline_fetch_max_batch_size": 15,
}

In [None]:
# Build the pipeline steps for many-models training from the objects
# prepared in the cells above.
train_steps = AutoMLPipelineBuilder.get_many_models_train_steps(
    experiment=experiment,
    automl_settings=automl_settings,
    train_data=filedst_10_models_input,
    compute_target=compute,
    partition_column_names=partition_column_names,
    # Parallelism: 4 nodes with 4 processes each.
    node_count=4,
    process_count_per_node=4,
    # NOTE(review): presumably seconds, i.e. slightly above the 1-hour
    # experiment_timeout_hours in automl_settings -- confirm in the API docs.
    run_invocation_timeout=3700,
    output_datastore=dstore,
)

In [None]:
# Wrap the training steps in a pipeline bound to the workspace.
pipeline = Pipeline(
    workspace=ws,
    steps=train_steps,
)

# Submit it under the experiment; `run` is the handle used by later cells
# to wait for completion and to fetch the training output.
run = experiment.submit(pipeline)

In [None]:
# Wait for the submitted pipeline run to complete, with output display
# enabled.
run.wait_for_completion(
    show_output=True,
)

In [None]:
# Publish the pipeline; the returned object is kept so its id can be used
# afterwards (e.g. by the commented-out schedule example at the end).
published_pipeline = pipeline.publish(
    name='automl_train_many_models_pandas',
    description='MMSA Solution using x data',
    version='1',
    continue_on_step_failure=False,
)

In [None]:
# Names passed to the project helper to locate the training output of `run`.
training_results_name = "training_results"
training_output_name = "many_models_training_output"

# get_training_output is a project helper (scripts.helper); its return value
# is passed straight to pd.read_csv below.
training_file = get_training_output(
    run,
    training_results_name,
    training_output_name,
)

# The output file has no header row and is space-delimited, so the column
# names are supplied explicitly.
all_columns = [
    "Framework",
    "Dataset",
    "Run",
    "Status",
    "Model",
    "Tags",
    "StartTime",
    "EndTime",
    "ErrorType",
    "ErrorCode",
    "ErrorMessage",
]
df = pd.read_csv(
    training_file,
    delimiter=" ",
    header=None,
    names=all_columns,
)

# Save a CSV copy in the working directory and report where it is.
training_csv_file = "training.csv"
df.to_csv(training_csv_file)
print(
    "Training output has",
    len(df),
    "rows. Please open",
    os.path.abspath(training_csv_file),
    "to browse through all the output.",
)

In [None]:
# from azureml.pipeline.core import Schedule, ScheduleRecurrence
    
# training_pipeline_id = published_pipeline.id

# recurrence = ScheduleRecurrence(frequency="Month", interval=1, start_time="2020-01-01T09:00:00")
# recurring_schedule = Schedule.create(ws, name="automl_training_recurring_schedule", 
#                             description="Schedule Training Pipeline to run on the first day of every month",
#                             pipeline_id=training_pipeline_id, 
#                             experiment_name=experiment.name, 
#                             recurrence=recurrence)