In [None]:
import datetime as dt
import os

import numpy as np
import pandas as pd
from azureml.core import Workspace

from scripts.helper import split_data

In [None]:
# Load the sample data set; row 0 of the CSV supplies the column names.
ManyModelsSample = pd.read_csv('ManyModelsSampleData.csv', header=0)

In [None]:
# Local folder that receives the per-store CSV files written further below.
# `os` must be imported in the notebook's import cell for this to run on a
# fresh kernel.
target_path = 'MMSA_Sample_Folder'
# exist_ok=True keeps the cell re-runnable when the folder already exists.
os.makedirs(target_path, exist_ok=True)

In [None]:
# Display the loaded DataFrame as the cell output for a quick visual check.
ManyModelsSample

In [None]:
# Convert the 'Date' column from month/day/year strings to pandas datetimes.
# pd.to_datetime is vectorized and returns an already-parsed datetime column
# unchanged, so re-running this cell no longer fails the way a per-row
# strptime call does on values that are no longer strings.
ManyModelsSample['Date'] = pd.to_datetime(ManyModelsSample['Date'], format='%m/%d/%Y')

In [None]:
# Write one CSV file per store into `target_path`, named after the store key.
# Building the path from `target_path` (instead of repeating the folder name as
# a literal) keeps this cell in sync with the folder created above and with the
# directory that is later handed to split_data.
for store_id, store_df in ManyModelsSample.groupby('Store'):
    # header/index_label are kept exactly as before so the file layout is unchanged.
    store_df.to_csv('{}/{}.csv'.format(target_path, store_id), header=True, index_label=False)

## 2.0 Split data in two sets

We will now split each dataset into two parts: one will be used for training, and the other will be used for simulating batch forecasting. The training files will contain the data records before the split date ('2021-03-01', set in the cell below) and the last part of each series will be stored in the inferencing files.

Finally, we will upload both sets of data files to the Workspace's default [Datastore](https://docs.microsoft.com/python/api/azureml-core/azureml.core.datastore(class)).

In [None]:
# Split configuration:
#   timestamp_column - name of the column in each per-store file that holds the timestamp
#   split_date       - date from which records go to the inference set
#                      (NOTE(review): whether the boundary date itself is included
#                      is decided inside split_data - confirm in scripts/helper.py)
timestamp_column = 'Date'
split_date = '2021-03-01'

# Split every file found in target_path. The helper returns two values, which are
# used below as the local source directories for the train and inference uploads.
train_path, inference_path = split_data(target_path, timestamp_column, split_date)

## 3.0 Upload data to Datastore in AML Workspace

In the [setup notebook](00_Setup_AML_Workspace.ipynb) you created a [Workspace](https://docs.microsoft.com/python/api/azureml-core/azureml.core.workspace.workspace). We are going to register the data in that environment.

In [None]:
# Connect to the Azure ML Workspace via its saved configuration.
# NOTE(review): presumably relies on the config written by the setup notebook - confirm it exists before running.
ws = Workspace.from_config()

We will upload both sets of data files to your Workspace's default [Datastore](https://docs.microsoft.com/azure/machine-learning/how-to-access-data). 
A Datastore is a place where data can be stored that is then made accessible for training or forecasting. Please refer to [Datastore documentation](https://docs.microsoft.com/python/api/azureml-core/azureml.core.datastore(class)) on how to access data from Datastore.

In [None]:
# Use the Workspace's default datastore as the upload destination
datastore = ws.get_default_datastore()

# Destination folders inside the datastore, derived from the local folder name
ds_train_path = target_path + '_train'
ds_inference_path = target_path + '_inference'

# Upload the train files first, then the inference files, replacing existing copies
for local_dir, remote_dir in ((train_path, ds_train_path), (inference_path, ds_inference_path)):
    datastore.upload(src_dir=local_dir, target_path=remote_dir, overwrite=True)

In [None]:
from azureml.core.dataset import Dataset

# Names under which the two file datasets are registered in the Workspace
dataset_name = 'MMSA_Sample'
train_dataset_name = dataset_name + '_train'
inference_dataset_name = dataset_name + '_inference'

# Build file datasets that point at the uploaded datastore folders
ds_train = Dataset.File.from_files(path=datastore.path(ds_train_path), validate=False)
ds_inference = Dataset.File.from_files(path=datastore.path(ds_inference_path), validate=False)

# Register both datasets, train first; an existing name gets a new version
for file_dataset, registered_name in ((ds_train, train_dataset_name), (ds_inference, inference_dataset_name)):
    file_dataset.register(ws, registered_name, create_new_version=True)