# Connect to Workspace

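Connect to your Azure Machine Learning (AML) workspace. Run this notebook with the **Python 3.8 - AzureML** kernel.

Before running the cell below, replace the `<subscription_id>`, `<resource_group>`, and `<workspace_name>` placeholders with your own values.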

In [None]:
from azureml.core.workspace import Workspace

# Identifiers of the target workspace; replace the placeholders before running.
workspace_settings = {
    'subscription_id': '<subscription_id>',
    'resource_group': '<resource_group>',
    'workspace_name': '<workspace_name>',
}

try:
    # Get a handle on the workspace described by the settings above.
    ws = Workspace(**workspace_settings)

    # Write the workspace details to a config file.
    ws.write_config()
except Exception as err:
    # NOTE(review): any failure in the two calls above (not only a missing
    # workspace) ends up here and is reported as 'Workspace not found';
    # the printed exception text is the reliable source for the real cause.
    print(err)
    print('Workspace not found')
else:
    # Reached only when both the lookup and the config write succeeded.
    print('Library configuration succeeded')

# Upload files to datastore

Upload the local dataset files to the `flightdelay` folder of the `workspaceblobstore` datastore (blob storage).

In [None]:
from azureml.core import Workspace, Datastore, Dataset
from azureml.data.datapath import DataPath

# Load the workspace from its saved config file and look up the blob datastore.
ws = Workspace.from_config()
datastore = Datastore.get(ws, 'workspaceblobstore')

# Destination folder inside the datastore for the uploaded files.
upload_target = DataPath(datastore, '/flightdelay')

# Upload the local directory, replacing any files already at the target.
ds = Dataset.File.upload_directory(
    src_dir='../../azureStorageFiles',
    target=upload_target,
    overwrite=True,
    show_progress=True,
)

# Load files to tabular Dataset

Create a tabular dataset from each CSV file in the datastore. Different datasets support different parts of the demo.

In [9]:
from azureml.core import Dataset

# The upload step writes the CSV files under the 'flightdelay' folder of the
# datastore (its target is DataPath(datastore, '/flightdelay')), so the paths
# used here must include that folder; bare file names would point at the
# datastore root, where a fresh run has uploaded nothing.
# NOTE(review): assumes the CSV files sit directly inside the uploaded source
# directory (no sub-folders) - confirm against ../../azureStorageFiles.
DATASTORE_FOLDER = 'flightdelay'


def load_tabular(file_name):
    """Create a tabular dataset from one CSV file in the uploaded folder.

    Args:
        file_name: Name of the CSV file inside DATASTORE_FOLDER.

    Returns:
        The dataset returned by Dataset.Tabular.from_delimited_files for
        that single file.
    """
    return Dataset.Tabular.from_delimited_files(
        path=[(datastore, f'{DATASTORE_FOLDER}/{file_name}')])


airports = load_tabular('airports.csv')
carriersdataset = load_tabular('carriersdataset.csv')
flight_dataset_2008 = load_tabular('flight_dataset_2008.csv')
plane_data = load_tabular('plane-data.csv')
flightdelayweather_ds_clean = load_tabular('flightdelayweather_ds_clean.csv')
flight_dataset_2008_with_weather = load_tabular('flight_dataset_2008_with_weather.csv')

# Register Datasets to Workspace

By registering the datasets, we can link experiment runs to them throughout the demo series.

In [11]:
# Keyword arguments shared by every registration below: the target workspace
# and the request to create a new version under the given name.
register_options = dict(workspace=ws, create_new_version=True)

airports_ds = airports.register(name='airports_ds', **register_options)
# The weather-enriched 2008 dataset is registered under several names.
flightdelayweather_ds = flight_dataset_2008_with_weather.register(
    name='flightdelayweather_ds', **register_options)
carriers_ds = carriersdataset.register(name='carriers_ds', **register_options)
flightdelay_ds_raw = flight_dataset_2008.register(
    name='flightdelay_ds_raw', **register_options)
flightdelay_ds = flight_dataset_2008_with_weather.register(
    name='flightdelay_ds', **register_options)

# These two statements rebind the source variables to the value returned by
# register(); they are kept last so the registrations above still operate on
# the objects created by the loading cell.
flight_dataset_2008_with_weather = flight_dataset_2008_with_weather.register(
    name='flight_dataset_2008_with_weather', **register_options)
flightdelayweather_ds_clean = flightdelayweather_ds_clean.register(
    name='flightdelayweather_ds_clean', **register_options)

# NOTE(review): plane_data is loaded in the previous cell but is not
# registered here - confirm whether that is intentional.


# Create AML Cluster

A CPU cluster is used for remote training scenarios.

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

### Create AML CPU Compute Cluster

# Single source for the cluster name used by both the lookup and the creation.
cluster_name = 'cpucluster'

try:
    # Look the cluster up first; ComputeTargetException is treated as
    # "does not exist yet" and triggers creation below.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS12_v2',
        max_nodes=4,
    )

    # Create the cluster and block until provisioning has finished.
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)
else:
    print('Found existing compute target.')