# Basic AzureML SDK functionality

In [None]:
from azureml.core import Workspace

# Obtain a handle to the AzureML workspace. from_config() is called without
# arguments, so the connection details come from the SDK's own configuration
# lookup rather than from anything defined in this cell.
ws = Workspace.from_config()

# Alternatively, construct the workspace explicitly from its identifiers
# (as in the sample shown in "datasets"):
# ws = Workspace(subscription_id, resource_group, workspace_name)

# Print the workspace name as a quick check that the handle is usable.
print(ws.name)

## Computes

In [None]:
# List every compute target in the workspace together with its Python type.
# `ws.compute_targets` is a property that builds a fresh name -> target dict
# on each access, so read it once and iterate its items instead of
# re-reading the property to index it on every iteration.
for compute_name, compute in ws.compute_targets.items():
    print(f"Compute {compute.name} is a {type(compute)}")

In [None]:
# Provision a small compute cluster
from azureml.core.compute import ComputeTarget, AmlCompute

compute_name = 'cpu-sm-cluster'

# Read the workspace's compute targets once: the property rebuilds its
# name -> target dict on every access, so the membership test and the
# lookup below share a single snapshot.
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    # Reuse the cluster that is already provisioned under this name.
    print('Getting reference to compute cluster')
    cluster = existing_targets[compute_name]
else:
    # Provision a new cluster capped at two nodes and block until the
    # provisioning operation finishes, echoing progress to the cell output.
    print('Creating compute cluster')
    config = AmlCompute.provisioning_configuration(
        vm_size='Standard_D1',
        max_nodes=2)
    cluster = ComputeTarget.create(ws, compute_name, config)
    cluster.wait_for_completion(show_output=True)

print(f"Got reference to cluster {cluster.name}")

## Datastores

In [None]:
from azureml.core import Datastore

dstore_name='workspaceblobstore'

# Three alternative ways to obtain a datastore reference are shown below.
# Each assignment overwrites the previous one, so only the last one
# determines the `dstore` used by later cells.

# 1. Look the datastore up by name through the Datastore class.
dstore = Datastore.get(ws, dstore_name)
# OR
# 2. Index the workspace's `datastores` mapping with the same name.
dstore = ws.datastores[dstore_name]
# OR simpler
# 3. Ask the workspace for its default datastore.
# NOTE(review): this call does not use dstore_name; presumably the default
# datastore is 'workspaceblobstore' -- confirm for the target workspace.
dstore = ws.get_default_datastore()

# Show the datastore that was resolved.
print(dstore)

In [None]:
# Upload the local CSV into the datastore under samples/diabetes/v1,
# overwriting on conflict and showing upload progress.
local_files = ['diabetes_raw_data.csv']
upload_target = "samples/diabetes/v1"

dstore.upload_files(
    files=local_files,
    target_path=upload_target,
    overwrite=True,
    show_progress=True,
)

## Datasets

In [None]:
# (datastore, path) pairs that the dataset definitions in later cells read from.
# NOTE(review): the path here has a leading slash while the upload target
# path does not -- presumably both resolve to the same folder; confirm.
file_paths = [(dstore, "/samples/diabetes/v1")]

In [None]:
# Define a FileDataset
from azureml.core import Dataset
# Build a FileDataset over the datastore location(s) in file_paths,
# with validation switched on.
file_ds = Dataset.File.from_files(path=file_paths, validate=True)

# Print the header first, then resolve and print the file paths.
print("Files in FileDataset:")
dataset_file_paths = file_ds.to_path()
print(dataset_file_paths)

In [None]:
# Define a TabularDataset over the same datastore paths, treating the files
# as delimited text; validation is switched off for this definition.
tabular_ds = Dataset.Tabular.from_delimited_files(path=file_paths, validate=False)

# Load the dataset into a pandas DataFrame, report its row count, and
# preview the first rows as the cell's output.
df = tabular_ds.to_pandas_dataframe()
row_count = len(df)
print(row_count)
df.head()

In [None]:
# Register both dataset definitions in the workspace under fixed names,
# sharing one description.
# NOTE(review): create_new_version is left at its default; confirm how
# re-running this cell behaves once these names are already registered.
dataset_description = "The sklearn diabetes dataset"

file_ds.register(workspace=ws, name="diabetes-file", description=dataset_description)

tabular_ds.register(workspace=ws, name="diabetes-tabular", description=dataset_description)

### Loading partial tabular datasets

In [None]:
# Derive a narrower dataset from tabular_ds: skip 10 records, take the
# next 2, and keep only the '0' and 'target' columns.
partial_ds = (
    tabular_ds
    .skip(10)
    .take(2)
    .keep_columns(['0', 'target'])
)

In [None]:
# This is the point at which the data is actually loaded into memory:
# converting the partial dataset to a pandas DataFrame.
df = partial_ds.to_pandas_dataframe()
# Preview the first rows as the cell's output.
df.head()