In [None]:
# Report which azureml-core SDK version this notebook is running against
import azureml.core

sdk_version = azureml.core.VERSION
print("Core version:", sdk_version)

#### Connect to workspace via config.json

In [None]:
from azureml.core import Workspace

# Build the Workspace object from the saved configuration (config.json).
# The resulting `workspace` variable is reused by the later cells, so keep its name.
workspace = Workspace.from_config()
print(workspace)

# Uploading to Regular Datastore

## Upload files to datastore and create a file dataset

```python
    def upload_directory(src_dir, target, pattern=None, overwrite=False, show_progress=True):
        """Upload source directory to target datastore and create a file dataset.

        :param src_dir: The local directory to upload.
        :type src_dir: str
        :param target: Required, the datastore path where the files will be uploaded to.
        :type target: azureml.data.datapath.DataPath, azureml.core.datastore.Datastore
            or tuple(azureml.core.datastore.Datastore, str) object
        :param pattern: Optional, If provided, will filter all the path names matching the given pattern,
            similar to Python glob package, supporting '*', '?', and character ranges expressed with [].
        :type pattern: str
        :param overwrite: Optional, indicates whether to overwrite existing files. Defaults to be False.
        :type overwrite: bool
        :param show_progress: Optional,
            indicates whether to show progress of the upload in the console. Defaults to be True.
        :type show_progress: bool
        :return: The created file dataset.
        :rtype: azureml.data.FileDataset
        """
```

In [None]:
# Syntax:
# Dataset.File.upload_directory(src_dir='<your source folder for uploading>/',
#            target=DataPath(<datastore>, '<upload path on the datastore>'),
#            show_progress=True)
# Using the workspace default datastore as the upload target for a local directory
from azureml.core import Dataset
from azureml.data.datapath import DataPath

# Getting workspace default datastore
datastore = workspace.get_default_datastore()
# Uploading the local ./data/ directory to /data/ on the datastore and creating a FileDataset
dataset = Dataset.File.upload_directory(
    src_dir="./data/", target=DataPath(datastore, "/data/"), show_progress=True
)
# You can register the dataset created by uploading the directory
dataset.register(
    workspace=workspace,
    name="ds_from_directory",
    description="This dataset was created by uploading a directory",
)

In [None]:
# Pull the files of the uploaded dataset back down into a local folder
download_dir = "./downloads"
dataset.download(download_dir)

## Upload files to datastore matching a pattern and create a file dataset

In [None]:
# Restrict the upload to files whose path name matches a pattern:
# only the .csv files found under ./data/ are uploaded in this example.
pattern_target = DataPath(datastore, "/data_pattern/")
dataset_pattern = Dataset.File.upload_directory(
    src_dir="./data/",
    target=pattern_target,
    pattern="*.csv",
    show_progress=True,
)

# Uploading to credential-less datastore

## Register Azure Data Lake Storage Gen1 (ADLS Gen1) as credential-less datastore

#### Note
The following code examples are shown as markdown cells. To run these examples:
- Remove ``` at the beginning and end of the cells.
- Convert these markdown cells to code cells.
- Enter appropriate values inside the '< >' to successfully execute the sample codes.

```
from azureml.core import Datastore

adlsgen1_datastore_name = 'adlgen1credlessstore'  # Datastore name
store_name = '<your ADLS store name>'  # ADLS Gen1 storage account name
subscription_id = '<your subscription id>'  # subscription for the ADLS Gen1 storage account
resource_group = '<your resource group>'  # resource group for the ADLS Gen1 storage account

# No credential arguments are passed to the registration call, which is what
# makes this a credential-less datastore.
adls_datastore_cred_less = Datastore.register_azure_data_lake(
    workspace=workspace,  # the notebook's Workspace object (`ws` is not defined here)
    datastore_name=adlsgen1_datastore_name,  # Datastore name
    subscription_id=subscription_id,  # subscription id for the ADLS Gen1 storage account
    resource_group=resource_group,  # resource group for the ADLS Gen1 storage account
    store_name=store_name,  # ADLS Gen1 storage account name
)
```

## Upload files to cred-less datastore and create a file dataset


```
# Upload a local folder to the credential-less datastore and get back a file dataset.
# The call is made with overwrite=True and with progress output enabled.
credless_target = DataPath(adls_datastore_cred_less, '<upload path on the datastore>')
dataset_credless = Dataset.File.upload_directory(
    src_dir='<your source folder for uploading>/',
    target=credless_target,
    overwrite=True,
    show_progress=True,
)
```

# Uploading to credential-less datastore using MSI

## Create or Attach existing compute resource

#### Note
If you use an existing compute target, make sure it has an identity attached; otherwise, enter a new name so that the script below creates a new compute target with an identity.

```
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the cluster to attach to, or to create when no such cluster exists
cluster_name = '<your compute cluster name>'

try:
    # Look the cluster up by name; ComputeTargetException triggers the creation path below
    compute_target = ComputeTarget(workspace=workspace, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    # Provisioning settings for the new cluster; identity_type='SystemAssigned'
    # is requested so the cluster gets an identity attached.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
        identity_type='SystemAssigned',
    )

    compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)

    # Block until provisioning finishes (timeout: 20 minutes). With
    # min_node_count=None the cluster's own scale settings are used.
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )
else:
    print('Found existing compute target')

# Print the detailed status of the cluster, whether it was found or just created
cluster_status = compute_target.get_status()
print(cluster_status.serialize())
```

#### Note
Make sure that you grant access to the compute cluster created here in the Azure Data Lake Store Gen1 account (not only the storage account, but also the specific folder to access):
- Go to the Azure Data Lake Store Gen1 portal, click on "Data explorer".
- Click on "Access" then click "+".
- In the opened page, click "Select" and search for the following:
  - "{your workspace name}/computes/{the compute cluster name created above}"
- Grant all needed permissions.

## Prepare the remote run script

#### Note
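In the script below, replace the datastore name with the name of the credential-less datastore created above, and replace the datapath with the location on the datastore where the files should be uploaded.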

```
%%writefile data/run.py
def run():
    """Upload the current directory of the remote run to the credential-less datastore.

    Gets the workspace from the current run context, looks up the datastore
    named 'adlgen1credlessstore', and uploads './' to '/dataset_from_compute/'
    on that datastore with overwrite enabled. Progress messages and the
    resulting dataset are printed.
    """
    # Imports are kept inside the function, as in the original script.
    from azureml.core import Dataset, Datastore
    from azureml.core.run import Run
    from azureml.data.datapath import DataPath

    # The workspace is taken from the run context, so no config.json is read here.
    ws = Run.get_context().experiment.workspace
    print('Got workspace')
    print(ws)

    print('Getting datastore')
    # Replace with the name of your own credential-less datastore.
    dstore = Datastore.get(ws, 'adlgen1credlessstore')
    print('Got datastore')
    print(dstore)

    # Replace with the datastore path the files should be uploaded to.
    datapath = DataPath(dstore, '/dataset_from_compute/')
    print(datapath)

    saved_dataset = Dataset.File.upload_directory(
        src_dir='./',
        target=datapath,
        overwrite=True,
        show_progress=True,
    )
    print(saved_dataset)


run()
```

## Upload files from remote context to target credential-less datastore

```
from azureml.core import ScriptRunConfig, RunConfiguration, Experiment

# Run configuration whose target is the `compute_target` cluster
rc = RunConfiguration()
rc.target = compute_target

# create or load an experiment
experiment = Experiment(workspace=workspace, name='MyUploadingExperiment')
# submit the run.py script from the 'data' source directory using the run configuration above
config = ScriptRunConfig(source_directory='data', script='run.py', run_config=rc)
run = experiment.submit(config)
# wait until the submitted run has completed
run.wait_for_completion()
```