## Register Dataset from Blob

In [None]:
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.workspace import Workspace

# Configure workspace: create an interactive-login credential and pass it
# explicitly when loading the workspace via Workspace.from_config.
auth = InteractiveLoginAuthentication()
ws = Workspace.from_config(auth=auth)

In [None]:
from azureml.core.datastore import Datastore
from azureml.core.dataset import Dataset
from azureml.data.dataset_type_definitions import FileType

# Name of the datastore to read from, the folder inside it, and the base
# names (without the .csv extension) of the files to register.
dstore_name = 'packtmldemodatastore'
ds_dir = "ml-latest-small"
ds_files = ['movies', 'links', 'ratings', 'tags']

# Load the workspace via Workspace.from_config
ws = Workspace.from_config()

# Look up the datastore by name in that workspace
dstore = Datastore.get(ws, dstore_name)

# Register one Dataset per CSV file
for file_name in ds_files:

    # Reference <ds_dir>/<file_name>.csv on the datastore and build an
    # in-memory Dataset from it.
    datapath = dstore.path('{}/{}.csv'.format(ds_dir, file_name))
    dataset = Dataset.auto_read_files(datapath)

    # Optional sanity check: dataset.head(5) returns the first 5 rows of
    # the Dataset as a pandas DataFrame.

    # Register under "movielens100k.<file_name>".
    # Note: file_type=FileType.CSV is not yet supported here.
    ds_name = 'movielens100k.{}'.format(file_name)
    ds_description = 'Movielens 100k - {} data'.format(file_name)
    dataset.register(
        workspace=ws,
        name=ds_name,
        description=ds_description,
        exist_ok=True,
    )

# Print every dataset registered in the workspace, each followed by a
# five-row preview and a separator line.
datasets = Dataset.list(ws)
for ds in datasets:
    print(ds)
    print(ds.head(5))
    print('---')

## Register Dataset from Pandas

In [None]:
from azureml.core.workspace import Workspace
from azureml.core.datastore import Datastore
from azureml.core.dataset import Dataset
from azureml.data.data_reference import DataReference

# Datastore that receives the uploaded data, and the name of the registered
# dataset that is read here and then re-registered.
dstore_name = 'mldemodatastore'
ds_dir = "ml-latest-small"
ds_file = "movielens100k.movies"

# Configure workspace
ws = Workspace.from_config()

# Access the already-registered dataset by name
dataset = Dataset.get(ws, ds_file)

# Load the Dataset into local memory as a pandas DataFrame
pdDf = dataset.to_pandas_dataframe()
print(pdDf.head())

# Retrieve the datastore the DataFrame will be uploaded to
dstore = Datastore.get(ws, dstore_name)

# Create a data reference.
# Important: the path needs to be a directory with a trailing slash.
data_ref = DataReference(dstore, path_on_datastore='preprocessed/movies/')

# Upload the DataFrame to the datastore at the referenced path and wrap the
# result in a new Dataset
dataset_tf = Dataset.from_pandas_dataframe(pdDf, path=data_ref)

# Register the pandas-backed Dataset.
# The name ds_file is already registered (Dataset.get above succeeded), so
# exist_ok=True on its own would just return the existing dataset and leave
# the new data unregistered. update_if_exist=True asks the SDK to update the
# existing registration with this definition instead.
dataset_tf.register(
    workspace=ws,
    name=ds_file,
    description='Movielens 100k - movies data preprocessed',
    exist_ok=True,
    update_if_exist=True,
)

## Dataset to Pandas

In [None]:
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.workspace import Workspace
from azureml.core.datastore import Datastore
from azureml.core.dataset import Dataset

dstore_name = 'mldemodatastore'
ds_file = "movielens100k.movies"

# Configure workspace
ws = Workspace.from_config()

# Access the registered dataset by name; use ds_file rather than repeating
# the literal so the name is defined in exactly one place.
dataset = Dataset.get(ws, ds_file)

# Profile the dataset and print the result
print(dataset.get_profile())

# Load the Dataset into local memory as a pandas DataFrame and preview it
df = dataset.to_pandas_dataframe()
print(df.head())