## 1. Load libraries

In [None]:
from azure.identity import ClientSecretCredential
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.datafactory import DataFactoryManagementClient
from azure.mgmt.datafactory.models import *
from datetime import datetime, timedelta
import time

## 2. Create information functions

In [None]:
def print_item(group):
    """Write the identifying fields of an Azure object to stdout.

    ``name`` and ``id`` are always printed. ``location`` and ``tags`` are
    printed only when the object exposes those attributes. When the object
    has a ``properties`` attribute, it is handed to ``print_properties``.
    """
    print(f"\tName: {group.name}")
    print(f"\tId: {group.id}")

    # Optional fields: (label shown in the output, attribute name).
    optional_fields = (("Location", "location"), ("Tags", "tags"))
    for label, attr_name in optional_fields:
        if hasattr(group, attr_name):
            print(f"\t{label}: {getattr(group, attr_name)}")

    if hasattr(group, "properties"):
        print_properties(group.properties)


def print_properties(props):
    """Print the provisioning state of a properties object, if it has one.

    The "Properties" section is emitted only when ``props`` is truthy and
    carries a truthy ``provisioning_state`` attribute. A trailing block of
    blank lines is always printed, whatever ``props`` is.
    """
    state = getattr(props, "provisioning_state", None) if props else None
    if state:
        print("\tProperties:")
        print(f"\t\tProvisioning State: {state}")
    print("\n\n")


def print_activity_run_details(activity_run):
    """Print the status of an activity run plus its copy statistics or error.

    For a run whose status is ``'Succeeded'`` the ``dataRead``,
    ``dataWritten`` and ``copyDuration`` entries of ``activity_run.output``
    are printed. For any other status the ``message`` entry of
    ``activity_run.error`` is printed instead.
    """
    print("\n\tActivity run details\n")
    print(f"\tActivity run status: {activity_run.status}")

    if activity_run.status != 'Succeeded':
        error_message = activity_run.error['message']
        print(f"\tErrors: {error_message}")
        return

    # (label shown in the output, key looked up in the run output).
    copy_metrics = (
        ("Number of bytes read", 'dataRead'),
        ("Number of bytes written", 'dataWritten'),
        ("Copy duration", 'copyDuration'),
    )
    for label, key in copy_metrics:
        metric_value = activity_run.output[key]
        print(f"\t{label}: {metric_value}")

## 3. Authenticate to Azure

In [None]:
# Replace the placeholder with your Azure subscription ID.
subscription_id = 'insert subscription id here'

# Resource group name passed to every Data Factory call in this notebook.
rg_name = 'ADFCookbook'

# Data factory name. In a shared lab environment (for example XtremeLabs),
# make it globally unique, e.g. by appending a number or your initials.
df_name = 'ADFCookbook-From-Python'

# Client-secret credential; replace the three placeholders with the values
# of your application (client) ID, its secret, and your directory (tenant) ID.
credentials = ClientSecretCredential(
    tenant_id='insert directory (tenant) id',
    client_id='insert application (client) id',
    client_secret='insert client secret value',
)

# Region settings, each keyed by 'location'.
rg_params = dict(location='eastus')
df_params = dict(location='eastus')

## 4. Create Data Factory

In [None]:
# Management clients, both authenticated with the credential and
# subscription defined in the authentication cell.
resource_client = ResourceManagementClient(credentials, subscription_id)
adf_client = DataFactoryManagementClient(credentials, subscription_id)

# Take the region from df_params so it is configured in exactly one place
# instead of being repeated here as a literal.
df_resource = Factory(**df_params)
df = adf_client.factories.create_or_update(rg_name, df_name, df_resource)
print_item(df)

# Later cells create linked services, datasets and a pipeline inside this
# factory, so wait (bounded) until it reports a 'Succeeded' provisioning state.
provisioning_deadline = time.monotonic() + 300
while df.provisioning_state != 'Succeeded' and time.monotonic() < provisioning_deadline:
    time.sleep(1)
    df = adf_client.factories.get(rg_name, df_name)

if df.provisioning_state != 'Succeeded':
    # Best effort: report rather than abort, so the notebook can still be
    # continued manually once the factory is ready.
    print("\tWarning: data factory provisioning state is {}".format(df.provisioning_state))

## 5. Create a linked service

In [None]:
# Name under which the Azure Storage linked service is registered.
ls_name = 'ADFCookbookLinkedServicePython'

# NOTE: fill in the name and key of your Azure Storage account in the
# placeholders below, or replace the whole value with your connection string.
storage_string = SecureString(
    value='DefaultEndpointsProtocol=https;AccountName=insert account name here;AccountKey=insert account key here;EndpointSuffix=core.windows.net'
)

# Wrap the connection string in a linked-service resource and register it
# in the data factory.
ls_azure_storage = LinkedServiceResource(
    properties=AzureStorageLinkedService(connection_string=storage_string)
)
ls = adf_client.linked_services.create_or_update(
    rg_name,
    df_name,
    ls_name,
    ls_azure_storage,
)
print_item(ls)

## 6. Create input dataset

In [None]:
# Input dataset: a single blob file reached through the storage linked service.
ds_name = 'ADFCookbookDS-Input-Python'

# Pass `type` explicitly: newer azure-mgmt-datafactory releases require it as
# a keyword argument, and older releases accept this same value.
ds_ls = LinkedServiceReference(type='LinkedServiceReference', reference_name=ls_name)

blob_path = 'adfcookbook/input'
blob_filename = 'SalesOrders.txt'
ds_azure_blob = DatasetResource(
    properties=AzureBlobDataset(
        linked_service_name=ds_ls,
        folder_path=blob_path,
        file_name=blob_filename,
    )
)
ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob)
print_item(ds)

## 7. Create output dataset

In [None]:
# Output dataset: a blob folder (no file name) on the same linked service
# reference that the input dataset uses.
dsOut_name = 'ADFCookbookDS-Output-Python'
output_blobpath = 'adfcookbook/output'
dsOut_azure_blob = DatasetResource(
    properties=AzureBlobDataset(
        linked_service_name=ds_ls,
        folder_path=output_blobpath,
    )
)
dsOut = adf_client.datasets.create_or_update(
    rg_name,
    df_name,
    dsOut_name,
    dsOut_azure_blob,
)
print_item(dsOut)

## 8. Create a pipeline

In [None]:
# Create a copy activity that reads the input dataset and writes the output
# dataset, using blob source and sink objects built with their defaults.
act_name = 'ADFCookbookCopyData'
blob_source = BlobSource()
blob_sink = BlobSink()

# Pass `type` explicitly: newer azure-mgmt-datafactory releases require it as
# a keyword argument, and older releases accept this same value.
dsin_ref = DatasetReference(type='DatasetReference', reference_name=ds_name)
dsOut_ref = DatasetReference(type='DatasetReference', reference_name=dsOut_name)
copy_activity = CopyActivity(
    name=act_name,
    inputs=[dsin_ref],
    outputs=[dsOut_ref],
    source=blob_source,
    sink=blob_sink,
)

# Create a pipeline whose only activity is the copy activity; it takes no
# parameters.
p_name = 'ADFCookbookCopyDataPipeline'
params_for_pipeline = {}
p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline)
p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj)
print_item(p)

## 9. Create a pipeline run

In [None]:
# Start a run of the pipeline with an empty parameter set; the response's
# run_id is what the monitoring cell uses to look the run up.
run_response = adf_client.pipelines.create_run(
    rg_name,
    df_name,
    p_name,
    parameters={},
)

## 10. Monitor a pipeline run

In [None]:
# A run queried right after create_run is normally still queued or in
# progress, so poll until it reaches a terminal status. The wait is bounded
# so the cell cannot block the notebook indefinitely.
terminal_statuses = ('Succeeded', 'Failed', 'Cancelled')
poll_interval_seconds = 15
monitor_deadline = time.monotonic() + 600

pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id)
while pipeline_run.status not in terminal_statuses and time.monotonic() < monitor_deadline:
    time.sleep(poll_interval_seconds)
    pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id)

# Prints the last status seen: terminal, or the in-flight one on timeout.
print("\n\tPipeline run status: {}".format(pipeline_run.status))