In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import azureml.core
from azureml.core import Workspace

# Report which Azure ML core SDK version is loaded in this kernel.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.2.0


In [2]:
# Build the workspace handle from the config.json workspace configuration file.
ws = Workspace.from_config()

# Echo the workspace identity as a single tab-separated line.
workspace_summary = (ws.name, ws.location, ws.resource_group)
print(*workspace_summary, sep='\t')

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code A7N632LAL to authenticate.
Interactive authentication successfully completed.
aure-ml-eus	eastus2	azureml-eus


In [3]:
from azureml.core import Experiment
# Name of the experiment that the training run in this notebook is submitted to.
experiment_name = 'CREDIT-CARD-EXPERIMENT'

# Experiment handle bound to the workspace loaded above.
exp = Experiment(name=experiment_name, workspace=ws)

In [4]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Locate the AmlCompute cluster used for training, or provision it when it does not exist.
# Every setting can be overridden through an environment variable.

# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
# Environment variables are always strings, so cast the node counts to int; otherwise an
# override such as AML_COMPUTE_CLUSTER_MIN_NODES=1 would be passed on as the string "1".
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

# Read the workspace's compute targets once and reuse the result for both the membership
# test and the lookup.
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # A target with this name exists but is not an AmlCompute cluster. It is still
        # used (as before), but say so instead of continuing silently.
        print('WARNING: compute target ' + compute_name +
              ' exists but is not an AmlCompute cluster (found ' +
              type(compute_target).__name__ + ')')
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(
        ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

creating a new compute target...
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2020-04-17T10:48:35.847000+00:00', 'errors': None, 'creationTime': '2020-04-17T10:47:29.246792+00:00', 'modifiedTime': '2020-04-17T10:48:46.101574+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [29]:
# Dataset is imported here because no earlier cell brings it into scope; without this
# import the cell raises NameError on a fresh kernel (Restart & Run All).
from azureml.core import Dataset

# Look up the dataset registered in the workspace under this name; ending the cell on the
# variable displays its definition.
credit_card_ds = Dataset.get_by_name(ws, name='credit-card-data')
credit_card_ds

{
  "source": [
    "('workspaceblobstore', 'UI/04-17-2020_101340_UTC/creditcard.csv')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "70f56a63-00f3-41f9-8a9a-1743921b2006",
    "name": "credit-card-data",
    "version": 1,
    "workspace": "Workspace.create(name='aure-ml-eus', subscription_id='23b3eeaf-8619-495c-b837-5dece9a29613', resource_group='azureml-eus')"
  }
}

In [18]:
# Local folder (under the current working directory) that holds the training script.
local_dir_name = "credit_card_local"
script_folder = os.path.join(os.getcwd(), local_dir_name)

# Create it if needed; an already-existing folder is not an error.
os.makedirs(script_folder, exist_ok=True)

In [33]:
%%writefile $script_folder/train.py

import argparse
import os
import numpy as np


from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib

from azureml.core import Run, Dataset


# let user feed in 2 parameters, the dataset to mount or download, and the regularization rate of the logistic regression model
parser = argparse.ArgumentParser()
parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regularization rate')
args = parser.parse_args()


run = Run.get_context()

workspace = run.experiment.workspace

dataset_name = 'credit-card-data'

# Get a dataset by name
credit_card_ds = Dataset.get_by_name(workspace=workspace, name=dataset_name)

# Load a TabularDataset into pandas DataFrame
data = credit_card_ds.to_pandas_dataframe()

X_train = data.drop(['Class'], axis=1)
y_train = data['Class']


print('Train a logistic regression model with regularization rate of', args.reg)
clf = LogisticRegression(C=1.0/args.reg, solver="liblinear", multi_class="auto", random_state=42)
clf.fit(X_train, y_train)

print('Predict the test set')
y_hat = clf.predict(X_train)

# calculate accuracy on the prediction
acc = np.average(y_hat == y_train)
print('Accuracy is', acc)

run.log('regularization rate', np.float(args.reg))
run.log('accuracy', np.float(acc))

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=clf, filename='outputs/sklearn_mnist_model.pkl')

Overwriting /mnt/batch/tasks/shared/LS_root/mounts/clusters/ajith-compute-instance1/code/users/avajrala23/workingfolder/credit_card_local/train.py


In [20]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Pip packages installed into the environment used by the training run.
pip_requirements = [
    'azureml-sdk',
    'scikit-learn',
    'azureml-dataprep[pandas,fuse]>=1.1.14',
]

# Environment named 'my_env' whose conda dependencies are the pip packages above.
env = Environment('my_env')
cd = CondaDependencies.create(pip_packages=pip_requirements)
env.python.conda_dependencies = cd

In [25]:
from azureml.train.sklearn import SKLearn

# Command-line arguments passed to the training script.
script_params = {'--regularization': 0.5}

# Estimator that runs train.py from the script folder on the compute target,
# inside the environment defined above.
est = SKLearn(
    entry_script='train.py',
    source_directory=script_folder,
    script_params=script_params,
    environment_definition=env,
    compute_target=compute_target,
)



In [34]:
# Submit the estimator to the experiment; ending the cell on `run` displays the
# run summary (experiment, id, type, status and links).
run = exp.submit(est)
run

Experiment,Id,Type,Status,Details Page,Docs Page
CREDIT-CARD-EXPERIMENT,CREDIT-CARD-EXPERIMENT_1587123016_a1871d76,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [35]:
from azureml.widgets import RunDetails
# Show the run-details widget for the submitted run.
details_widget = RunDetails(run)
details_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [36]:
# Wait for the submitted run to finish; the returned run details become the cell output.
# specify show_output=True for a verbose log
run_details = run.wait_for_completion(show_output=False)
run_details

{'runId': 'CREDIT-CARD-EXPERIMENT_1587123016_a1871d76',
 'target': 'cpucluster',
 'status': 'Completed',
 'startTimeUtc': '2020-04-17T11:30:53.621711Z',
 'endTimeUtc': '2020-04-17T11:31:56.730612Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '0a0ba6bd-6ede-4c64-b490-91ce7c053aee',
  'AzureML.DerivedImageName': 'azureml/azureml_c90c6bb84ec46174ab7a0b9206e7be8e',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': '70f56a63-00f3-41f9-8a9a-1743921b2006'}, 'consumptionDetails': {'type': 'Reference'}}],
 'runDefinition': {'script': 'train.py',
  'useAbsolutePath': False,
  'arguments': ['--regularization', '0.5'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'cpucluster',
  'dataReferences': {},
  'data': {},
  'jobName': None,
  'maxRunDurationSeconds': None,
  'nodeCount': 1,
  'environment': {'na

In [37]:
# Metrics logged by train.py through run.log(): regularization rate and accuracy.
logged_metrics = run.get_metrics()
print(logged_metrics)

{'regularization rate': 0.5, 'accuracy': 0.9992064801778047}


In [38]:
# Files recorded for the run: logs plus anything the script saved under outputs/.
run_files = run.get_file_names()
print(run_files)

['azureml-logs/55_azureml-execution-tvmps_6f8d439b7a2dc3bcc435ff31d62bc37967fcfe033c8c11e2458c75a449c8e40a_d.txt', 'azureml-logs/65_job_prep-tvmps_6f8d439b7a2dc3bcc435ff31d62bc37967fcfe033c8c11e2458c75a449c8e40a_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_6f8d439b7a2dc3bcc435ff31d62bc37967fcfe033c8c11e2458c75a449c8e40a_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/127_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/sklearn_mnist_model.pkl']


In [40]:
# register model
registered_name = 'credit_card_model'
artifact_path = 'outputs/sklearn_mnist_model.pkl'  # file written by train.py
model = run.register_model(model_name=registered_name, model_path=artifact_path)

# Print the registered model's name, id and version on one tab-separated line.
print(model.name, model.id, model.version, sep='\t')

credit_card_model	credit_card_model:1	1


In [41]:
# Optionally, delete the Azure Machine Learning Compute cluster
# NOTE: this deletes the `compute_target` set up earlier in this notebook; skip this cell
# if the cluster should be kept for later runs.
compute_target.delete()