In [5]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import azureml.core
from azureml.core import Workspace

# Report which core SDK version this kernel has loaded
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.18.0


In [7]:
# Connect to the workspace via Workspace.from_config(), then echo its
# name, location and resource group as one tab-separated line.
ws = Workspace.from_config()
workspace_summary = (ws.name, ws.location, ws.resource_group)
print(*workspace_summary, sep='\t')

CougsInAzure	westus2	CougsInAzure


In [8]:
from azureml.core import Experiment

# Name of the experiment that groups this notebook's runs in the workspace
experiment_name = 'TestModel1'
exp = Experiment(ws, experiment_name)

In [9]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Cluster settings; each one can be overridden through an environment variable.
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpu-cluster")
# os.environ.get returns a str whenever the variable is set, so convert the
# node counts explicitly instead of passing "0"/"4" to the provisioning call.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

# Read the workspace's compute targets once and reuse the result below.
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print("found compute target: " + compute_name)
    else:
        # Previously this case was silent; surface it so a wrongly typed
        # target is noticed before a run is submitted to it.
        print("warning: compute target " + compute_name
              + " exists but is not an AmlCompute cluster: "
              + type(compute_target).__name__)
else:
    print("creating new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes,
    )

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

found compute target: cpu-cluster


In [13]:
import os

# Directory (under the current working directory) that holds the training
# script; created on first use and left untouched when it already exists.
script_folder = os.path.join(os.getcwd(), "TestModel1")
os.makedirs(name=script_folder, exist_ok=True)

In [11]:
%%writefile $script_folder/train.py

import os
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import joblib
from azureml.core import Workspace, Dataset, Run
import azureml.core

ws = Workspace.from_config()

df = Dataset.get_by_name(ws, name='Alpha_Dataset_Featurized')
df.to_pandas_dataframe()

X = df["Data"].tolist()
y = df["Label"].tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=2)

run = Run.get_context()

dt = DecisionTreeClassifier().fit(X_train, y_train)

clf = DecisionTreeClassifier().fit(X_train, y_train)

print('Predict the test set')
y_hat = clf.predict(X_test)

# calculate accuracy on the prediction
acc = np.average(y_hat == y_test)
print('Accuracy is', acc)

#run.log('regularization rate', np.float(args.reg))
run.log('accuracy', np.float(acc))

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=clf, filename='outputs/TestModel1.pkl')

Writing $script_folder/train.py


FileNotFoundError: [Errno 2] No such file or directory: '$script_folder/train.py'

In [2]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Define the "Test-env1" environment: scikit-learn from conda plus
# azureml-defaults from pip, then register it with the workspace.
env = Environment(name="Test-env1")
cd = CondaDependencies.create(
    conda_packages=['scikit-learn'],
    pip_packages=['azureml-defaults'],
)
env.python.conda_dependencies = cd

env.register(workspace=ws)