In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import azureml.core
from azureml.core import Workspace

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.57


In [2]:
# load workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
print(ws.name, ws.location, ws.resource_group, sep='\t')

image-test	japaneast	docs-aml


In [3]:
from azureml.core import Experiment
experiment_name = 'hinanoORneru'

exp = Experiment(workspace=ws, name=experiment_name)

In [4]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
compute_min_nodes = os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0)
compute_max_nodes = os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4)

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print('found compute target. just use it. ' + compute_name)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(
        ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

found compute target. just use it. cpucluster


In [5]:
import os
script_folder = os.path.join(os.getcwd(), 'hinanoORneru')
os.makedirs(script_folder, exist_ok=True)

In [6]:
%%writefile $script_folder/train.py

import argparse
import os
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib

from azureml.core import Run 
from utils import load_data

import beautiful_women

test_women = beautiful_women.load_beautiful_woman('../images/test/')
train_women = beautiful_women.load_beautiful_woman('../images/train/')

parser = argparse.ArgumentParser()
# parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
# parser.add_argument('--test', type=BeautifulWomen, dest='test')
# parser.add_argument('--train', type=BeautifulWomen, dest='train')
parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regulation rate')
args = parser.parse_args()

X_train = train_women.data
X_test = test_women.data
y_train = train_women.target
y_test = train_women.target

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep = '\n')

run = Run.get_context()
print('Train a logistic regression model with regularization rate of', args.reg)
clf = LogisticRegression(C=1.0/args.reg, solver="liblinear", multi_class="auto", random_state=42)
clf.fit(X_train, y_train)

print('Predict the test set')
y_hat = clf.predict(X_test)

acc = np.average(y_hat == y_test)
print('Accuracy is ', acc)

run.log('regularization rate', np.float(args.reg))
run.log('accuracy', np.float(acc))

os.makedirs('outputs', exist_ok=True)
joblib.dump(value=clf, filename='outputs/sklearn_mnist_model.pkl')

Overwriting /mnt/azmnt/code/Users/live.com#tomocha.marika/hinanoORneru/train.py


In [7]:
from azureml.train.sklearn import SKLearn
import beautiful_women

script_params = {
    '--regularization': 0.5
}

# script_params = {
#     '--test' : test_women,
#     '--train' : train_women,
#     '--regularization': 0.5
# }

est = SKLearn(source_directory=script_folder,
             script_params=script_params,
             compute_target=compute_target,
             entry_script='train.py')

In [8]:
import beautiful_women

run = exp.submit(config=est)
run

Experiment,Id,Type,Status,Details Page,Docs Page
hinanoORneru,hinanoORneru_1568029526_439fc67d,azureml.scriptrun,Starting,Link to Azure Portal,Link to Documentation


In [9]:
from azureml.widgets import RunDetails
RunDetails(run).show()

A Jupyter Widget

In [10]:
run.wait_for_completion(show_output=False)  # specify True for a verbose log

{'error': {'error': {'code': 'UserError',
   'debugInfo': {'message': "No module named 'PIL'",
    'stackTrace': '  File "/mnt/batch/tasks/shared/LS_root/jobs/image-test/azureml/hinanoorneru_1568029526_439fc67d/mounts/workspacefilestore/azureml/hinanoORneru_1568029526_439fc67d/azureml-setup/context_manager_injector.py", line 99, in execute_with_context\n    runpy.run_path(sys.argv[0], globals(), run_name="__main__")\n  File "/opt/miniconda/lib/python3.6/runpy.py", line 263, in run_path\n    pkg_name=pkg_name, script_name=fname)\n  File "/opt/miniconda/lib/python3.6/runpy.py", line 96, in _run_module_code\n    mod_name, mod_spec, pkg_name, script_name)\n  File "/opt/miniconda/lib/python3.6/runpy.py", line 85, in _run_code\n    exec(code, run_globals)\n  File "train.py", line 12, in <module>\n    import beautiful_women\n  File "/mnt/batch/tasks/shared/LS_root/jobs/image-test/azureml/hinanoorneru_1568029526_439fc67d/mounts/workspacefilestore/azureml/hinanoORneru_1568029526_439fc67d/beauti

In [11]:
print(run.get_metrics())

{}


In [22]:
#preview
count = 0 
sample_size = 10
plt.figure(figsize=(16,6))

for i in range(sample_size):
    print(test_women.target[i])
    print(test_women.data[i])    

0
[225 225 225 ... 216 215 215]
0
[155 155 156 ... 146 146 146]
0
[111 111 111 ... 254 254 254]
0
[197 195 192 ... 175 176 175]
0
[119 130 145 ... 129 133 135]
0
[223 223 223 ... 210 124  90]
0
[103 110 117 ... 135 135 134]
0
[147  86 189 ...   4   5   0]
0
[ 76 106 106 ... 221 221 159]
0
[169 169 169 ... 151 151 151]


<Figure size 1152x432 with 0 Axes>

In [11]:
run = exp.submit(config=est)
run

TypeError: Object of type 'BeautifulWomen' is not JSON serializable

In [12]:
import shutil
shutil.copy('utils.py', script_folder)

'/mnt/azmnt/code/Users/live.com#tomocha.marika/hinanoORneru/utils.py'

In [17]:
import os
data_folder = os.path.join(os.getcwd(), 'data')
os.makedirs(data_folder, exist_ok=True)

In [11]:
import os
script_folder = os.path.join(os.getcwd(), 'sklearn-mnist')
os.makedirs(script_folder, exist_ok=True)

In [11]:
import beautiful_women

test_women = beautiful_women.load_beautiful_woman('images/test/')
train_women = beautiful_women.load_beautiful_woman('images/train/')

print(test_women.data.shape)
print(train_women.data.shape)

~~~~~~~~hinanoの画像をNumpy形式に変換し、Listに格納中~~~~~~~~
~~~~~~~~neruの画像をNumpy形式に変換し、Listに格納中~~~~~~~~
------------ListをNumpy形式に変換中--------------
~~~~~~~~hinanoの画像をNumpy形式に変換し、Listに格納中~~~~~~~~
~~~~~~~~neruの画像をNumpy形式に変換し、Listに格納中~~~~~~~~
------------ListをNumpy形式に変換中--------------
(231,)
(675, 50176)


In [19]:
ds = ws.get_default_datastore()
print(ds.datastore_type, ds.account_name, ds.container_name)

ds.upload(src_dir=data_folder, target_path='images',
         overwrite=True, show_progress=True)

AzureBlob imagetest4173729794 azureml-blobstore-6f9846d4-dc75-4cd5-a296-e560087facdc
Uploading an estimated of 0 files
Uploaded 0 files


$AZUREML_DATAREFERENCE_075ac617639d4b4cb7308da1f08a4608