In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import azureml.core
from azureml.core import Workspace

# Print the installed Azure ML SDK version so the recorded cell output shows
# which SDK release this notebook was executed with.
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.57


In [2]:
# Build the workspace handle from the config.json file in the current folder.
ws = Workspace.from_config()

# Show name, region and resource group on one tab-separated line.
workspace_summary = (ws.name, ws.location, ws.resource_group)
print(*workspace_summary, sep='\t')

image-test	japaneast	docs-aml


In [3]:
from azureml.core import Experiment
# Name of the Azure ML experiment used by this notebook.
experiment_name = 'hinanoORneru'

# Experiment handle bound to the workspace `ws`; a later cell submits the
# estimator through this object (exp.submit).
exp = Experiment(workspace=ws, name=experiment_name)

In [4]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Cluster name and scale limits; each can be overridden with an environment variable.
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
# Environment values arrive as strings, so cast to int: the node counts then
# have the same type whether they come from the environment or the defaults.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    # A target is already registered under this name: reuse it.
    compute_target = ws.compute_targets[compute_name]
    if compute_target and isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # The target is still used (as before), but say so instead of
        # staying silent when it is not an AmlCompute cluster.
        print('warning: compute target ' + compute_name +
              ' exists but is not an AmlCompute cluster')
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(
        ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

found compute target. just use it. cpucluster


In [5]:
import os
# Folder later passed to the estimator as its source directory: the current
# working directory, with a trailing separator added by joining with ''.
# NOTE(review): the recorded submit output in this notebook fails with
# "Your project exceeds the file limit of 2000" while snapshotting this
# folder; consider a dedicated folder holding only train.py and its data.
current_dir = os.getcwd()
script_folder = os.path.join(current_dir, '')

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(script_folder, exist_ok=True)

In [6]:
%%writefile train.py
# %%writefile $script_folder/train.py

import argparse
import os
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib

from azureml.core import Run 
from utils import load_data
from PIL import Image

import os, glob

WOMEN = ["hinano", "neru"]


# Dataset container for the labelled face images.
class BeautifulWomen:
    """Bundle of image data and labels, stored exactly as passed in.

    Attributes:
        data: value given as ``data`` (the loader passes flattened pixel arrays).
        target: value given as ``target`` (the loader passes integer labels).
        target_names: value given as ``target_names`` (label index -> name).
        images: value given as ``images`` (the loader passes unflattened arrays).
    """

    def __init__(self, data, target, target_names, images):
        self.data = data
        self.target = target
        self.target_names = target_names
        self.images = images

    def keys(self):
        """Print and return the names of the instance attributes.

        The printed text is unchanged; the list is additionally returned so
        callers can use the names programmatically instead of only reading
        them from stdout.

        Returns:
            list of str: ``["data", "target", "target_names", "images"]``.
        """
        print("[data, target, target_names, images]")
        return ["data", "target", "target_names", "images"]
        


def load_beautiful_woman(dir):
    """Load every ``*.jpeg`` image of each person in WOMEN as grayscale arrays.

    Images are read from ``<dir>/<name>/*.jpeg`` for each ``name`` in WOMEN.
    The label of an image is the index of its person in WOMEN.

    Args:
        dir: Folder containing one sub-folder per person. A trailing path
            separator is optional.

    Returns:
        BeautifulWomen: ``data`` holds one flattened pixel array per image,
        ``target`` the integer labels, ``target_names`` the names from WOMEN
        and ``images`` the unflattened grayscale arrays.

    Note:
        Images are not resized here. Presumably all files already share the
        same dimensions; otherwise ``data`` cannot form a regular 2-D matrix
        (TODO confirm against the image set).
    """
    data = []
    target = []
    images = []

    for label, woman in enumerate(WOMEN):
        file_dir = os.path.join(dir, woman)
        # Sorted so the sample order does not depend on the file system.
        files = sorted(glob.glob(os.path.join(file_dir, "*.jpeg")))
        print("~~~~~~~~{}の画像をNumpy形式に変換し、Listに格納中~~~~~~~~".format(woman))
        for f in files:
            # Close the file handle as soon as the pixels have been converted;
            # convert() returns a new in-memory image.
            with Image.open(f) as img:
                gray = img.convert('L')       # convert to grayscale
            imgdata = np.asarray(gray)        # 2-D pixel array
            images.append(imgdata)
            data.append(imgdata.flatten())    # same pixels as a 1-D vector
            target.append(label)              # ground-truth label

    print("------------ListをNumpy形式に変換中--------------")
    data = np.array(data)
    target = np.array(target)
    # Derived from WOMEN so the names always line up with the label indices.
    target_names = np.array(WOMEN)
    images = np.array(images)

    return BeautifulWomen(data, target, target_names, images)


# Load both splits; the paths are relative to the working directory of the run.
test_women = load_beautiful_woman('images/test/')
train_women = load_beautiful_woman('images/train/')

# Command line: only the regularization rate is configurable.
parser = argparse.ArgumentParser()
parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regulation rate')
args = parser.parse_args()

X_train = train_women.data
X_test = test_women.data
y_train = train_women.target
# Score against the labels of the test split. This used to read
# train_women.target, which compared test predictions with training labels.
y_test = test_women.target

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep = '\n')

run = Run.get_context()
print('Train a logistic regression model with regularization rate of', args.reg)
# C is the inverse of the regularization rate passed on the command line.
clf = LogisticRegression(C=1.0/args.reg, solver="liblinear", multi_class="auto", random_state=42)
clf.fit(X_train, y_train)

print('Predict the test set')
y_hat = clf.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
acc = np.average(y_hat == y_test)
print('Accuracy is ', acc)

# Built-in float() replaces the np.float alias, which newer NumPy releases removed.
run.log('regularization rate', float(args.reg))
run.log('accuracy', float(acc))

# Persist the fitted model under outputs/.
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=clf, filename='outputs/sklearn_mnist_model.pkl')

Overwriting train.py


In [7]:
from azureml.train.sklearn import SKLearn

# Command-line arguments forwarded to train.py, which reads them with argparse.
script_params = {'--regularization': 0.5}

# NOTE(review): source_directory is the whole working directory (script_folder).
# The recorded submit output in this notebook fails while snapshotting it with
# "Your project exceeds the file limit of 2000"; pointing this at a folder that
# holds only train.py and the files it needs would avoid that.
estimator_settings = dict(
    source_directory=script_folder,
    script_params=script_params,
    compute_target=compute_target,
    entry_script='train.py',
)
est = SKLearn(**estimator_settings)

In [8]:
# Submit the estimator to the experiment; the cells below use `run` to
# monitor the run and read its metrics.
run = exp.submit(config=est)
# Bare expression so the notebook renders the run object as the cell output.
run

Submitting /mnt/azmnt/code/Users/live.com#tomocha.marika directory for run. The size of the directory >= 25 MB, so it can take a few minutes.


TrainingException: TrainingException:
	Message: ====================================================================

While attempting to take snapshot of /mnt/azmnt/code/Users/live.com#tomocha.marika
Your project exceeds the file limit of 2000.

====================================================================


	InnerException SnapshotException:
	Message: ====================================================================

While attempting to take snapshot of /mnt/azmnt/code/Users/live.com#tomocha.marika
Your project exceeds the file limit of 2000.

====================================================================


	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "====================================================================\n\nWhile attempting to take snapshot of /mnt/azmnt/code/Users/live.com#tomocha.marika\nYour project exceeds the file limit of 2000.\n\n====================================================================\n\n"
    }
}
	ErrorResponse 
{
    "error": {
        "message": "====================================================================\n\nWhile attempting to take snapshot of /mnt/azmnt/code/Users/live.com#tomocha.marika\nYour project exceeds the file limit of 2000.\n\n====================================================================\n\n"
    }
}

In [None]:
from azureml.widgets import RunDetails
# Display the run-details widget for the submitted run inside the notebook.
RunDetails(run).show()

In [None]:
# Wait for the submitted run to complete before reading its metrics in the next cell.
run.wait_for_completion(show_output=False)  # specify True for a verbose log

In [None]:
# Print the metrics recorded for the run; train.py logs 'regularization rate'
# and 'accuracy' through run.log.
print(run.get_metrics())