In [3]:
# Load the Azure ML workspace via Workspace.from_config() and echo its
# identifying attributes as a quick sanity check.
from azureml.core import Workspace
ws = Workspace.from_config()
# Every label ends with ':' so it never runs straight into its value
# (the subscription label previously had no separator at all).
print('Workspace name:' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription ID:' + ws.subscription_id,
    'Resource Group:' + ws.resource_group
)

Workspace name:labuser79ml Azure region: koreacentral Subscription ID3f43e823-597d-46de-8acc-3385c3f14d12 Resource Group:mlops


In [5]:
# Create the experiment that the training runs are logged under.
from azureml.core import Experiment
experiment = Experiment(name='diabetes-experiment', workspace=ws)

In [9]:
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

# Materialise the open Diabetes dataset as a pandas frame, then drop rows
# that contain missing values.
diabetes_frame = Diabetes.get_tabular_dataset().to_pandas_dataframe()
x_df = diabetes_frame.dropna()
# pop() removes the target column 'Y' from x_df, leaving only the features.
y_df = x_df.pop('Y')

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable between executions.
split_parts = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=66,
)
X_train, X_test, y_train, y_test = split_parts
print(X_train)

     AGE  SEX   BMI     BP   S1     S2    S3    S4      S5   S6
440   36    1  30.0   95.0  201  125.2  42.0  4.79  5.1299   85
389   47    2  26.5   70.0  181  104.8  63.0  3.00  4.1897   70
5     23    1  22.6   89.0  139   64.8  61.0  2.00  4.1897   68
289   28    2  31.5   83.0  228  149.4  38.0  6.00  5.3132   83
101   53    2  22.2  113.0  197  115.2  67.0  3.00  4.3041  100
..   ...  ...   ...    ...  ...    ...   ...   ...     ...  ...
122   62    2  33.9  101.0  221  156.4  35.0  6.00  4.9972  103
51    65    2  27.9  103.0  159   96.8  42.0  4.00  4.6151   86
119   53    1  22.0   94.0  175   88.0  59.0  3.00  4.9416   98
316   53    2  27.7   95.0  190  101.8  41.0  5.00  5.4638  101
20    35    1  21.1   82.0  156   87.8  50.0  3.00  4.5109   95

[353 rows x 10 columns]


In [10]:
import math
import os

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# `sklearn.externals.joblib` is deprecated and absent from newer scikit-learn
# releases; prefer the standalone package and fall back only on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Regularisation strengths to sweep; each one becomes its own logged run.
alphas = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]

# joblib.dump does not create missing directories, so make sure the target
# folder exists before the first model is written.
os.makedirs('outputs', exist_ok=True)

for alpha in alphas:
    run = experiment.start_logging()
    try:
        run.log('alpha_value', alpha)

        model = Ridge(alpha=alpha)
        model.fit(X_train,y_train)

        # Evaluate on the held-out split and log the root-mean-squared error.
        y_pred = model.predict(X_test)
        rmse = math.sqrt(mean_squared_error(y_test,y_pred))
        run.log('rmse',rmse)

        model_name = 'model_alpha_' + str(alpha) + '.pkl'
        filename = 'outputs/' + model_name

        # Persist the fitted model locally, then attach it to the run.
        joblib.dump(value=model,filename=filename)
        run.upload_file(name=model_name, path_or_stream=filename)
    except Exception as exc:
        # Mark the run as failed so it is not left open, then surface the
        # original error to the notebook.
        run.fail(error_details=str(exc))
        raise
    else:
        run.complete()

    print(f'{alpha} experiment completed')



0.1 experiment completed
0.2 experiment completed
0.3 experiment completed
0.4 experiment completed
0.5 experiment completed
0.6 experiment completed
0.7 experiment completed
0.8 experiment completed
0.9 experiment completed
1.0 experiment completed


In [11]:
# Display the experiment's summary (name, workspace, report and docs links).
experiment

Name,Workspace,Report Page,Docs Page
diabetes-experiment,labuser79ml,Link to Azure Machine Learning studio,Link to Documentation


In [13]:
# NOTE: get_runs() hands back a generator, so this cell only shows the
# generator's repr; the runs themselves appear once it is iterated.
experiment.get_runs()

<generator object Run._rehydrate_runs at 0x7ff6484b5580>

In [12]:
# Search for the best model (lowest RMSE) so it can be downloaded afterwards.

minium_rmse_runid = None
minium_rmse = None

for run in experiment.get_runs():
    # Runs that never logged 'rmse' cannot be ranked; skip them instead of
    # raising KeyError part-way through the search.
    run_rmse = run.get_metrics().get('rmse')
    if run_rmse is None:
        continue

    # Strict '<' keeps the first run encountered when several share an RMSE.
    if minium_rmse is None or run_rmse < minium_rmse:
        minium_rmse = run_rmse
        # run.id avoids a separate get_details() request just for the id.
        minium_rmse_runid = run.id

if minium_rmse_runid is None:
    # Fail with a clear message rather than a TypeError from `str + None`.
    raise RuntimeError("No run with an 'rmse' metric was found in the experiment")

print('Best run_id:' + minium_rmse_runid)
print('Best rmse:' + str(minium_rmse))

Best run_id:b5e97d1c-fa7b-43b1-a121-d63d2260f6bd
Best rmse:56.605203313391435


In [15]:
from azureml.core import Run

# Re-open the best-scoring run by its id.
best_run = Run(run_id=minium_rmse_runid, experiment=experiment)

# Download the first file the run lists; presumably that is the pickled model
# uploaded during training — TODO confirm the listing order is reliable.
first_file_name = str(best_run.get_file_names()[0])
best_run.download_file(name=first_file_name)

In [16]:
import numpy as np
from azureml.core import Dataset

# from_delimited_files promotes the first row of each file to column headers
# by default, so write a real header line; without it the first sample would
# be consumed as column names. comments='' stops NumPy from prefixing the
# header with '# '.
np.savetxt('features.csv', X_train, delimiter=',',
           header=','.join(map(str, X_train.columns)), comments='')
np.savetxt('labels.csv', y_train, delimiter=',',
           header=str(y_train.name), comments='')

# NOTE(review): the SDK reports datastore.upload_files as deprecated in favour
# of FileDatasetFactory.upload_directory; kept here to preserve the upload
# layout — consider migrating.
datastore = ws.get_default_datastore()
datastore.upload_files(files=['./features.csv', './labels.csv'],
                       target_path='diabetes-experiment/',
                       overwrite=True)

# Tabular datasets over the uploaded CSVs, passed to Model.register as the
# sample input/output datasets.
input_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'diabetes-experiment/features.csv')])
output_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'diabetes-experiment/labels.csv')])

"datastore.upload_files" is deprecated after version 1.0.69. Please use "FileDatasetFactory.upload_directory" instead. See Dataset API change notice at https://aka.ms/dataset-deprecation.


Uploading an estimated of 2 files
Uploading ./features.csv
Uploaded ./features.csv, 1 files out of an estimated total of 2
Uploading ./labels.csv
Uploaded ./labels.csv, 2 files out of an estimated total of 2
Uploaded 2 files


In [17]:
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

# Relative path built from the first file name listed by the best run.
best_model_path = f"./{str(best_run.get_file_names()[0])}"

# Gather the registration settings in one place before making the call.
registration_args = {
    'workspace': ws,
    'model_name': 'diabetes-experiment-model',
    'model_path': best_model_path,
    'model_framework': Model.Framework.SCIKITLEARN,
    'model_framework_version': sklearn.__version__,
    'sample_input_dataset': input_dataset,
    'sample_output_dataset': output_dataset,
    'resource_configuration': ResourceConfiguration(cpu=1, memory_in_gb=0.5),
    'description': 'Ridge regression model to predict diabetes progression.',
    'tags': {'area': 'diabetes', 'type': 'regression'},
}

# Register the model file in the workspace along with its metadata.
model = Model.register(**registration_args)

print('Name:', model.name)
print('Version:', model.version)

Registering model diabetes-experiment-model
Name: diabetes-experiment-model
Version: 1
