In [1]:
from azureml.core import Workspace

# Connect to the Azure ML workspace via Workspace.from_config().
ws = Workspace.from_config()

# Print one "label: value" pair per line. The original concatenated some labels
# directly onto their values (e.g. "Subscription ID7ae0...") and put every field
# on a single line, which made the summary hard to read.
print(f'Workspace name: {ws.name}',
      f'Azure region: {ws.location}',
      f'Subscription ID: {ws.subscription_id}',
      f'Resource Group: {ws.resource_group}',
      sep='\n')

Workspace name:labuser26ml1 Azure region: westus Subscription ID7ae06d59-97e1-4a36-bbfe-efb081b9b03b Resource Group:rg26


In [2]:
# Create the experiment: the named container in the workspace that the
# training runs below are logged under.
from azureml.core import Experiment

experiment = Experiment(
    name='diabetes-experiment',
    workspace=ws,
)

In [3]:
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

# Load the open Diabetes dataset as a pandas DataFrame and drop rows with missing values.
diabetes_dataset = Diabetes.get_tabular_dataset()
x_df = diabetes_dataset.to_pandas_dataframe().dropna()

# pop() removes the target column 'Y' from x_df, so x_df keeps only the features.
y_df = x_df.pop('Y')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=66,
)
print(X_train)

     AGE  SEX   BMI     BP   S1     S2    S3    S4      S5   S6
440   36    1  30.0   95.0  201  125.2  42.0  4.79  5.1299   85
389   47    2  26.5   70.0  181  104.8  63.0  3.00  4.1897   70
5     23    1  22.6   89.0  139   64.8  61.0  2.00  4.1897   68
289   28    2  31.5   83.0  228  149.4  38.0  6.00  5.3132   83
101   53    2  22.2  113.0  197  115.2  67.0  3.00  4.3041  100
..   ...  ...   ...    ...  ...    ...   ...   ...     ...  ...
122   62    2  33.9  101.0  221  156.4  35.0  6.00  4.9972  103
51    65    2  27.9  103.0  159   96.8  42.0  4.00  4.6151   86
119   53    1  22.0   94.0  175   88.0  59.0  3.00  4.9416   98
316   53    2  27.7   95.0  190  101.8  41.0  5.00  5.4638  101
20    35    1  21.1   82.0  156   87.8  50.0  3.00  4.5109   95

[353 rows x 10 columns]


In [4]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
import math
import os

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in 0.23.
# Prefer the standalone joblib package and fall back only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Regularisation strengths to try; one experiment run is recorded per value.
alphas = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]

# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs('outputs', exist_ok=True)

for alpha in alphas:
    run = experiment.start_logging()
    try:
        run.log('alpha_value', alpha)

        model = Ridge(alpha=alpha)
        model.fit(X_train, y_train)

        # Score on the held-out split and record the RMSE for this alpha.
        y_pred = model.predict(X_test)
        rmse = math.sqrt(mean_squared_error(y_test, y_pred))
        run.log('rmse', rmse)

        # Persist the fitted model locally, then attach it to the run.
        model_name = 'model_alpha_' + str(alpha) + '.pkl'
        filename = 'outputs/' + model_name

        joblib.dump(value=model, filename=filename)
        run.upload_file(name=model_name, path_or_stream=filename)
    except Exception as exc:
        # Mark the run as failed instead of leaving it open, then surface the error.
        run.fail(error_details=str(exc))
        raise
    run.complete()

    print(f'{alpha} experiment completed')



In [5]:
# Display the experiment's summary (name, workspace and links) as the cell output.
experiment

Name,Workspace,Report Page,Docs Page
diabetes-experiment,labuser26ml1,Link to Azure Machine Learning studio,Link to Documentation


In [6]:
# NOTE: get_runs() returns a generator, so this cell only shows the generator's
# repr; iterate over it to access the individual runs.
experiment.get_runs()

<generator object Run._rehydrate_runs at 0x7fb31f82f660>

In [7]:
# Find the best model: the run with the lowest logged RMSE.

minium_rmse_runid = None
minium_rmse = None

for run in experiment.get_runs():
    # Use .get() rather than ['rmse']: a run that never logged the metric
    # (e.g. one that failed early) would otherwise raise KeyError.
    run_rmse = run.get_metrics().get('rmse')

    # Skip runs that have no single numeric RMSE (missing, or not a scalar,
    # e.g. a list if the metric was logged more than once in the same run).
    if not isinstance(run_rmse, (int, float)):
        continue

    if minium_rmse is None or run_rmse < minium_rmse:
        minium_rmse = run_rmse
        # run.id avoids the extra get_details() service call just to read the id.
        minium_rmse_runid = run.id

# Fail with a clear message instead of a TypeError from the string concatenation below.
if minium_rmse_runid is None:
    raise RuntimeError("No run in the experiment has logged a numeric 'rmse' metric.")

print('Best run_id:' + minium_rmse_runid)
print('Best rmse:' + str(minium_rmse))

Best run_id:06b47421-ed8f-4eb5-b945-752e7ef92e8a
Best rmse:56.60520331339142


In [8]:
from azureml.core import Run

# Re-attach to the best run using the id found by the RMSE search.
best_run = Run(experiment=experiment, run_id=minium_rmse_runid)

# Pick the pickled model explicitly instead of assuming it is the first file
# attached to the run: a run may hold other artifacts, and nothing here
# guarantees the listing order.
pickled_models = [name for name in best_run.get_file_names() if name.endswith('.pkl')]
if not pickled_models:
    raise FileNotFoundError(f'Run {minium_rmse_runid} has no .pkl model artifact to download.')

best_run.download_file(name=pickled_models[0])

In [9]:
import numpy as np
from azureml.core import Dataset

# Write the training features and labels as plain CSV files.
# np.savetxt emits no header row here, only data rows.
np.savetxt('features.csv', X_train, delimiter=',')
np.savetxt('labels.csv', y_train, delimiter=',')

# Upload both files to the workspace's default datastore.
# NOTE(review): upload_files is reported as deprecated by the SDK (see the
# warning in this cell's output); migrating to the suggested replacement changes
# the call shape, so it is left as-is here.
datastore = ws.get_default_datastore()
datastore.upload_files(files=['./features.csv', './labels.csv'],
                       target_path='diabetes-experiment/',
                       overwrite=True)

# header=False: the CSVs have no header row. With the default header handling
# the first data row would be promoted to column names, dropping one sample and
# producing meaningless column names.
input_dataset = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'diabetes-experiment/features.csv')],
    header=False)
output_dataset = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'diabetes-experiment/labels.csv')],
    header=False)

"datastore.upload_files" is deprecated after version 1.0.69. Please use "FileDatasetFactory.upload_directory" instead. See Dataset API change notice at https://aka.ms/dataset-deprecation.


In [10]:
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

# Resolve the local model file from the best run's pickled artifact rather than
# from whatever file happens to be listed first on the run.
pickled_models = [name for name in best_run.get_file_names() if name.endswith('.pkl')]
if not pickled_models:
    raise FileNotFoundError('The best run has no .pkl model artifact to register.')
best_model_path = f"./{pickled_models[0]}"

# Register the model with framework metadata, sample input/output datasets and
# a resource configuration.
model = Model.register(workspace=ws,
                       model_name='diabetes-experiment-model',
                       model_path=best_model_path,
                       model_framework=Model.Framework.SCIKITLEARN,
                       model_framework_version=sklearn.__version__,
                       sample_input_dataset=input_dataset,
                       sample_output_dataset=output_dataset,
                       resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),
                       description='Ridge regression model to predict diabetes progression.',
                       tags={'area': 'diabetes', 'type': 'regression'})

print('Name:', model.name)
print('Version:', model.version)

Registering model diabetes-experiment-model
Name: diabetes-experiment-model
Version: 1


In [11]:
service_name = 'diabetes-service'

# Deploy the registered model as a web service; overwrite=True is passed so an
# existing service with the same name is replaced.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    overwrite=True,
)

# Wait for the deployment to finish, printing progress as it goes.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-11-04 05:23:13+00:00 Creating Container Registry if not exists..
2022-11-04 05:33:13+00:00 Registering the environment.
2022-11-04 05:33:17+00:00 Uploading autogenerated assets for no-code-deployment..
2022-11-04 05:33:24+00:00 Building image..
2022-11-04 05:43:40+00:00 Generating deployment configuration..
2022-11-04 05:43:41+00:00 Submitting deployment to compute..
2022-11-04 05:43:49+00:00 Checking the status of deployment diabetes-service..
2022-11-04 05:46:17+00:00 Checking the status of inference endpoint diabetes-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [12]:
import json

# Take the first two rows of the training features; .values.tolist() converts
# them into plain nested lists so they can be serialised as JSON.
sample_rows = X_train[0:2].values.tolist()

# Request body: the rows to score plus the model method to invoke.
input_payload = json.dumps({'data': sample_rows, 'method': 'predict'})

output = service.run(input_payload)

print(output)

{'predict': [204.9450693706217, 74.4641225933554]}
