In [1]:
# 실험공간의 생성 - 애저에서 작업시에 이용
from azureml.core import Experiment
experiment = Experiment(workspace=ws, name='diabetes-experiment')

In [2]:
# 애저의 당뇨병 데이터셋을 호출
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

# 애저에는 데이터를 바로 데이터프레임으로 가져올 수 있도록 편의성 기능이 들어있음
x_df = Diabetes.get_tabular_dataset().to_pandas_dataframe
y_df = x_df.pop('Y')
# df에서 pop() 메소드는 해당 컬럼을 기존 데이터에서 들어내서 입력해준다

X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=66)
print(X_train)

ModuleNotFoundError: No module named 'azureml'

In [3]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib
import math

alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
for alpha in alphas:
   run = experiment.start_logging()
#    실험마다 로그(기록)를 남기게 함
   run.log('alpha_value', alpha)

   model = Ridge(alpha=alpha)
   model.fit(X_train, y_train)

   y_pred = model.predict(X_test)
   rmse = math.sqrt(mean_squared_error(y_test, y_pred))
   run.log('rmse', rmse)

   model_name = 'model_alpha_' + str(alpha)
   filename = 'outputs/' + model_name

   joblib.dump(value=model, filename=filename)
   run.upload_file(name=model_name, path_or_stream=filename)
   # /outputs 폴더 아래 모델명으로 각각 저장되고, 클라우드에도 업로드

   print(f'{alpha} experiment completed')

SyntaxError: '(' was never closed (897958573.py, line 22)

In [None]:
# Best model 탐색 후 다운로드

minimun_rmse_runid = None
minimum_rmse = None

for  run in experiment.get_runs():
    run_metrics = run.get_metrics()
    run_details = run.get_details()

    run_rmse = run_metrics['rmse']
    run_id = run_details['runId']
    
    if minimum_rmse is None:
        minimum_rmse = run_rmse
        minimun_rmse_runid = run_id
    else:
        if minimum_rmse > run_rmse:
            minimum_rmse = run_rmse
            minimun_rmse_runid = run_id

print('Best run_id: '+ minimun_rmse_runid)
print('Best rmse: '+ str(minimum_rmse))

In [None]:
from azureml.core import Run

best_run = Run(experiment=experiment, run_id=minimun_rmse_runid)
best_run.download_file(name=str(best_run.get_file_names()[0]))

In [None]:
import numpy as np
from azureml.core import Dataset

np.savetxt('features.csv', X_train, delimiter=',')
np.savetxt('labels.csv', y_train, delimiter=',')



datastore = ws.get_default_datastore()
datastore.upload_files(files=['./features.csv', './labels.csv'],
                       target_path='diabetes-experiment/',
                       overwrite=True)


input_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'diabetes-experiment/features.csv')])
output_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'diabetes-experiment/labels.csv')])

In [None]:
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration




model = Model.register(workspace=ws,
                       model_name='diabetes-experiment-model',
                       model_path=f"./{str(best_run.get_file_names()[0])}",
                       model_framework=Model.Framework.SCIKITLEARN,  
                       model_framework_version=sklearn.__version__,  
                       sample_input_dataset=input_dataset,
                       sample_output_dataset=output_dataset,
                       resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),
                       description='Ridge regression model to predict diabetes progression.',
                       tags={'area': 'diabetes', 'type': 'regression'})



print('Name:', model.name)
print('Version:', model.version)

In [None]:
service_name = 'diabetes-service'

service = Model.deploy(ws, service_name, [model], overwrite=True)
service.wait_for_deployment(show_output=True)

In [None]:
import json

input_payload = json.dumps({
    'data': X_train[0:2].values.tolist(),
    'method': 'predict'
})

output = service.run(input_payload)

print(output)