In [34]:
from azureml.core import Workspace
ws = Workspace.from_config()
print('Workspace name:'+ ws.name,
        'Azure region:'+ ws.location,
        'Subscription ID:' +ws.subscription_id,
        'Resource group:' + ws.resource_group)

Workspace name:labuser29ml Azure region:koreacentral Subscription ID:27db5ec6-d206-4028-b5e1-6004dca5eeef Resource group:rg29


In [35]:
#실험공간을 준비
from azureml.core import Experiment
experiment = Experiment(workspace=ws, name='diabetes-experiment')

In [36]:
# 데이터를 준비
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split #데이터 자르기

x_df=Diabetes.get_tabular_dataset().to_pandas_dataframe().dropna() #dropna() 중간에 null 값이나 없는 값들 drop시켜준다.
y_df = x_df.pop('Y') #pop 값을 꺼내서 저장해준다.

X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=66)

print(X_train)

     AGE  SEX   BMI     BP   S1     S2    S3    S4      S5   S6
440   36    1  30.0   95.0  201  125.2  42.0  4.79  5.1299   85
389   47    2  26.5   70.0  181  104.8  63.0  3.00  4.1897   70
5     23    1  22.6   89.0  139   64.8  61.0  2.00  4.1897   68
289   28    2  31.5   83.0  228  149.4  38.0  6.00  5.3132   83
101   53    2  22.2  113.0  197  115.2  67.0  3.00  4.3041  100
..   ...  ...   ...    ...  ...    ...   ...   ...     ...  ...
122   62    2  33.9  101.0  221  156.4  35.0  6.00  4.9972  103
51    65    2  27.9  103.0  159   96.8  42.0  4.00  4.6151   86
119   53    1  22.0   94.0  175   88.0  59.0  3.00  4.9416   98
316   53    2  27.7   95.0  190  101.8  41.0  5.00  5.4638  101
20    35    1  21.1   82.0  156   87.8  50.0  3.00  4.5109   95

[353 rows x 10 columns]


In [37]:
# 모델 훈련, 로그, 모델 파일 관리
from sklearn.linear_model import Ridge #모델 훈련 알고리즘. ridge은 a알파 값이 있음. 파라메타로 바꿔줘야해.
from sklearn.metrics import mean_squared_error #평가
from sklearn.externals import joblib #학습된 파일을 저장할때 pkl파일로 저장되는데 재사용 가능
import math

alphas = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]

for alpha in alphas:
    #실험의 기록
    run = experiment.start_logging() #기록1
    run.log('alpha_value', alpha)

    # print(alpha)
    #학습시키기 10번 반복하면서 학습.
    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)
    #예측 결과보기
    y_pred = model.predict(X_test)
    # mse = mean_squared_error(y_test, y_pred) #두값을 비교해서 mean_squar에 들어가서 평가하게됨
    rmse = math.sqrt(mean_squared_error(y_test, y_pred)) #r = 루트

    run.log('rmse', rmse) #기록2  alpha, rmse 각각의 실험의 기록이 남게됨.

    # print('model_alpha={0}, mse={1}'.format(alpha, mse))
    print('model_alpha={0}, rmse={1}'.format(alpha, rmse))

    #모델을 파일로 저장하는 부분
    # 실험의 기록 저장
    model_name = 'model_alhpa_' + str(alpha) + '.pkl'
    # 파일의 경로 설정
    filename = 'outputs/' + model_name

    joblib.dump(value=model, filename=filename)

    #Azure ML Service에 모델 파일을 업로드 하는 부분
    run.upload_file(name=model_name, path_or_stream=filename)

    run.complete()
    print(f'{alpha} experiment completed')

model_alpha=0.1, rmse=56.60520331339142
0.1 experiment completed
model_alpha=0.2, rmse=56.61060264545032
0.2 experiment completed
model_alpha=0.3, rmse=56.616243245483616
0.3 experiment completed
model_alpha=0.4, rmse=56.622107088710145
0.4 experiment completed
model_alpha=0.5, rmse=56.62817734275138
0.5 experiment completed
model_alpha=0.6, rmse=56.63443828302745
0.6 experiment completed
model_alpha=0.7, rmse=56.64087521475942
0.7 experiment completed
model_alpha=0.8, rmse=56.64747440101076
0.8 experiment completed
model_alpha=0.9, rmse=56.654222996253125
0.9 experiment completed
model_alpha=1, rmse=56.66110898499056
1 experiment completed


In [41]:
#실험객체 어디에 있는 지 확인해보기
experiment

Name,Workspace,Report Page,Docs Page
diabetes-experiment,labuser29ml,Link to Azure Machine Learning studio,Link to Documentation


In [39]:
# Best Model을 탐색 후 다운로드
minimum_rmse = None
minimum_rmse_runid = None

for exp in experiment.get_runs():
    # print(exp.get_metrics()['rmse'])
    run_metrics = exp.get_metrics() #get_metrics 성능 지표, 로그, 통계 등과 같은 데이터를 추출하여 분석
    run_details = exp.get_details()

    run_rmse = run_metrics['rmse']
    run_id = run_details['runId']

    #가장 낮은 rmse값을 가진 실행 ID를 구하는 부분
    if minimum_rmse is None:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id
    
    else:
        if run_rmse < minimum_rmse:
            minmum_rmse = run_rmse
            minimum_rmse_runid = run_id

print('Best run_id:' + minimum_rmse_runid)
print('Best run_id_rmse:' + str(minimum_rmse))

Best run_id:76ddece8-7439-4e25-add6-8ca64871bce5
Best run_id_rmse:56.654222996253125


In [40]:
# Best model 다운로드
from azureml.core import Run
best_run = Run(experiment=experiment, run_id = minimum_rmse_runid)
print(best_run.get_file_names())

best_run.download_file(name=str(best_run.get_file_names()[0]))

['model_alhpa_0.1.pkl', 'outputs/model_alhpa_0.1.pkl']


In [61]:
#데이터 저장
import numpy as np
from azureml.core import Dataset

np.savetxt('features.csv', X_train, delimiter=',')
np.savetxt('labels.csv', y_train, delimiter=',')

#데이터스토어 정보 가져오기
datastore= ws.get_default_datastore()

#데이터 스토어에 데이터 업로드, 저장
datastore.upload_files(files=['./features.csv','./labels.csv'],
                        target_path='diabetes-experiment/',
                        overwrite=True)

Uploading an estimated of 2 files
Uploading ./features.csv
Uploaded ./features.csv, 1 files out of an estimated total of 2
Uploading ./labels.csv
Uploaded ./labels.csv, 2 files out of an estimated total of 2
Uploaded 2 files


$AZUREML_DATAREFERENCE_aaffd986b4b74deeb325395aea3ea76d

In [62]:
import os
os.listdir()

['.amlignore',
 '.amlignore.amltmp',
 '.ipynb_aml_checkpoints',
 '5_4 Azure ML.ipynb',
 '5_4 azure ml.ipynb.amltmp',
 'AutoMLGeneratedCode',
 'DiabetesExp.ipynb',
 'diabetesexp.ipynb.amltmp',
 'feature.csv',
 'features.csv',
 'labels.csv',
 'model_alhpa_0.1.pkl',
 'outputs']

In [64]:
feature_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore,'diabetes-experiment/features.csv')])
label_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore,'diabetes-experiment/labels.csv')])

In [55]:
import sklearn
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

In [68]:
#시스템에 모델의 등록
model = Model.register(workspace=ws,
                model_name='diabetes-experiment-model',
                model_path= f'./{str(best_run.get_file_names()[0])}',
                model_framework=Model.Framework.SCIKITLEARN,
                model_framework_version=sklearn.__version__,
                sample_input_dataset=feature_dataset,
                sample_output_dataset=label_dataset,
                resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),
                description='Ridge regression model to predict diabetes progression',
                tags={'area':'diabetes','type':'regression'}
                )

Registering model diabetes-experiment-model


In [69]:
print('Model name :', model.name)
print('Model version :', model.version)

Model name : diabetes-experiment-model
Model version : 1


In [70]:
#모델의 배포
service_name = 'diabetes-service'

service = Model.deploy(ws, service_name, [model], overwrite=True)
service.wait_for_deployment(show_output=True)

To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration 
  service = Model.deploy(ws, service_name, [model], overwrite=True)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2023-05-08 02:15:32+00:00 Creating Container Registry if not exists..
2023-05-08 02:25:33+00:00 Registering the environment.
2023-05-08 02:25:34+00:00 Uploading autogenerated assets for no-code-deployment.
2023-05-08 02:25:36+00:00 Building image..
2023-05-08 02:35:49+00:00 Generating deployment configuration.
2023-05-08 02:35:50+00:00 Submitting deployment to compute..
2023-05-08 02:35:56+00:00 Checking the status of deployment diabetes-service..
2023-05-08 02:37:28+00:00 Checking the status of inference endpoint diabetes-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [71]:
#json 데이터 만들기(테스트는 json을 사용하기 때문) - 서비스 안에서 사용할 
import json
 
input_payload = json.dumps({
    'data': X_train[0:2].values.tolist(),
    'method': 'predict'
})
# tolist스타일로 만들고 predict해

output = service.run(input_payload)
 
print(output)

{'predict': [204.9450693706217, 74.4641225933554]}
