In [21]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including library deprecation notices; consider narrowing the filter.
warnings.filterwarnings('ignore')

In [5]:
from azureml.core import Workspace # API that is already installed on Azure Machine Learning
ws = Workspace.from_config() # load the workspace settings from its configuration

In [7]:
# Print the Azure Machine Learning workspace info.
# When working from plain Python outside Azure ML, every one of these values
# has to be looked up and supplied by hand.
# NOTE: the subscription ID ends up in the saved cell output; clear the output
# before sharing the notebook.
print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription ID: ' + ws.subscription_id,
      'Resource Group: ' + ws.resource_group,
      sep='\n')  # one "label: value" pair per line

Workspace name: kweont Azure regionkoreacentral Subscription ID9d48b691-f278-4011-a9b7-2f877276c682 Resource Group:kweont


In [10]:
# Create the experiment.
# The steps previously clicked through in the Azure Machine Learning UI are
# reproduced here in code.
from azureml.core import Experiment

experiment_name = 'diabetes-experiment'
experiment = Experiment(name=experiment_name, workspace=ws)
experiment

Name,Workspace,Report Page,Docs Page
diabetes-experiment,kweont,Link to Azure Machine Learning studio,Link to Documentation


In [14]:
from azureml.opendatasets import Diabetes 
from sklearn.model_selection import train_test_split # 데이터를 학습용, 실험용으로 나눌때 씀

In [18]:
# Load the open dataset into pandas, then drop every row that contains NaN.
diabetes_frame = Diabetes.get_tabular_dataset().to_pandas_dataframe()
x_df = diabetes_frame.dropna()

# 'Y' is the label column: pop() removes it from x_df and hands it back.
y_df = x_df.pop('Y')
x_df

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6
0,59,2,32.1,101.00,157,93.2,38.0,4.00,4.8598,87
1,48,1,21.6,87.00,183,103.2,70.0,3.00,3.8918,69
2,72,2,30.5,93.00,156,93.6,41.0,4.00,4.6728,85
3,24,1,25.3,84.00,198,131.4,40.0,5.00,4.8903,89
4,50,1,23.0,101.00,192,125.4,52.0,4.00,4.2905,80
...,...,...,...,...,...,...,...,...,...,...
437,60,2,28.2,112.00,185,113.8,42.0,4.00,4.9836,93
438,47,2,24.9,75.00,225,166.0,42.0,5.00,4.4427,102
439,60,2,24.9,99.67,162,106.6,43.0,3.77,4.1271,95
440,36,1,30.0,95.00,201,125.2,42.0,4.79,5.1299,85


In [19]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(x_df, y_df, test_size=0.2, random_state=66)
x_train, x_test, y_train, y_test = split_parts
print(x_train)

     AGE  SEX   BMI     BP   S1     S2    S3    S4      S5   S6
440   36    1  30.0   95.0  201  125.2  42.0  4.79  5.1299   85
389   47    2  26.5   70.0  181  104.8  63.0  3.00  4.1897   70
5     23    1  22.6   89.0  139   64.8  61.0  2.00  4.1897   68
289   28    2  31.5   83.0  228  149.4  38.0  6.00  5.3132   83
101   53    2  22.2  113.0  197  115.2  67.0  3.00  4.3041  100
..   ...  ...   ...    ...  ...    ...   ...   ...     ...  ...
122   62    2  33.9  101.0  221  156.4  35.0  6.00  4.9972  103
51    65    2  27.9  103.0  159   96.8  42.0  4.00  4.6151   86
119   53    1  22.0   94.0  175   88.0  59.0  3.00  4.9416   98
316   53    2  27.7   95.0  190  101.8  41.0  5.00  5.4638  101
20    35    1  21.1   82.0  156   87.8  50.0  3.00  4.5109   95

[353 rows x 10 columns]


In [22]:
from sklearn.linear_model import Ridge # linear model used for training
from sklearn.metrics import mean_squared_error # evaluation metric
import math

# `sklearn.externals.joblib` was deprecated and later removed from
# scikit-learn, so prefer the standalone `joblib` package and only fall back
# to the bundled copy on old environments that still ship it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [23]:
import os

# Regularisation strengths to sweep; one Azure ML run is recorded per value.
alphas = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists before the first model is written.
os.makedirs('outputs', exist_ok=True)

for alpha in alphas:
    # Start a run that collects the metrics and files of this iteration.
    run = experiment.start_logging()
    try:
        run.log('alpha_value', alpha)

        # Train
        model = Ridge(alpha=alpha)
        model.fit(x_train, y_train)

        # Evaluate on the held-out set; RMSE is the square root of the MSE.
        y_pred = model.predict(x_test)
        rmse = math.sqrt(mean_squared_error(y_test, y_pred))
        run.log('rmse', rmse)

        # Local path the fitted model is serialised to.
        model_name = 'model_alpha_' + str(alpha) + '.pkl'
        filename = 'outputs/' + model_name

        # Save locally, then attach the file to the run in the Azure ML cloud.
        joblib.dump(value=model, filename=filename)
        run.upload_file(name=model_name, path_or_stream=filename)
    except Exception as error:
        # Mark the run as failed instead of leaving it open, then surface the
        # original error to the notebook.
        run.fail(error_details=str(error))
        raise
    else:
        run.complete()

    print(f'{alpha} experiment completed')

0.1 experiment completed
0.2 experiment completed
0.3 experiment completed
0.4 experiment completed
0.5 experiment completed
0.6 experiment completed
0.7 experiment completed
0.8 experiment completed
0.9 experiment completed
1 experiment completed


In [24]:
# Display the experiment summary again now that the runs have been recorded.
experiment

Name,Workspace,Report Page,Docs Page
diabetes-experiment,kweont,Link to Azure Machine Learning studio,Link to Documentation


In [25]:
# Find the best model (to be deployed to the cloud later).

# Holders for the winning run. RMSE is an error measure, so smaller is better
# and the search keeps the minimum.
minimum_rmse_runid = None
minimum_rmse = None

for run in experiment.get_runs(): # get_runs() yields the runs recorded for this experiment
    run_rmse = run.get_metrics().get('rmse')

    # Skip runs that never logged a single numeric 'rmse' (for example runs
    # that stopped before evaluation); they cannot be ranked.
    if not isinstance(run_rmse, (int, float)):
        continue

    if minimum_rmse is None or run_rmse < minimum_rmse:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run.id

if minimum_rmse_runid is None:
    raise RuntimeError("No run with a logged 'rmse' metric was found in the experiment.")

print('Best run_id: ' + minimum_rmse_runid)
print('Best rmse: ' + str(minimum_rmse))

Best run_id: f629a04b-f38d-48c7-b7d1-5df0f65e64e7
Best rmse: 56.60520331339142


In [27]:
from azureml.core import Run # used to fetch the files of the best run
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)

# A run can hold several files, so pick the first pickled model explicitly
# rather than trusting whatever happens to be listed first.
model_file_names = [name for name in best_run.get_file_names() if str(name).endswith('.pkl')]
if not model_file_names:
    raise FileNotFoundError('Run ' + str(minimum_rmse_runid) + ' has no .pkl model file to download.')

# Download the file
best_run.download_file(name=str(model_file_names[0]))

In [28]:
# Now that the best model file has been saved, the data it was trained on
# should be stored alongside it as well.
from azureml.core import Dataset 

# Save the training data as CSV files WITH a header row.
# Dataset.Tabular.from_delimited_files treats the first row of each file as
# column names by default, so a header-less file would silently lose its first
# sample and end up with numeric values as column names.
x_train.to_csv('features.csv', index=False)
y_train.to_csv('labels.csv', index=False, header=True)

# Upload the files to the workspace's default datastore in the cloud.
# NOTE(review): the SDK reports `datastore.upload_files` as deprecated in
# favour of `FileDatasetFactory.upload_directory`; consider migrating.
datastore = ws.get_default_datastore()
datastore.upload_files(files=['./features.csv', './labels.csv'],
                       target_path='diabetes-experiment/',
                       overwrite=True)

input_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'diabetes-experiment/features.csv')])
output_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'diabetes-experiment/labels.csv')])


"datastore.upload_files" is deprecated after version 1.0.69. Please use "FileDatasetFactory.upload_directory" instead. See Dataset API change notice at https://aka.ms/dataset-deprecation.


In [29]:
# Register the downloaded model with the workspace so it can be deployed.
import sklearn

from azureml.core import Model # Azure ML's Model class; supplies register() and the framework constants used below
from azureml.core.resource_configuration import ResourceConfiguration

# Local file name of the model, taken from the best run's file listing.
model_file_name = str(best_run.get_file_names()[0])

registration_args = dict(
    workspace=ws,
    model_name='diabetes-experiment-model',
    model_path="./" + model_file_name,
    model_framework=Model.Framework.SCIKITLEARN,
    model_framework_version=sklearn.__version__,
    sample_input_dataset=input_dataset,
    sample_output_dataset=output_dataset,
    resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),
    description='Ridge regression model to predict diabetes progression.',
    tags={'area': 'diabetes', 'type': 'regression'},
)
model = Model.register(**registration_args)

for label, value in (('Name:', model.name), ('Version:', model.version)):
    print(label, value)

Registering model diabetes-experiment-model
Name: diabetes-experiment-model
Version: 1


In [30]:
# Deploy the registered model as a web service.
service_name = 'diabetes-service'

service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    overwrite=True,
)
# Wait for the deployment and stream its progress into the cell output.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-11-29 18:00:17+00:00 Creating Container Registry if not exists..
2022-11-29 18:10:17+00:00 Registering the environment.
2022-11-29 18:10:19+00:00 Uploading autogenerated assets for no-code-deployment..
2022-11-29 18:10:22+00:00 Building image.

In [None]:
import json 

# Build a JSON document from the held-out features and send it to the service
# deployed above; the service runs the prediction and returns the result.
request_body = {
    'data': x_test.values.tolist(),
    'method': 'predict'
}
input_payload = json.dumps(request_body)

output = service.run(input_payload)

print(output)

# With only a few users this setup can be used as it is.
# With many users it has to be connected to Kubernetes to scale out the servers.