# Automated ML

Import Dependencies. 

In [1]:
import logging

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import warnings
import os

# Squash warning messages for cleaner output in the notebook
warnings.showwarning = lambda *args, **kwargs: None

import azureml.core
from azureml.core import Experiment, Workspace, Dataset
from azureml.train.automl import AutoMLConfig
from datetime import datetime

In [2]:
print("The current version of the Azure ML SDK is", azureml.core.VERSION)

The current version of the Azure ML SDK is 1.18.0


In [3]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

quick-starts-ws-127982
aml-quickstarts-127982
southcentralus
a1ebbd25-46bc-446b-9547-5acab9b0125a


In [4]:
experiment_name = 'automl-classification'

experiment=Experiment(ws, experiment_name)

In [5]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "project-automl"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)
# For a more detailed view of current AmlCompute status, use get_status().
compute_targets = ws.compute_targets
for name, ct in compute_targets.items():
    print(name, ct.type, ct.provisioning_state)

Creating
Succeeded...........................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"
notebook127982 ComputeInstance Succeeded
project-automl AmlCompute Succeeded


## Dataset

### Overview
The Dataset is Breast-Cancer-Dataset downloaded from https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Coimbra. The dataset consist of 10 predictors, all quantitative, and a binary dependent variable, indicating the presence or absence of breast cancer.

The predictors are anthropometric data and parameters which can be gathered in routine blood analysis.
Prediction models based on these predictors, if accurate, can potentially be used as a biomarker of breast cancer.



In [6]:
found = False
key = "Breast-Cancer Dataset"

description_text = "BREAST CANCER DataSet for Udacity Course 3"

if key in ws.datasets.keys(): 
        found = True
        dataset = ws.datasets[key] 

if not found:
        # Create AML Dataset and register it into Workspace
        example_data = 'https://raw.githubusercontent.com/AarthiAlagammai/project3/main/dataR2.csv'
        dataset = Dataset.Tabular.from_delimited_files(example_data)        
        #Register Dataset in Workspace
        dataset = dataset.register(workspace=ws,
                                   name=key,
                                   description=description_text)


df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,57.301724,27.582111,97.793103,10.012086,2.694988,26.61508,10.180874,14.725966,534.647,1.551724
std,16.112766,5.020136,22.525162,10.067768,3.642043,19.183294,6.843341,12.390646,345.912663,0.499475
min,24.0,18.37,60.0,2.432,0.467409,4.311,1.65602,3.21,45.843,1.0
25%,45.0,22.973205,85.75,4.35925,0.917966,12.313675,5.474283,6.881763,269.97825,1.0
50%,56.0,27.662416,92.0,5.9245,1.380939,20.271,8.352692,10.82774,471.3225,2.0
75%,71.0,31.241442,102.0,11.18925,2.857787,37.3783,11.81597,17.755207,700.085,2.0
max,89.0,38.578759,201.0,58.46,25.050342,90.28,38.04,82.1,1698.44,2.0


In [7]:
from train import clean_data

x,y=clean_data(dataset)

   Age        BMI  Glucose  Insulin      HOMA   Leptin  Adiponectin  Resistin  \
0   48  23.500000       70    2.707  0.467409   8.8071     9.702400   7.99585   
1   83  20.690495       92    3.115  0.706897   8.8438     5.429285   4.06405   
2   82  23.124670       91    4.498  1.009651  17.9393    22.432040   9.27715   
3   68  21.367521       77    3.226  0.612725   9.8827     7.169560  12.76600   
4   86  21.111111       92    3.549  0.805386   6.6994     4.819240  10.57635   

     MCP.1  Classification  
0  417.114               1  
1  468.786               1  
2  554.697               1  
3  928.220               1  
4  773.920               1  
length_of_dataframe 116
Number of malignant cases 64
Number of benign cases 52
Rate of malignant tumors over all cases 55.172413793103445
   Age        BMI  Glucose  Insulin      HOMA   Leptin  Adiponectin  Resistin  \
0   48  23.500000       70    2.707  0.467409   8.8071     9.702400   7.99585   
1   83  20.690495       92    3.115  0.

## AutoML Configuration

1.The model is a classification taske so it uses AUC weighted as the primary metric 

2.The model uses 5 cross validation since it gives better performance than 3 cross validations

3.To constraint the usage a timeout of 30 minutes is specified

4.8 concurrent iterations is used which is the no of iterations to run in parallel

In [8]:
from azureml.train.automl import AutoMLConfig


automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    debug_log = 'automl_errors.log',
    task='classification',
    primary_metric='AUC_weighted',
    training_data=dataset,
    label_column_name='Classification',
    n_cross_validations=5,
    compute_target=compute_target,
    iterations=100,
    max_concurrent_iterations=8)

In [9]:
remote_run = experiment.submit(automl_config,show_output=True)

Running on remote.
Running on remote compute: project-automl
Parent Run ID: AutoML_f82e1bd9-e861-4081-9333-e62b2429c4e8

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

*************************************

## Run Details



In [10]:
from azureml.widgets import RunDetails
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [11]:
remote_run.wait_for_completion(show_output=True)



****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and no high cardinality features were detected.
              Learn more abo

{'runId': 'AutoML_f82e1bd9-e861-4081-9333-e62b2429c4e8',
 'target': 'project-automl',
 'status': 'Completed',
 'startTimeUtc': '2020-11-26T13:32:37.037602Z',
 'endTimeUtc': '2020-11-26T14:14:12.719464Z',
 'properties': {'num_iterations': '100',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'project-automl',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"284a16b4-eae1-4737-81b3-c9b1f53ac371\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 4, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://raw.githubusercontent.com/AarthiAlagammai/project3/main/dataR2.csv\\\\\\"}]}}, \\\\\\"localData\\\\\\": {}, \\\\\\"isEnabled\\\\\\": true, \\\\\\"name\\\\\\":

## Best Model





In [12]:
best_run,model=remote_run.get_output()

In [13]:
import joblib
joblib.dump(model,'fitted_model.pkl')

['fitted_model.pkl']

In [14]:
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
automl-classification,AutoML_f82e1bd9-e861-4081-9333-e62b2429c4e8_98,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [73]:
best_run.id

'AutoML_f82e1bd9-e861-4081-9333-e62b2429c4e8_98'

In [15]:
model._final_estimator

PreFittedSoftVotingClassifier(classification_labels=None,
                              estimators=[('48',
                                           Pipeline(memory=None,
                                                    steps=[('robustscaler',
                                                            RobustScaler(copy=True,
                                                                         quantile_range=[10,
                                                                                         90],
                                                                         with_centering=False,
                                                                         with_scaling=True)),
                                                           ('svcwrapper',
                                                            SVCWrapper(C=11.513953993264458,
                                                                       break_ties=False,
                                    

## Model Deployment

Since automl model shows a better accuracy than hyperdrive model the automl model is deployed

In [17]:
from azureml.core.model import Model
model=Model.register(workspace=ws,model_path="fitted_model.pkl",model_name="model_automl")

Registering model model_automl


In [18]:
model

Model(workspace=Workspace.create(name='quick-starts-ws-127982', subscription_id='a1ebbd25-46bc-446b-9547-5acab9b0125a', resource_group='aml-quickstarts-127982'), name=model_automl, id=model_automl:1, version=1, tags={}, properties={})

In [74]:

from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.model import Model

service_name = 'cancerclassification-env-service'

inference_config = InferenceConfig(entry_script='score.py', environment=best_run.get_environment())
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1,enable_app_insights=True)



In [76]:
from azureml.core.webservice import LocalWebservice
local_config=LocalWebservice.deploy_configuration(port=9000)
local_service=Model.deploy(ws,"test",[model],inference_config,local_config)
local_service.wait_for_deployment(show_output=True)

Downloading model model_automl:1 to /tmp/azureml_4aragmvg/model_automl/1
Generating Docker build context.
Package creation Succeeded
Logging into Docker registry viennaglobal.azurecr.io
Logging into Docker registry viennaglobal.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM viennaglobal.azurecr.io/azureml/azureml_3948892c3f204a4877f0b9993b8e7795
 ---> e958b21f6008
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> 8ebc9ff0b217
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6ImExZWJiZDI1LTQ2YmMtNDQ2Yi05NTQ3LTVhY2FiOWIwMTI1YSIsInJlc291cmNlR3JvdXBOYW1lIjoiYW1sLXF1aWNrc3RhcnRzLTEyNzk4MiIsImFjY291bnROYW1lIjoicXVpY2stc3RhcnRzLXdzLTEyNzk4MiIsIndvcmtzcGFjZUlkIjoiNjg3ZDFjNzctZjE3Ni00YzdmLThhMmQtMGQzMDNiNjFhOTBhIn0sIm1vZGVscyI6e30sIm1vZGVsc0luZm8iOnt9fQ== | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in c986f8c19eb4
 ---> ce5b9bfa0921
Step 4/5 : RUN mv '/var/azureml-app/tmpxeqat2b_.py' /var/azureml

In [75]:
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config,
                       overwrite=True)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [54]:
print(service.get_logs()) 

2020-11-26T14:47:08.6757913Z stdout F 2020-11-26T14:47:08,673310400+00:00 - rsyslog/run 
2020-11-26T14:47:08.6829208Z stdout F 2020-11-26T14:47:08,682211700+00:00 - iot-server/run 
2020-11-26T14:47:08.6919482Z stdout F 2020-11-26T14:47:08,691167300+00:00 - nginx/run 
2020-11-26T14:47:08.693949Z stderr F /usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2020-11-26T14:47:08.693949Z stderr F /usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2020-11-26T14:47:08.693949Z stderr F /usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2020-11-26T14:47:08.7044713Z stderr F /usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libssl.so.1.0.0: no version infor

In [55]:
print(service.state)

Healthy


In [56]:
print(service.scoring_uri)

http://a9ebe095-8168-4d86-878e-fd8be7d4f025.southcentralus.azurecontainer.io/score


In [64]:
import json
import requests
data={'data':[{"Age":48, "BMI":23.5,"Glucose":70,"Insulin":2.707,"HOMA":0.467408667,"Leptin":8.8071,"Adiponectin":9.7024,"Resistin":7.99585,"MCP.1":417.114}]}
test_data1=json.dumps(data)
print(test_data1)

{"data": [{"Age": 48, "BMI": 23.5, "Glucose": 70, "Insulin": 2.707, "HOMA": 0.467408667, "Leptin": 8.8071, "Adiponectin": 9.7024, "Resistin": 7.99585, "MCP.1": 417.114}]}


In [58]:
headers={'Content-Type':'application/json'}

In [65]:
response=requests.post(service.scoring_uri,data=test_data1,headers=headers)

In [67]:
print(response.text)

[1]


In [70]:
output=service.run(test_data1)

In [71]:
print(output)

[1]


In [77]:
service.delete()