# Automated ML

In [1]:
import pickle
import requests
import json
import numpy as np

from azureml.core import Workspace, Experiment
from azureml.core.dataset import Dataset

from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails

from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

from azureml.core.webservice import AciWebservice, Webservice

## Dataset

### Overview

The dataset used for this project is a credit card fraud dataset, which can be obtained from Kaggle (https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud?resource=download). 

The dataset contains transactions made by credit cards in September 2013 by European cardholders.
This dataset presents transactions that occurred in two days, where we have 492 frauds out of 284,807 transactions. The dataset is highly unbalanced, the positive class (frauds) account for 0.172% of all transactions.

In [2]:
ws = Workspace.from_config()

exp=Experiment(workspace = ws, name = "AutoMLRun")
project_folder = './capstone-project'

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

Workspace name: quick-starts-ws-199962
Azure region: southcentralus
Subscription id: aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee
Resource group: aml-quickstarts-199962


In [3]:
# Retrieve data from datasets
key = "CCFraud"
description_text = "Credit card fraud dataset"

try:
    dataset = ws.datasets[key]
except:
    print("Dataset not found in Azure datasets.") 


df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284806.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.841366,3.918649e-15,5.682686e-16,-8.761736e-15,2.811118e-15,-1.552103e-15,2.04013e-15,-1.698953e-15,-1.958151e-16,-3.14764e-15,...,1.471982e-16,8.042109e-16,5.28245e-16,4.458267e-15,1.426896e-15,1.70164e-15,-3.671606e-16,-1.218152e-16,88.349619,0.001727
std,47488.22833,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.25,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84691.5,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.75,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


In [4]:
cluster_name = "Capstone-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                            max_nodes=4)
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## AutoML Configuration

TODO: Explain why you chose the automl settings and cofiguration you used below.

In [7]:
automl_settings = {
    "experiment_timeout_minutes": 20,
    "max_concurrent_iterations": 5,
    "primary_metric" : 'AUC_weighted'
}
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=dataset,
                             label_column_name="Class",   
                             path = project_folder,
                             enable_early_stopping= True,
                             featurization= 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )

In [8]:
#Submitting experiment
remote_run = exp.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
AutoMLRun,AutoML_7c3bdcf4-d0aa-4442-ad96-ff5697b7a81d,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

In [9]:
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model

In [11]:
best_model = remote_run.get_best_child()

best_model.get_details()

{'runId': 'AutoML_7c3bdcf4-d0aa-4442-ad96-ff5697b7a81d_38',
 'target': 'Capstone-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-07-01T14:32:21.543699Z',
 'endTimeUtc': '2022-07-01T14:32:25.32483Z',
 'services': {},
 'properties': {'runTemplate': 'automl_child',
  'pipeline_id': '__AutoML_Ensemble__',
  'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'AUC_weighted\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'AutoMLRun\',\'compute_target\':\'Capstone-cluster\',\'subscription_id\':\'aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee\',\'region\':\'southcentralus\',\'spark_service\':None}","ensemble_run_id":"AutoML_7c3bdcf4-d0aa-4442-ad96-ff5697b7a81d_38","experiment_name":"AutoMLRun","workspace_name":"quick-starts-ws-199962","subscription_id":"aa7cf

In [12]:
best_model.get_metrics()

{'f1_score_weighted': 0.9990551979332365,
 'matthews_correlation': 0.7138763281769754,
 'average_precision_score_weighted': 0.9996540565178734,
 'norm_macro_recall': 0.612033868133635,
 'recall_score_weighted': 0.9991222218320986,
 'log_loss': 0.003483984628583166,
 'AUC_weighted': 0.9742981476178361,
 'f1_score_micro': 0.9991222218320986,
 'average_precision_score_macro': 0.915029777814021,
 'precision_score_micro': 0.9991222218320986,
 'weighted_accuracy': 0.9997878191166103,
 'balanced_accuracy': 0.8060169340668175,
 'f1_score_macro': 0.8527214039790715,
 'precision_score_weighted': 0.9990464526090195,
 'recall_score_macro': 0.8060169340668175,
 'precision_score_macro': 0.9163326888146717,
 'accuracy': 0.9991222218320986,
 'AUC_macro': 0.9742985052771812,
 'average_precision_score_micro': 0.9999111918925514,
 'AUC_micro': 0.9999102380057424,
 'recall_score_micro': 0.9991222218320986,
 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_7c3bdcf4-d0aa-4442-ad96-ff5697b7a81

In [13]:
best_model

Experiment,Id,Type,Status,Details Page,Docs Page
AutoMLRun,AutoML_7c3bdcf4-d0aa-4442-ad96-ff5697b7a81d_38,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [14]:
#Save the best model
model = remote_run.register_model(model_name='capstoneAutoML')

## Model Deployment

In [15]:
from azureml.core.model import Model
service_name = 'credit-card-fraud-detection'

inference_config = InferenceConfig(entry_script='score.py')
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service = Model.deploy(workspace=ws,
                          name=service_name,
                          models=[model],
                          inference_config=inference_config,
                          deployment_config=aci_config,
                          overwrite=True)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-07-01 14:45:40+00:00 Creating Container Registry if not exists.
2022-07-01 14:45:41+00:00 Use the existing image.
2022-07-01 14:45:42+00:00 Submitting deployment to compute.
2022-07-01 14:45:45+00:00 Checking the status of deployment credit-card-fraud-detection..
2022-07-01 14:46:21+00:00 Checking the status of inference endpoint credit-card-fraud-detection.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [16]:
#Scoring URL
url = service.scoring_uri
print(url)

http://daf18c7d-e4cd-4b2b-888a-aab5c80cc3f4.southcentralus.azurecontainer.io/score


In [17]:
df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [19]:
df1 = df.drop("Class", axis = 1).copy()

In [24]:
data = df1.sample(n = 2)
data

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
205876,135951.0,2.116518,-1.7403,-0.800322,-1.595363,-1.450848,-0.273048,-1.312706,-0.008051,-1.183087,...,-0.315415,-0.076391,0.098612,0.14995,-0.498785,-0.322057,-0.171356,0.008236,-0.048116,86.4
11329,19722.0,-0.751955,0.41939,2.897022,-1.626232,-0.426212,-0.500991,0.370259,-0.157079,2.474989,...,-0.088109,-0.089796,0.332095,-0.274291,0.567979,0.33254,-0.846676,-0.064982,-0.148987,11.85


In [25]:
import json
print(json.dumps(json.loads(data.reset_index().to_json(orient = 'records')), indent = 2))

[
  {
    "index": 205876,
    "Time": 135951.0,
    "V1": 2.1165181332,
    "V2": -1.740299713,
    "V3": -0.8003217213,
    "V4": -1.5953633393,
    "V5": -1.4508481485,
    "V6": -0.2730484197,
    "V7": -1.3127060874,
    "V8": -0.0080513813,
    "V9": -1.1830870427,
    "V10": 1.7007320068,
    "V11": 0.2681694815,
    "V12": -0.5038517433,
    "V13": -0.1524266046,
    "V14": -0.2303878828,
    "V15": -0.6218823874,
    "V16": -0.1513421589,
    "V17": 0.1413059862,
    "V18": 0.6314270683,
    "V19": 0.1286805531,
    "V20": -0.3154149271,
    "V21": -0.0763914168,
    "V22": 0.0986124586,
    "V23": 0.1499504006,
    "V24": -0.49878451,
    "V25": -0.3220569216,
    "V26": -0.1713558675,
    "V27": 0.0082356381,
    "V28": -0.0481155703,
    "Amount": 86.4
  },
  {
    "index": 11329,
    "Time": 19722.0,
    "V1": -0.7519547499,
    "V2": 0.4193902013,
    "V3": 2.8970217737,
    "V4": -1.6262315982,
    "V5": -0.426211712,
    "V6": -0.5009910835,
    "V7": 0.3702590753,
    

In [26]:
# Data copied from above print statement that we can use to get results from deployed model. 
data = {"data":
[
  {
    "Time": 135951.0,
    "V1": 2.1165181332,
    "V2": -1.740299713,
    "V3": -0.8003217213,
    "V4": -1.5953633393,
    "V5": -1.4508481485,
    "V6": -0.2730484197,
    "V7": -1.3127060874,
    "V8": -0.0080513813,
    "V9": -1.1830870427,
    "V10": 1.7007320068,
    "V11": 0.2681694815,
    "V12": -0.5038517433,
    "V13": -0.1524266046,
    "V14": -0.2303878828,
    "V15": -0.6218823874,
    "V16": -0.1513421589,
    "V17": 0.1413059862,
    "V18": 0.6314270683,
    "V19": 0.1286805531,
    "V20": -0.3154149271,
    "V21": -0.0763914168,
    "V22": 0.0986124586,
    "V23": 0.1499504006,
    "V24": -0.49878451,
    "V25": -0.3220569216,
    "V26": -0.1713558675,
    "V27": 0.0082356381,
    "V28": -0.0481155703,
    "Amount": 86.4
  },
  {
    "Time": 19722.0,
    "V1": -0.7519547499,
    "V2": 0.4193902013,
    "V3": 2.8970217737,
    "V4": -1.6262315982,
    "V5": -0.426211712,
    "V6": -0.5009910835,
    "V7": 0.3702590753,
    "V8": -0.1570786256,
    "V9": 2.4749894764,
    "V10": -2.0941941834,
    "V11": 1.1366278144,
    "V12": -1.7418168766,
    "V13": 1.6741695701,
    "V14": 0.9189085472,
    "V15": 0.145320873,
    "V16": -0.860859589,
    "V17": 0.7119124063,
    "V18": -0.1377859465,
    "V19": -0.7209423444,
    "V20": -0.088108917,
    "V21": -0.0897957051,
    "V22": 0.3320948428,
    "V23": -0.274290798,
    "V24": 0.5679794066,
    "V25": 0.3325399056,
    "V26": -0.8466755257,
    "V27": -0.0649820512,
    "V28": -0.1489869577,
    "Amount": 11.85
  }
]
      }
# Convert to JSON string
json_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(json_data)

# Set the content type
headers = {'Content-Type': 'application/json'}

# Make the request and display the response
resp = requests.post(url, json_data, headers=headers)
print(resp.json())

[0, 0]


In [23]:
# load existing web service
service = Webservice(name=service_name, workspace=ws)
logs = service.get_logs()

for line in logs.split('\n'):
    print(line)

2022-07-01T14:46:12,167739700+00:00 - iot-server/run 
2022-07-01T14:46:12,168221300+00:00 - rsyslog/run 
2022-07-01T14:46:12,189753200+00:00 - gunicorn/run 
2022-07-01T14:46:12,191733100+00:00 - nginx/run 
2022-07-01T14:46:12,195174100+00:00 | gunicorn/run | 
2022-07-01T14:46:12,201719000+00:00 | gunicorn/run | ###############################################
2022-07-01T14:46:12,215228900+00:00 | gunicorn/run | AzureML Container Runtime Information
2022-07-01T14:46:12,229285400+00:00 | gunicorn/run | ###############################################
2022-07-01T14:46:12,230649200+00:00 | gunicorn/run | 
2022-07-01T14:46:12,236365800+00:00 | gunicorn/run | 
2022-07-01T14:46:12,239211200+00:00 | gunicorn/run | AzureML image information: openmpi3.1.2-ubuntu18.04:20220516.v1
2022-07-01T14:46:12,247845900+00:00 | gunicorn/run | 
2022-07-01T14:46:12,253763600+00:00 | gunicorn/run | 
2022-07-01T14:46:12,261133000+00:00 | gunicorn/run | PATH environment variable: /azureml-envs/azureml_76f657337a18

In [27]:
service.delete()
compute_target.delete()