# Automated ML

In [1]:
import pickle
import requests
import json
import numpy as np

from azureml.core import Workspace, Experiment
from azureml.core.dataset import Dataset

from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails

from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

from azureml.core.webservice import AciWebservice, Webservice

## Dataset

### Overview
TODO: In this markdown cell, give an overview of the dataset you are using. Also mention the task you will be performing.
The dataset used for this project is a credit card fraud dataset, which can be obtained from Kaggle (https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud?resource=download). 

The dataset contains transactions made by credit cards in September 2013 by European cardholders.
This dataset presents transactions that occurred in two days, where we have 492 frauds out of 284,807 transactions. The dataset is highly unbalanced, the positive class (frauds) account for 0.172% of all transactions.

In [5]:
ws = Workspace.from_config()

exp=Experiment(workspace = ws, name = "AutoMLRun")
project_folder = './capstone-project'

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

Workspace name: quick-starts-ws-199865
Azure region: southcentralus
Subscription id: 1b944a9b-fdae-4f97-aeb1-b7eea0beac53
Resource group: aml-quickstarts-199865


In [6]:
# Retrieve data from datasets
key = "CCFraud"
description_text = "Credit card fraud dataset"

try:
    dataset = ws.datasets[key]
except:
    print("Dataset not found in Azure datasets.") 


df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284806.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.841366,3.918649e-15,5.682686e-16,-8.761736e-15,2.811118e-15,-1.552103e-15,2.04013e-15,-1.698953e-15,-1.958151e-16,-3.14764e-15,...,1.471982e-16,8.042109e-16,5.28245e-16,4.458267e-15,1.426896e-15,1.70164e-15,-3.671606e-16,-1.218152e-16,88.349619,0.001727
std,47488.22833,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.25,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84691.5,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.75,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


In [7]:
cluster_name = "Capstone-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                            max_nodes=4)
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)

InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded.....................................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


## AutoML Configuration

TODO: Explain why you chose the automl settings and cofiguration you used below.

In [8]:
automl_settings = {
    "experiment_timeout_minutes": 30,
    "max_concurrent_iterations": 5,
    "primary_metric" : 'AUC Weighted'
}
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=dataset,
                             label_column_name="Class",   
                             path = project_folder,
                             enable_early_stopping= True,
                             featurization= 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )

In [10]:
#Submitting experiment
remote_run = exp.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
AutoMLRun,AutoML_67091cb6-105a-4752-8714-552f867c2ed7,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

In [11]:
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model

In [12]:
best_model = remote_run.get_best_child()

best_model.get_details()

{'runId': 'AutoML_67091cb6-105a-4752-8714-552f867c2ed7_1',
 'target': 'Capstone-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-06-30T13:34:54.176519Z',
 'endTimeUtc': '2022-06-30T13:37:44.651788Z',
 'services': {},
 'properties': {'runTemplate': 'automl_child',
  'pipeline_id': 'c7af0367625be6ac5c2fecbfc72ed444cb7a2111',
  'pipeline_spec': '{"objects":[{"spec_class":"preproc","class_name":"MaxAbsScaler","module":"sklearn.preprocessing","param_args":[],"param_kwargs":{},"prepared_kwargs":{}},{"spec_class":"sklearn","class_name":"XGBoostClassifier","module":"automl.client.core.common.model_wrappers","param_args":[],"param_kwargs":{"tree_method":"auto"},"prepared_kwargs":{}}],"pipeline_id":"c7af0367625be6ac5c2fecbfc72ed444cb7a2111","module":"sklearn.pipeline","class_name":"Pipeline"}',
  'training_percent': '100',
  'predicted_cost': None,
  'iteration': '1',
  '_aml_system_scenario_identification': 'Remote.Child',
  'run_preprocessor': 'MaxAbsScaler',
  'run_algorithm': 'XGBoos

In [13]:
best_model.get_metrics()

{'precision_score_weighted': 0.9997191899986524,
 'recall_score_weighted': 0.9997191109862715,
 'log_loss': 0.002671558268820364,
 'f1_score_micro': 0.9997191109862715,
 'accuracy': 0.9997191109862715,
 'matthews_correlation': 0.9146033705757837,
 'weighted_accuracy': 0.9999995150801566,
 'AUC_weighted': 0.9547998634264148,
 'f1_score_macro': 0.9554852221753334,
 'precision_score_micro': 0.9997191109862715,
 'recall_score_micro': 0.9997191109862715,
 'balanced_accuracy': 0.9183673469387755,
 'AUC_macro': 0.9548240054322235,
 'f1_score_weighted': 0.9997066467859798,
 'AUC_micro': 0.9998441684095324,
 'average_precision_score_weighted': 0.9996692116249072,
 'norm_macro_recall': 0.8367346938775511,
 'recall_score_macro': 0.9183673469387755,
 'average_precision_score_macro': 0.9424281616338406,
 'average_precision_score_micro': 0.9998178373104294,
 'precision_score_macro': 0.9998593530239099,
 'accuracy_table': 'aml://artifactId/ExperimentRun/dcid.AutoML_67091cb6-105a-4752-8714-552f867c2ed

In [14]:
best_model

Experiment,Id,Type,Status,Details Page,Docs Page
AutoMLRun,AutoML_67091cb6-105a-4752-8714-552f867c2ed7_1,,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [15]:
#Save the best model
model = remote_run.register_model(model_name='capstoneAutoML')

Performing interactive authentication. Please follow the instructions on the terminal.


To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code R99Q82MM5 to authenticate.


Interactive authentication successfully completed.


## Model Deployment

In [17]:
from azureml.core.model import Model
service_name = 'credit-card-fraud-detection'

inference_config = InferenceConfig(entry_script='score.py')
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service = Model.deploy(workspace=ws,
                          name=service_name,
                          models=[model],
                          inference_config=inference_config,
                          deployment_config=aci_config,
                          overwrite=True)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-06-30 14:22:03+00:00 Creating Container Registry if not exists..
2022-06-30 14:32:04+00:00 Use the existing image.
2022-06-30 14:32:05+00:00 Submitting deployment to compute..
2022-06-30 14:32:08+00:00 Checking the status of deployment credit-card-fraud-detection..
2022-06-30 14:34:32+00:00 Checking the status of inference endpoint credit-card-fraud-detection.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [18]:
#Scoring URL
url = service.scoring_uri
print(url)

http://fb4acc74-26df-45b3-9987-e1ce9376e576.southcentralus.azurecontainer.io/score


In [27]:
df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [29]:
df1 = df.drop("Class", axis = 1).copy()

In [31]:
df1

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.524980,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.208038,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,1.475829,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.059616,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.001396,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.127434,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00


In [37]:
data = df1.sample(n = 2)
data

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
252471,155821.0,-0.345037,0.520764,1.24359,0.403201,0.407146,0.283724,0.430051,0.326838,0.328181,...,-0.280023,-0.385672,-1.004748,0.122267,-0.520983,-0.791973,-1.178253,0.228547,0.195569,8.57
158170,111045.0,1.99392,-0.212678,-0.190338,0.544236,-0.685034,-0.778817,-0.581643,-0.139032,2.732703,...,-0.326391,-0.278648,-0.465507,0.362148,-0.165827,-0.410866,-0.958892,0.026353,-0.034607,6.95


In [38]:
import json
print(json.dumps(json.loads(data.reset_index().to_json(orient = 'records')), indent = 2))

[
  {
    "index": 252471,
    "Time": 155821.0,
    "V1": -0.3450371675,
    "V2": 0.5207643258,
    "V3": 1.2435901783,
    "V4": 0.4032006236,
    "V5": 0.4071461907,
    "V6": 0.2837239254,
    "V7": 0.4300506953,
    "V8": 0.3268381899,
    "V9": 0.3281805526,
    "V10": -0.5486692068,
    "V11": -0.3589198935,
    "V12": 0.1404390076,
    "V13": -1.8743004696,
    "V14": 0.1486790469,
    "V15": -2.1198787441,
    "V16": -0.1792942454,
    "V17": -0.3556698937,
    "V18": -0.0062784644,
    "V19": 0.4950565273,
    "V20": -0.2800230707,
    "V21": -0.3856716093,
    "V22": -1.0047476568,
    "V23": 0.1222672739,
    "V24": -0.5209829784,
    "V25": -0.7919734154,
    "V26": -1.1782531595,
    "V27": 0.2285468733,
    "V28": 0.1955691785,
    "Amount": 8.57
  },
  {
    "index": 158170,
    "Time": 111045.0,
    "V1": 1.9939196853,
    "V2": -0.2126777219,
    "V3": -0.1903376673,
    "V4": 0.544235893,
    "V5": -0.6850343577,
    "V6": -0.7788172923,
    "V7": -0.5816427785,
   

In [39]:
# Data copied from above print statement that we can use to get results from deployed model. 
data = {"data":
        [
  {
    "Time": 155821.0,
    "V1": -0.3450371675,
    "V2": 0.5207643258,
    "V3": 1.2435901783,
    "V4": 0.4032006236,
    "V5": 0.4071461907,
    "V6": 0.2837239254,
    "V7": 0.4300506953,
    "V8": 0.3268381899,
    "V9": 0.3281805526,
    "V10": -0.5486692068,
    "V11": -0.3589198935,
    "V12": 0.1404390076,
    "V13": -1.8743004696,
    "V14": 0.1486790469,
    "V15": -2.1198787441,
    "V16": -0.1792942454,
    "V17": -0.3556698937,
    "V18": -0.0062784644,
    "V19": 0.4950565273,
    "V20": -0.2800230707,
    "V21": -0.3856716093,
    "V22": -1.0047476568,
    "V23": 0.1222672739,
    "V24": -0.5209829784,
    "V25": -0.7919734154,
    "V26": -1.1782531595,
    "V27": 0.2285468733,
    "V28": 0.1955691785,
    "Amount": 8.57
  },
  {
    "Time": 111045.0,
    "V1": 1.9939196853,
    "V2": -0.2126777219,
    "V3": -0.1903376673,
    "V4": 0.544235893,
    "V5": -0.6850343577,
    "V6": -0.7788172923,
    "V7": -0.5816427785,
    "V8": -0.1390316815,
    "V9": 2.7327030264,
    "V10": -0.5753256242,
    "V11": 0.0383832187,
    "V12": -2.4413618114,
    "V13": 0.9508950913,
    "V14": 1.6623407131,
    "V15": 0.4188089843,
    "V16": 0.2472853786,
    "V17": 0.0984708432,
    "V18": 0.4025125774,
    "V19": -0.3235044006,
    "V20": -0.3263909087,
    "V21": -0.2786476917,
    "V22": -0.4655072943,
    "V23": 0.3621478718,
    "V24": -0.1658266757,
    "V25": -0.4108663209,
    "V26": -0.9588919238,
    "V27": 0.0263528098,
    "V28": -0.0346071683,
    "Amount": 6.95
  }
]
      }
# Convert to JSON string
json_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(json_data)

# Set the content type
headers = {'Content-Type': 'application/json'}

# Make the request and display the response
resp = requests.post(url, json_data, headers=headers)
print(resp.json())

[0, 0]


In [40]:
# load existing web service
service = Webservice(name=service_name, workspace=ws)
logs = service.get_logs()

for line in logs.split('\n'):
    print(line)

2022-06-30T14:34:23,955135500+00:00 - iot-server/run 
2022-06-30T14:34:23,961036700+00:00 - gunicorn/run 
2022-06-30T14:34:23,962452200+00:00 | gunicorn/run | 
2022-06-30T14:34:23,977340200+00:00 | gunicorn/run | ###############################################
2022-06-30T14:34:23,989619100+00:00 | gunicorn/run | AzureML Container Runtime Information
2022-06-30T14:34:23,988842000+00:00 - nginx/run 
2022-06-30T14:34:23,999543700+00:00 - rsyslog/run 
2022-06-30T14:34:23,999655000+00:00 | gunicorn/run | ###############################################
2022-06-30T14:34:24,007732200+00:00 | gunicorn/run | 
2022-06-30T14:34:24,033571300+00:00 | gunicorn/run | 
2022-06-30T14:34:24,050812900+00:00 | gunicorn/run | AzureML image information: openmpi3.1.2-ubuntu18.04:20220516.v1
2022-06-30T14:34:24,067652700+00:00 | gunicorn/run | 
2022-06-30T14:34:24,076158800+00:00 | gunicorn/run | 
2022-06-30T14:34:24,087334100+00:00 | gunicorn/run | PATH environment variable: /azureml-envs/azureml_76f657337a18

In [41]:
service.delete()
compute_target.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
