In [None]:
!pip install --upgrade azureml-sdk[notebooks,explain,automl,contrib]

In [1]:
import json
import logging

from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import os

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.interpret import ExplanationClient

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException



In [2]:
# Connect to the Azure ML workspace that hosts this experiment.
ws = Workspace(subscription_id='c463a189-ff51-4b8d-aa64-edea8b0527e5',
               resource_group='Heart',
               workspace_name='Heart1')

# Choose a name for the experiment.
experiment_name = 'Heart_D'

experiment = Experiment(ws, experiment_name)

# Collect the connection details so they can be shown as a small table.
output = {
    'Subscription ID': ws.subscription_id,
    'Workspace': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Run History Name': experiment_name,
}

# Disable column-width truncation so long values are displayed in full.
# None is the supported "no limit" value since pandas 1.0; the former sentinel
# -1 is deprecated there and rejected by newer releases. Releases before 1.0
# only accept an int, hence the fallback.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

outputDf = pd.DataFrame(data=output, index=[''])
# Transposed so that each setting is rendered on its own row.
outputDf.T

Unnamed: 0,Unnamed: 1
Subscription ID,c463a189-ff51-4b8d-aa64-edea8b0527e5
Workspace,Heart1
Resource Group,Heart
Location,centralindia
Run History Name,Heart_D


In [3]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the compute cluster the AutoML run is submitted to.
cpu_cluster_name = "cpu-cluster-4"

# Look the cluster up by name first; a new one is provisioned only when the
# lookup raises ComputeTargetException.
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS12_V2',
        max_nodes=6,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Wait for the compute target to finish its pending operation, echoing progress.
compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Local folder (relative to the notebook's working directory) that holds the
# CSV; the same folder is read from and uploaded so the two cannot diverge.
local_data_dir = './data'
local_csv_path = os.path.join(local_data_dir, 'heart.csv')

# Fail fast when the CSV is missing. Previously the file was read from a
# hard-coded absolute path, and a missing './data' folder was created empty and
# uploaded, which deferred the failure to the cell that builds the dataset.
if not os.path.isfile(local_csv_path):
    raise FileNotFoundError(
        "Training data not found at '{}'; place heart.csv there before "
        "running this cell.".format(os.path.abspath(local_csv_path)))

data = pd.read_csv(local_csv_path)

ds = ws.get_default_datastore()
# NOTE(review): the SDK reports Datastore.upload as deprecated and suggests
# Dataset.File.upload_directory; the call is kept so that the cell's result and
# the 'second_disease/heart.csv' layout used by the next cell stay unchanged.
ds.upload(src_dir=local_data_dir, target_path='second_disease', overwrite=True, show_progress=True)

"Datastore.upload" is deprecated after version 1.0.69. Please use "Dataset.File.upload_directory" to upload your files             from a local directory and create FileDataset in single method call. See Dataset API change notice at https://aka.ms/dataset-deprecation.


Uploading an estimated of 1 files
Uploading ./data/heart.csv
Uploaded ./data/heart.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_fe941d86ffea4d5fbf45112ee45ee76f

In [5]:
# Create the training dataset from the CSV that was uploaded to the datastore.
heart_csv_reference = ds.path('second_disease/heart.csv')
train_data = Dataset.Tabular.from_delimited_files(path=heart_csv_reference)

In [6]:
# Tunable run settings, kept in their own dict and expanded into AutoMLConfig below.
automl_settings = dict(
    experiment_timeout_hours=0.6,
    enable_early_stopping=True,
    iteration_timeout_minutes=5,
    max_concurrent_iterations=4,
    max_cores_per_iteration=-1,
    n_cross_validations=2,
    primary_metric='AUC_weighted',
    featurization='auto',
    verbosity=logging.INFO,
)

# Classification task on the remote compute target, with 'target' as the label
# column of the training dataset.
automl_config = AutoMLConfig(
    task='classification',
    debug_log='automl_errors.log',
    compute_target=compute_target,
    experiment_exit_score=0.9984,
    blocked_models=['KNN', 'LinearSVM'],
    enable_onnx_compatible_models=True,
    validation_size=0.2,
    training_data=train_data,
    label_column_name='target',
    **automl_settings
)

In [7]:
# Submit the AutoML configuration to the experiment; with show_output=True the
# run's status messages are printed in this cell's output.
remote_run = experiment.submit(
    automl_config,
    show_output=True,
)

Submitting remote run.
No run_configuration provided, running on cpu-cluster-4 with default configuration
Running on remote compute: cpu-cluster-4


Experiment,Id,Type,Status,Details Page,Docs Page
Heart_D,AutoML_75d931b1-d849-410b-876f-5e45064f3a23,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetCrossValidationSplit. Beginning model selection.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

********************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

********************************************************************************************

TYPE:  

In [8]:
# Unpack the two values returned by get_output(): the run kept as `best_run`
# and the corresponding fitted model.
(best_run, fitted_model) = remote_run.get_output()

In [11]:
import pickle

# Serialise the fitted model to disk. The context manager closes the file even
# when pickle.dump raises, which the manual open()/close() pair did not guarantee.
with open("Heart_model.pkl", "wb") as pickle_out:
    pickle.dump(fitted_model, pickle_out)