In [7]:
import sagemaker
import boto3
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import io
from urllib.parse import urlparse
import time

# Notebook-wide AWS context: SageMaker session, target bucket/prefix and the
# IAM role that Autopilot will assume.
sess = sagemaker.Session()
bucket = "ads-508-team4"
prefix = "autopilot"
role = sagemaker.get_execution_role()

# Build one boto3 session and reuse it for the region lookup and both clients
# instead of constructing a fresh session for each of them.
boto_session = boto3.Session()
region = boto_session.region_name

# Low-level clients: `sm` drives the AutoML job, `s3` is available for bucket access.
sm = boto_session.client(service_name="sagemaker", region_name=region)
s3 = boto_session.client(service_name="s3", region_name=region)

In [8]:
# Restore `s3_private_path_data` from IPython's %store database (it must have
# been stored by an earlier notebook/kernel) and echo it as a sanity check.
%store -r s3_private_path_data
print(s3_private_path_data)

s3://ads-508-team4/modeling_groups


In [9]:
# Restore `s3_private_path_csv` from IPython's %store database and echo it.
%store -r s3_private_path_csv
print(s3_private_path_csv)

s3://ads-508-team4/olist/csv


In [10]:
# Restore `s3_public_path_csv` from IPython's %store database and echo it.
# NOTE(review): in the recorded run this prints the same URI as
# `s3_private_path_csv` — confirm that is intended.
%store -r s3_public_path_csv
print(s3_public_path_csv)

s3://ads-508-team4/olist/csv


In [11]:
# Restore the stored `target` object and print it; in the recorded run it is
# the categorical `spending_grp` series (4 ordered categories).
%store -r target
print(target)

0             Good
1           Normal
2          Average
3           Normal
4        Excellent
           ...    
96091    Excellent
96092      Average
96093         Good
96094         Good
96095      Average
Name: spending_grp, Length: 96096, dtype: category
Categories (4, object): [Normal < Average < Good < Excellent]


In [12]:
# S3 location reserved for Autopilot artifacts, derived from the notebook's
# `bucket` and `prefix` settings. The previous version called .format() on a
# literal without placeholders, so the arguments were silently ignored.
s3_private_path_ap = "s3://{}/{}".format(bucket, prefix)
print(s3_private_path_ap)

s3://ads-508-team4/autopilot


# Setting up Autopilot Job

In [13]:
# Training input for the AutoML job: a CSV under the bucket's
# `modeling_groups/` folder, with `spending_grp` as the column to predict.
# The bucket name is interpolated from `bucket`; the previous literals were
# passed to .format() without any placeholders, so the arguments had no effect.
input_data_config = [
    {
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": "s3://{}/modeling_groups/train_data.csv".format(bucket),
            }
        },
        "TargetAttributeName": "spending_grp",
    }
]

# Stop the job once 10 candidates have been produced.
job_config = {"CompletionCriteria": {"MaxCandidates": 10}}

# Job artifacts are written next to the training data.
# NOTE(review): this does not use the `autopilot` prefix defined earlier —
# confirm `modeling_groups/output` is the intended destination.
output_data_config = {"S3OutputPath": "s3://{}/modeling_groups/output".format(bucket)}

# Launch Autopilot Job

In [14]:
from time import gmtime, strftime, sleep

# Job names carry a UTC timestamp with minute resolution.
timestamp_suffix = strftime("%Y%m%d-%H-%M", gmtime())
auto_ml_job_name = "automl-{}".format(timestamp_suffix)
print("AutoMLJobName: {}".format(auto_ml_job_name))

# Collect the request first, then submit it. The call stays the last
# expression of the cell so the service response is displayed.
create_job_request = {
    "AutoMLJobName": auto_ml_job_name,
    "InputDataConfig": input_data_config,
    "OutputDataConfig": output_data_config,
    "AutoMLJobConfig": job_config,
    "RoleArn": role,
}
sm.create_auto_ml_job(**create_job_request)

AutoMLJobName: automl-20220329-05-36


{'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:893959334162:automl-job/automl-20220329-05-36',
 'ResponseMetadata': {'RequestId': '0883675a-3146-4469-be13-f173c3175d1d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0883675a-3146-4469-be13-f173c3175d1d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '92',
   'date': 'Tue, 29 Mar 2022 05:36:16 GMT'},
  'RetryAttempts': 0}}

# Tracking Autopilot Job Progress

In [15]:
print("JobStatus - Secondary Status")
print("------------------------------")

# Statuses after which this cell stops polling.
terminal_statuses = ("Failed", "Completed", "Stopped")
poll_interval_seconds = 60

# Poll once per interval and print each observation exactly once.
# The terminal check happens *before* sleeping, so the cell returns as soon as
# the job finishes instead of waiting one more interval, and the first status
# is no longer fetched and printed twice back to back.
while True:
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response["AutoMLJobStatus"]
    print(job_run_status + " - " + describe_response["AutoMLJobSecondaryStatus"])

    if job_run_status in terminal_statuses:
        break
    sleep(poll_interval_seconds)

JobStatus - Secondary Status
------------------------------
InProgress - Starting
InProgress - Starting
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - Generatin

# Results

In [16]:
# Look up the winning candidate of the finished job and report its name,
# objective metric and full description.
# NOTE(review): the recorded run shows validation:accuracy = 1.0 — this may
# point to target leakage in the training features; verify before relying on it.
best_candidate = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)["BestCandidate"]
best_candidate_name = best_candidate["CandidateName"]

print("\n")
print("CandidateName: {}".format(best_candidate_name))

metric_name_line = "FinalAutoMLJobObjectiveMetricName: {}".format(
    best_candidate["FinalAutoMLJobObjectiveMetric"]["MetricName"]
)
print(metric_name_line)

metric_value_line = "FinalAutoMLJobObjectiveMetricValue: {}".format(
    best_candidate["FinalAutoMLJobObjectiveMetric"]["Value"]
)
print(metric_value_line)

print("\nBest candidate details:: {}".format(best_candidate))



CandidateName: automl-20220329-05-36vm5JlPIMb4j-001-b657f724
FinalAutoMLJobObjectiveMetricName: validation:accuracy
FinalAutoMLJobObjectiveMetricValue: 1.0

Best candidate details:: {'CandidateName': 'automl-20220329-05-36vm5JlPIMb4j-001-b657f724', 'FinalAutoMLJobObjectiveMetric': {'MetricName': 'validation:accuracy', 'Value': 1.0}, 'ObjectiveStatus': 'Succeeded', 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::ProcessingJob', 'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:893959334162:processing-job/automl-20220329-05-36-db-1-670e1199375641759ccd343a28c2ad423049', 'CandidateStepName': 'automl-20220329-05-36-db-1-670e1199375641759ccd343a28c2ad423049'}, {'CandidateStepType': 'AWS::SageMaker::TrainingJob', 'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:893959334162:training-job/automl-20220329-05-36-dpp8-1-cb12e28f4f5941d5876a87b4e1b8628705', 'CandidateStepName': 'automl-20220329-05-36-dpp8-1-cb12e28f4f5941d5876a87b4e1b8628705'}, {'CandidateStepType': 'AWS::SageMaker::Tra

In [17]:
# Print every candidate returned for the job: name and objective metric on one
# line, then the image of its second inference container.
sm_dict = sm.list_candidates_for_auto_ml_job(AutoMLJobName=auto_ml_job_name)
for candidate in sm_dict["Candidates"]:
    candidate_name = candidate["CandidateName"]
    print(candidate_name, candidate["FinalAutoMLJobObjectiveMetric"])
    # Index 1 is presumably the algorithm container — TODO confirm.
    containers = candidate["InferenceContainers"]
    print(containers[1]["Image"], "\n")

automl-20220329-05-36vm5JlPIMb4j-009-0843905d {'MetricName': 'validation:accuracy', 'Value': 1.0}
683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1-cpu-py3 

automl-20220329-05-36vm5JlPIMb4j-003-13b8ecc5 {'MetricName': 'validation:accuracy', 'Value': 1.0}
683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1-cpu-py3 

automl-20220329-05-36vm5JlPIMb4j-004-552acef6 {'MetricName': 'validation:accuracy', 'Value': 1.0}
683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1-cpu-py3 

automl-20220329-05-36vm5JlPIMb4j-001-b657f724 {'MetricName': 'validation:accuracy', 'Value': 1.0}
683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1-cpu-py3 

automl-20220329-05-36vm5JlPIMb4j-008-cf6b3bd0 {'MetricName': 'validation:accuracy', 'Value': 0.9330400228500366}
683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1-cpu-py3 

automl-20220329-05-36vm5JlPIMb4j-010-b6f92db7 {'MetricName': 'validation:accuracy', 'Value': 1.0}
68

In [18]:
# With no arguments, %store lists every variable currently persisted in
# IPython's store database together with its stored value.
%store

Stored variables and their in-db values:
ingest_create_athena_db_passed             -> True
s3_private_path_csv                        -> 's3://ads-508-team4/olist/csv'
s3_private_path_data                       -> 's3://ads-508-team4/modeling_groups'
s3_public_path_csv                         -> 's3://ads-508-team4/olist/csv'
setup_dependencies_passed                  -> True
setup_iam_roles_passed                     -> True
setup_instance_check_passed                -> True
setup_s3_bucket_passed                     -> True
target                                     -> 0             Good
1           Normal
2          A


# Shutting down resources

In [None]:
%%html

<!-- Notice plus a hidden button bound to the "kernelmenu:shutdown" command;
     the script below clicks it programmatically. -->
<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
// Click the first element with class "sm-command-button" (the hidden button
// above). Presumably the front end's command linker turns that click into a
// kernel shutdown; confirm for the target environment.
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // Best effort: any error (e.g. no matching element) is deliberately ignored.
    // NoOp
}    
</script>

In [None]:
%%javascript

// Save a checkpoint of the notebook, then delete its session, via the global
// `Jupyter` object. Presumably this targets the classic Notebook front end,
// complementing the HTML-button approach; confirm.
try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // Best effort: errors (e.g. `Jupyter` being undefined) are deliberately ignored.
    // NoOp
}