In [29]:
# Reload `s3_bucket_name` from IPython's %store persistence, then echo it
# so the restored value is visible in the cell output.
%store -r s3_bucket_name
s3_bucket_name

'sagemaker-cookbook-bucket'

In [30]:
# Reload `prefix` from IPython's %store persistence, then echo it
# so the restored value is visible in the cell output.
%store -r prefix
prefix

'chapter06/input'

In [31]:
# Settings for the Autopilot run. Both S3 URIs are built from the restored
# bucket name and prefix, so input and output share the same bucket/prefix.

# Training CSV that is passed to `experiment.fit(...)`.
s3_data_source = (
    f"s3://{s3_bucket_name}/{prefix}/training_data.csv"
)

# S3 location given to the AutoML constructor as `output_path`.
output_target = (
    f"s3://{s3_bucket_name}/{prefix}/output"
)

# Cap on the number of candidates (the same value the AutoML constructor receives).
max_candidates = 25

In [32]:
import sagemaker
# SageMaker session; passed to the AutoML constructor as `sagemaker_session`.
session = sagemaker.Session()
# Execution role resolved by the SDK; passed to the AutoML constructor as `role`.
role = sagemaker.get_execution_role()

In [33]:
from sagemaker.automl.automl import AutoML

# Configure the Autopilot (AutoML) experiment. The job itself is started by
# `experiment.fit(...)` in a later cell.
#   - target_attribute_name: column of the training CSV to predict ("label").
#   - output_path:           S3 location for the job's output.
#   - max_candidates:        taken from the notebook-level `max_candidates`
#                            setting so the cap is defined in exactly one place.
#   - *_in_seconds:          per-training-job and whole-job runtime limits.
experiment = AutoML(
    role=role,
    sagemaker_session=session,
    target_attribute_name="label",
    output_path=output_target,
    max_candidates=max_candidates,
    max_runtime_per_training_job_in_seconds=1000,
    total_job_runtime_in_seconds=6000
)

In [34]:
# Start the AutoML job on the training CSV. `wait=False` and `logs=False` are
# passed so this call does not block on the job; progress is polled in a later cell.
experiment.fit(inputs=s3_data_source, logs=False, wait=False)

In [35]:
# Snapshot of the job description right after launch (name, ARN, input/output
# config, completion criteria, current status); echoed for inspection.
response = experiment.describe_auto_ml_job()
response

{'AutoMLJobName': 'automl-2021-05-21-11-24-23-889',
 'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:581320662326:automl-job/automl-2021-05-21-11-24-23-889',
 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
     'S3Uri': 's3://sagemaker-cookbook-bucket/chapter06/input/training_data.csv'}},
   'TargetAttributeName': 'label'}],
 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-cookbook-bucket/chapter06/input/output'},
 'RoleArn': 'arn:aws:iam::581320662326:role/SuperAdminRole',
 'AutoMLJobConfig': {'CompletionCriteria': {'MaxCandidates': 25,
   'MaxRuntimePerTrainingJobInSeconds': 1000,
   'MaxAutoMLJobRuntimeInSeconds': 6000},
  'SecurityConfig': {'EnableInterContainerTrafficEncryption': False}},
 'CreationTime': datetime.datetime(2021, 5, 21, 11, 24, 24, 21000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2021, 5, 21, 11, 24, 24, 21000, tzinfo=tzlocal()),
 'AutoMLJobStatus': 'InProgress',
 'AutoMLJobSecondaryStatus': 'Starting',
 'GenerateCa

In [36]:
from pprint import pprint
from time import sleep

In [37]:
%%time

status = "InProgress"

while status == "InProgress":
    response = experiment.describe_auto_ml_job()
    status = response['AutoMLJobStatus']
    secondary_status = response['AutoMLJobSecondaryStatus']
    
    print(f"{status} - {secondary_status}")

    sleep(15)

InProgress - Starting
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - 

In [38]:
# Re-fetch the job description so `response` reflects the job's latest state
# for the cells that follow.
response = experiment.describe_auto_ml_job()

In [39]:
# Wall-clock duration of the job, from creation to end, in whole minutes
# (fractional minutes are truncated by int()).
job_created_at, job_ended_at = response['CreationTime'], response['EndTime']
delta = job_ended_at - job_created_at
total_minutes = int(delta.total_seconds() / 60)
total_minutes

35

In [40]:
# Artifact locations reported for the job: a dict mapping artifact type to the
# S3 URI of the generated notebook; echoed for inspection.
artifacts = response['AutoMLJobArtifacts']
artifacts

{'CandidateDefinitionNotebookLocation': 's3://sagemaker-cookbook-bucket/chapter06/input/output/automl-2021-05-21-11-24-23-889/sagemaker-automl-candidates/automl-2021-05-21-11-24-23-889-pr-1-87a141a5b40e4650abd19edc6f3/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb',
 'DataExplorationNotebookLocation': 's3://sagemaker-cookbook-bucket/chapter06/input/output/automl-2021-05-21-11-24-23-889/sagemaker-automl-candidates/automl-2021-05-21-11-24-23-889-pr-1-87a141a5b40e4650abd19edc6f3/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb'}

In [41]:
for s3_path in list(artifacts.values()):
    !aws s3 cp {s3_path} tmp/.


download: s3://sagemaker-cookbook-bucket/chapter06/input/output/automl-2021-05-21-11-24-23-889/sagemaker-automl-candidates/automl-2021-05-21-11-24-23-889-pr-1-87a141a5b40e4650abd19edc6f3/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb to tmp/SageMakerAutopilotCandidateDefinitionNotebook.ipynb
download: s3://sagemaker-cookbook-bucket/chapter06/input/output/automl-2021-05-21-11-24-23-889/sagemaker-automl-candidates/automl-2021-05-21-11-24-23-889-pr-1-87a141a5b40e4650abd19edc6f3/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb to tmp/SageMakerAutopilotDataExplorationNotebook.ipynb


In [42]:
# Best candidate reported for the job: a dict with the candidate name, its
# final objective metric and the steps that produced it; echoed for inspection.
best = experiment.best_candidate()
best

{'CandidateName': 'tuning-job-1-da61eaf8193b4bbba9-019-313d539c',
 'FinalAutoMLJobObjectiveMetric': {'MetricName': 'validation:f1',
  'Value': 0.8604900240898132},
 'ObjectiveStatus': 'Succeeded',
 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::ProcessingJob',
   'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:581320662326:processing-job/automl-2021-05-21-11-24-23-889-db-1-177d60a5b7654af59aa9625caa0',
   'CandidateStepName': 'automl-2021-05-21-11-24-23-889-db-1-177d60a5b7654af59aa9625caa0'},
  {'CandidateStepType': 'AWS::SageMaker::TrainingJob',
   'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:581320662326:training-job/automl-2021-05-21-11-24-23-889-automl-202-dpp1-1-3bc0d7ae36e14c',
   'CandidateStepName': 'automl-2021-05-21-11-24-23-889-automl-202-dpp1-1-3bc0d7ae36e14c'},
  {'CandidateStepType': 'AWS::SageMaker::TransformJob',
   'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:581320662326:transform-job/automl-2021-05-21-11-24-23-889-automl-202-dpp1-csv-1-40f56230c5'

In [43]:
# Objective metric (name and value) of the best candidate.
best['FinalAutoMLJobObjectiveMetric']

{'MetricName': 'validation:f1', 'Value': 0.8604900240898132}

In [44]:
# Persist the job name with %store so it can be reloaded later via `%store -r`,
# then echo it.
autopilot_job_name = response['AutoMLJobName']
%store autopilot_job_name
autopilot_job_name

Stored 'autopilot_job_name' (str)


'automl-2021-05-21-11-24-23-889'