In [None]:
import sagemaker

# Print the installed SageMaker SDK version for reference when reproducing this notebook.
print(sagemaker.__version__)
# Session object reused by the later cells (default bucket lookup, data upload, AutoML job).
sess = sagemaker.Session()

In [None]:
%%sh
# Fetch the direct-marketing sample archive. --timestamping (-N) skips the
# download when the local copy is already up to date.
wget --timestamping https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip
# Extract in place; -o overwrites files left by a previous run without prompting.
unzip -o bank-additional.zip

In [None]:
# Default bucket of the session; it holds both the uploaded input and the job output.
bucket = sess.default_bucket()
# Key prefix WITHOUT a trailing slash: the other cells join it with '/' themselves
# ('s3://{}/{}/output', '{}/output/{}'), so a trailing slash here would put an
# empty '//' path segment into the output location and the cleanup prefix.
prefix = 'sagemaker/DEMO-automl-dm'
# Upload the full CSV under <prefix>/input and keep the location returned by
# upload_data for the AutoML job.
s3_input_data = sess.upload_data(
    path="./bank-additional/bank-additional-full.csv",
    key_prefix=prefix + '/input',
)
print(s3_input_data)

In [None]:
! head bank-additional/bank-additional-full.csv

In [None]:
from sagemaker.automl.automl import AutoML

# Configure the AutoML job: predict column 'y', write artifacts under
# s3://<bucket>/<prefix>/output, and cap the work at 250 candidates,
# 600 s per training job and 3600 s for the whole job.
# The job itself is started by the later fit() call.
auto_ml_job = AutoML(
    role=sagemaker.get_execution_role(),
    sagemaker_session=sess,
    target_attribute_name='y',
    output_path='s3://' + bucket + '/' + prefix + '/output',
    max_runtime_per_training_job_in_seconds=600,
    max_candidates=250,
    total_job_runtime_in_seconds=3600,
)

In [None]:
# Start the AutoML job on the uploaded CSV without blocking (wait=False) and
# without streaming logs; later cells poll its progress with wait_for().
auto_ml_job.fit(inputs=s3_input_data, logs=False, wait=False)

In [None]:
from time import sleep

def wait_for(job, state, poll_interval=60):
    """Block while an AutoML job is in progress and in one of the given phases.

    Polls ``job.describe_auto_ml_job()`` and prints the primary and secondary
    status after every poll. Returns as soon as the primary status is no
    longer 'InProgress' (for example the job stopped or failed) or the
    secondary status is not one of ``state``.

    Args:
        job: Object exposing ``describe_auto_ml_job()``, which must return a
            dict with the keys 'AutoMLJobStatus' and 'AutoMLJobSecondaryStatus'.
        state: Secondary status to wait through; either a single string or an
            iterable of strings.
        poll_interval: Seconds to sleep between two polls (default 60).

    Returns:
        None.
    """
    # Normalise to a tuple so the membership test below is an exact match;
    # `status in 'SomeString'` would be a substring test.
    states = (state,) if isinstance(state, str) else tuple(state)

    # Poll the job that was passed in rather than a notebook-level global.
    description = job.describe_auto_ml_job()
    job_status = description['AutoMLJobStatus']
    job_sec_status = description['AutoMLJobSecondaryStatus']

    while job_status == 'InProgress' and job_sec_status in states:
        sleep(poll_interval)
        description = job.describe_auto_ml_job()
        job_status = description['AutoMLJobStatus']
        job_sec_status = description['AutoMLJobSecondaryStatus']
        print(job_status, job_sec_status)

In [None]:
# Block while the job is InProgress with secondary status 'AnalyzingData';
# the next cell reads the generated notebook locations from the job description.
wait_for(auto_ml_job, 'AnalyzingData')

In [None]:
# Re-read the job description to pick up the artifact locations.
job = auto_ml_job.describe_auto_ml_job()
# print(job)  # uncomment to inspect the full description

# S3 locations of the candidate-definition and data-exploration notebooks.
job_artifacts = job['AutoMLJobArtifacts']
job_candidate_notebook = job_artifacts['CandidateDefinitionNotebookLocation']
job_data_notebook = job_artifacts['DataExplorationNotebookLocation']

print(job_candidate_notebook, job_data_notebook, sep='\n')

In [None]:
%%sh -s $job_candidate_notebook $job_data_notebook
# $1 = candidate definition notebook location, $2 = data exploration notebook
# location (both passed on the magic line above). The expansions are quoted so
# the shell neither word-splits nor glob-expands the S3 URIs.
aws s3 cp "$1" .
aws s3 cp "$2" .

In [None]:
# Block while the job is InProgress with secondary status 'FeatureEngineering'.
wait_for(auto_ml_job, 'FeatureEngineering')

In [None]:
# Block while the job is InProgress with secondary status 'ModelTuning'.
wait_for(auto_ml_job, 'ModelTuning')

In [None]:
import pandas as pd
from sagemaker.analytics import ExperimentAnalytics

# Refresh the description; its 'AutoMLJobName' is used to build the experiment name.
job = auto_ml_job.describe_auto_ml_job()

# Load the trials of the experiment named '<job name>-aws-auto-ml-job',
# requesting the 'Objective:F1' metric, into a DataFrame.
exp = ExperimentAnalytics(
    experiment_name='{}-aws-auto-ml-job'.format(job['AutoMLJobName']),
    metric_names=['Objective:F1'],
)
df = exp.dataframe()

print(df)

# Disabled draft for ranking trials, kept for reference (column names unverified):
#print("Number of jobs: ", len(df))
#if (len(df) !=0):
#    df = pd.concat([df['Objective:f1'], df.drop(['ObjectiveMetric'], axis=1)], axis=1)
#    df.sort_values('ObjectiveMetric', ascending=0)[:5]

In [None]:
# Fetch the best candidate found by the job and report its name and objective metric.
job_best_candidate = auto_ml_job.best_candidate()
for field in ('CandidateName', 'FinalAutoMLJobObjectiveMetric'):
    print(job_best_candidate[field])

In [None]:
import boto3

# Delete every object this job wrote under <prefix>/output/<job name>/.
# The trailing '/' matters: an S3 prefix filter is a plain string match, so
# without it the outputs of any other job whose name merely starts with this
# job's name would be deleted as well.
job_outputs_prefix = '{}/output/{}/'.format(prefix, job['AutoMLJobName'])
s3_bucket = boto3.resource('s3').Bucket(bucket)
s3_bucket.objects.filter(Prefix=job_outputs_prefix).delete()