In [None]:
!pip install boto3

In [19]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, its default S3 bucket, and the execution role of this notebook.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# The region is read from a default boto3 session; the low-level SageMaker
# client is created from that session and bound to the same region.
boto_session = boto3.Session()
region = boto_session.region_name
sm = boto_session.client(service_name='sagemaker', region_name=region)

In [20]:
# Key prefix, inside the default bucket, of the training CSV.
prefix = 'feature-store/amazon-reviews/csv/balanced-raw-with-header/train'

# Full S3 URI of the data.csv object under that prefix.
balanced_raw_with_header_s3_uri = f's3://{bucket}/{prefix}/data.csv'
print(balanced_raw_with_header_s3_uri)

s3://sagemaker-us-east-1-835319576252/feature-store/amazon-reviews/csv/balanced-raw-with-header/train/data.csv


In [21]:
# Key prefix, inside the default bucket, used for the job's output location.
prefix = 'models/amazon-reviews/autopilot'

# Full S3 URI used as the output path of the AutoML job.
autopilot_model_output_s3_uri = f's3://{bucket}/{prefix}'
print(autopilot_model_output_s3_uri)


s3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot


In [22]:
# Number of candidates the AutoML job may produce.
max_candidates = 3

# Completion criteria for the AutoML job (both runtimes are in seconds).
job_config = {
    'CompletionCriteria': {
        'MaxRuntimePerTrainingJobInSeconds': 600,
        'MaxCandidates': max_candidates,
        'MaxAutoMLJobRuntimeInSeconds': 3600,
    },
}

# A single S3 input; `is_positive_sentiment` is the target column.
input_data_config = [
    {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': balanced_raw_with_header_s3_uri,
            },
        },
        'TargetAttributeName': 'is_positive_sentiment',
    },
]

# S3 location that receives the job's output.
output_data_config = {'S3OutputPath': autopilot_model_output_s3_uri}

#problem_type = 'Regression'

#auto_ml_job_objective = {
#    'MetricName': 'MSE'
#}

## Launching the SageMaker AutoPilot job

We can now launch the job by calling the `create_auto_ml_job` API.

In [23]:
from time import gmtime, strftime, sleep

# Day-hour-minute-second suffix (UTC) appended to the names created in this notebook.
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())

auto_ml_job_name = f'automl-dm-{timestamp_suffix}'
print(f'AutoMLJobName: {auto_ml_job_name}')

AutoMLJobName: automl-dm-05-00-53-23


In [24]:
# Keyword arguments for the create_auto_ml_job call; the two commented-out
# entries are the optional problem type and objective.
create_job_args = {
    'AutoMLJobName': auto_ml_job_name,
    'InputDataConfig': input_data_config,
    'OutputDataConfig': output_data_config,
    'AutoMLJobConfig': job_config,
    # 'ProblemType': problem_type,
    # 'AutoMLJobObjective': auto_ml_job_objective,
    'RoleArn': role,
}

# Last expression of the cell, so the API response is displayed.
sm.create_auto_ml_job(**create_job_args)

{'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:835319576252:automl-job/automl-dm-05-00-53-23',
 'ResponseMetadata': {'RequestId': 'afa7e257-7593-4563-b881-660aa82b8c0b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'afa7e257-7593-4563-b881-660aa82b8c0b',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '92',
   'date': 'Thu, 05 Mar 2020 00:53:25 GMT'},
  'RetryAttempts': 0}}

### Tracking the progress of the AutoPilot job
A SageMaker AutoPilot job consists of four high-level steps:
* Data Preprocessing, where the dataset is split into train and validation sets.
* Recommending Pipelines, where the dataset is analyzed and SageMaker AutoPilot comes up with a list of ML pipelines that should be tried out on the dataset.
* Automatic Feature Engineering, where SageMaker AutoPilot performs feature transformation on individual features of the dataset as well as at an aggregate level.
* ML pipeline selection and hyperparameter tuning, where the top performing pipeline is selected along with the optimal hyperparameters for the training algorithm (the last stage of the pipeline). 

In [25]:
# Sleep for a bit to ensure the AutoML job above has time to start
import time
time.sleep(3)

job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
job_status = job['AutoMLJobStatus']
job_sec_status = job['AutoMLJobSecondaryStatus']

# Poll every 30 seconds while the job is still starting up or analyzing data.
# Statuses are compared with `==` / real tuples: `x in ('InProgress')` is a
# substring test, because ('InProgress') is a plain string, not a tuple.
# 'Starting' is included so a job that has not reached the analysis phase yet
# is waited for instead of being reported as already done.
while job_status == 'InProgress' and job_sec_status in ('Starting', 'AnalyzingData'):
    # Wait first, then refresh, so the loop ends as soon as a new phase is seen.
    time.sleep(30)
    job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_status = job['AutoMLJobStatus']
    job_sec_status = job['AutoMLJobSecondaryStatus']
    print(job_status, job_sec_status)

# Only report success when the job did not stop or fail while we were waiting.
if job_status in ('Stopped', 'Failed'):
    print('AutoML job ended with status {}: {}'.format(
        job_status, job.get('FailureReason', 'no failure reason reported')))
else:
    print("Data analysis complete")

print(job)

InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress FeatureEngineering
Data analysis complete
{'AutoMLJobName': 'automl-dm-05-00-53-23', 'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:835319576252:automl-job/automl-dm-05-00-53-23', 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://sagemaker-us-east-1-835319576252/feature-store/amazon-reviews/csv/balanced-raw-with-header/train/data.csv'}}, 'TargetAttributeName': 'is_positive_sentiment'}], 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot'}, 'RoleArn': 'arn:aws:iam::835319576252:role/service-role/AmazonSageMaker-ExecutionRole-20191006T1

In [26]:
# Refresh the job description and show where the job currently is.
job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
job_status = job['AutoMLJobStatus']
job_sec_status = job['AutoMLJobSecondaryStatus']
print(job_status)
print(job_sec_status)

# Poll every 30 seconds while the job is in its feature-engineering phase.
# Statuses are compared with `==`: `x in ('InProgress')` is a substring test,
# because ('InProgress') is a plain string, not a one-element tuple.
while job_status == 'InProgress' and job_sec_status == 'FeatureEngineering':
    # Wait first, then refresh, so the loop ends as soon as a new phase is seen.
    sleep(30)
    job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_status = job['AutoMLJobStatus']
    job_sec_status = job['AutoMLJobSecondaryStatus']
    print(job_status, job_sec_status)

# Only report success when the job did not stop or fail while we were waiting.
if job_status in ('Stopped', 'Failed'):
    print('AutoML job ended with status {}: {}'.format(
        job_status, job.get('FailureReason', 'no failure reason reported')))
else:
    print("Feature engineering complete")

print(job)

InProgress
FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress FeatureEngineering
InProgress ModelTuning
Feature engineering complete
{'AutoMLJobName': 'automl-dm-05-00-53-23', 'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:835319576252:automl-job/automl-dm-05-00-53-23', 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://sagemaker-us-east-1-835319576252/feature-store/amazon-reviews/csv/balanced-raw-with-header/train/data.csv'}}, 'TargetAttributeName': 'is_positive_sentiment'}], 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-east-1-835319576252/mo

In [27]:
# Refresh the job description and show where the job currently is.
job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
job_status = job['AutoMLJobStatus']
job_sec_status = job['AutoMLJobSecondaryStatus']
print(job_status)
print(job_sec_status)

# Poll every 30 seconds while the job is in its model-tuning phase.
# Statuses are compared with `==`: `x in ('InProgress')` is a substring test,
# because ('InProgress') is a plain string, not a one-element tuple.
while job_status == 'InProgress' and job_sec_status == 'ModelTuning':
    # Wait first, then refresh, so the loop ends as soon as a new phase is seen.
    sleep(30)
    job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_status = job['AutoMLJobStatus']
    job_sec_status = job['AutoMLJobSecondaryStatus']
    print(job_status, job_sec_status)

# Only report success when the job did not stop or fail while we were waiting.
if job_status in ('Stopped', 'Failed'):
    print('AutoML job ended with status {}: {}'.format(
        job_status, job.get('FailureReason', 'no failure reason reported')))
else:
    print("Model tuning complete")

print(job)

InProgress
ModelTuning
InProgress ModelTuning
InProgress ModelTuning
InProgress ModelTuning
InProgress ModelTuning
InProgress ModelTuning
InProgress ModelTuning
Completed MaxCandidatesReached
Model tuning complete
{'AutoMLJobName': 'automl-dm-05-00-53-23', 'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:835319576252:automl-job/automl-dm-05-00-53-23', 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://sagemaker-us-east-1-835319576252/feature-store/amazon-reviews/csv/balanced-raw-with-header/train/data.csv'}}, 'TargetAttributeName': 'is_positive_sentiment'}], 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot'}, 'RoleArn': 'arn:aws:iam::835319576252:role/service-role/AmazonSageMaker-ExecutionRole-20191006T135881', 'AutoMLJobConfig': {'CompletionCriteria': {'MaxCandidates': 3, 'MaxRuntimePerTrainingJobInSeconds': 600, 'MaxAutoMLJobRuntimeInSeconds': 3600}}, 'CreationTime': datetime.datetime(20

### Viewing all candidates explored by SageMaker AutoPilot
Once model tuning is complete, you can view all the candidates (pipeline evaluations with different hyperparameter combinations) that were explored by AutoML and sort them by their final performance metric.

In [28]:
# Request the job's candidates, sorted by final objective metric value.
candidates_response = sm.list_candidates_for_auto_ml_job(
    AutoMLJobName=auto_ml_job_name,
    SortBy='FinalObjectiveMetricValue',
)
candidates = candidates_response['Candidates']

# One line per candidate: position, name and final objective metric value.
for index, candidate in enumerate(candidates):
    metric_value = candidate['FinalAutoMLJobObjectiveMetric']['Value']
    print(f"{index}  {candidate['CandidateName']}  {metric_value}")

0  tuning-job-1-2504f97bd82442a9a9-001-0067a3c3  0.5003120303153992
1  tuning-job-1-2504f97bd82442a9a9-002-03db27a4  0.4996879994869232
2  tuning-job-1-2504f97bd82442a9a9-003-ac57e879  0.4996879994869232


## Inspect SageMaker AutoPilot trials with Amazon SageMaker Experiments
SageMaker AutoPilot automatically creates a new experiment, and pushes information for each trial. 

In [29]:
from sagemaker.analytics import ExperimentAnalytics, TrainingJobAnalytics

# The experiment name passed here is the job name plus '-aws-auto-ml-job'.
exp = ExperimentAnalytics(
    experiment_name=f'{auto_ml_job_name}-aws-auto-ml-job',
    sagemaker_session=sess,
)

# Show the experiment's dataframe as the cell output.
df = exp.dataframe()
df

Unnamed: 0,TrialComponentName,DisplayName,SourceArn,SageMaker.ImageUri,SageMaker.InstanceCount,SageMaker.InstanceType,SageMaker.VolumeSizeInGB,_tuning_objective_metric,alpha,colsample_bytree,...,validation:binary_f_beta - Count,SageMaker.ModelName,SageMaker.ModelPrimary.DataUrl,SageMaker.ModelPrimary.Image,processor_module,sagemaker_program,sagemaker_submit_directory,input_channel_mode,job_name,label_col
0,tuning-job-1-2504f97bd82442a9a9-002-03db27a4-a...,tuning-job-1-2504f97bd82442a9a9-002-03db27a4-a...,arn:aws:sagemaker:us-east-1:835319576252:train...,683313688378.dkr.ecr.us-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:accuracy,0.538189,0.840489,...,,,,,,,,,,
1,tuning-job-1-2504f97bd82442a9a9-001-0067a3c3-a...,tuning-job-1-2504f97bd82442a9a9-001-0067a3c3-a...,arn:aws:sagemaker:us-east-1:835319576252:train...,382416733822.dkr.ecr.us-east-1.amazonaws.com/l...,1.0,ml.m5.4xlarge,50.0,validation:binary_classification_accuracy,,,...,1.0,,,,,,,,,
2,tuning-job-1-2504f97bd82442a9a9-003-ac57e879-a...,tuning-job-1-2504f97bd82442a9a9-003-ac57e879-a...,arn:aws:sagemaker:us-east-1:835319576252:train...,683313688378.dkr.ecr.us-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:accuracy,0.538189,0.840489,...,,,,,,,,,,
3,automl-dm--dpp1-csv-1-faee8d25d9e34a99a4627e44...,automl-dm--dpp1-csv-1-faee8d25d9e34a99a4627e44...,arn:aws:sagemaker:us-east-1:835319576252:trans...,,1.0,ml.m5.4xlarge,,,,,...,,automl-dm-05-00-53-23-automl-dm--dpp1-model-a4...,s3://sagemaker-us-east-1-835319576252/models/a...,683313688378.dkr.ecr.us-east-1.amazonaws.com/s...,,,,,,
4,automl-dm--dpp0-rpb-1-64783d1075b24cb3a0bfcf58...,automl-dm--dpp0-rpb-1-64783d1075b24cb3a0bfcf58...,arn:aws:sagemaker:us-east-1:835319576252:trans...,,1.0,ml.m5.4xlarge,,,,,...,,automl-dm-05-00-53-23-automl-dm--dpp0-model-4f...,s3://sagemaker-us-east-1-835319576252/models/a...,683313688378.dkr.ecr.us-east-1.amazonaws.com/s...,,,,,,
5,automl-dm--dpp2-rpb-1-fb6312fe10dc4bb68e3c17f0...,automl-dm--dpp2-rpb-1-fb6312fe10dc4bb68e3c17f0...,arn:aws:sagemaker:us-east-1:835319576252:trans...,,1.0,ml.m5.4xlarge,,,,,...,,automl-dm-05-00-53-23-automl-dm--dpp2-model-b7...,s3://sagemaker-us-east-1-835319576252/models/a...,683313688378.dkr.ecr.us-east-1.amazonaws.com/s...,,,,,,
6,automl-dm--dpp1-1-d9dca7b83fa24a3f8f5ea88356b4...,automl-dm--dpp1-1-d9dca7b83fa24a3f8f5ea88356b4...,arn:aws:sagemaker:us-east-1:835319576252:train...,683313688378.dkr.ecr.us-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,,,,...,,,,,candidate_data_processors.dpp1,candidate_data_processors.trainer,/opt/ml/input/data/code,,,
7,automl-dm--dpp2-1-daec0ef2bcec4674803e30502374...,automl-dm--dpp2-1-daec0ef2bcec4674803e30502374...,arn:aws:sagemaker:us-east-1:835319576252:train...,683313688378.dkr.ecr.us-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,,,,...,,,,,candidate_data_processors.dpp2,candidate_data_processors.trainer,/opt/ml/input/data/code,,,
8,automl-dm--dpp0-1-cf56b610f2414d139e35710a6f25...,automl-dm--dpp0-1-cf56b610f2414d139e35710a6f25...,arn:aws:sagemaker:us-east-1:835319576252:train...,683313688378.dkr.ecr.us-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,,,,...,,,,,candidate_data_processors.dpp0,candidate_data_processors.trainer,/opt/ml/input/data/code,,,
9,db-1-bdc6a20ab17f44758d34e3211a26f0c948066b0b5...,db-1-bdc6a20ab17f44758d34e3211a26f0c948066b0b5...,arn:aws:sagemaker:us-east-1:835319576252:proce...,,1.0,ml.m5.2xlarge,250.0,,,,...,,,,,,,,Pipe,automl-dm-05-00-53-23,is_positive_sentiment


## Viewing notebooks generated by SageMaker AutoPilot
Once data analysis is complete, SageMaker AutoPilot generates two notebooks: 
* Data exploration,
* Candidate definition.

In [30]:
# Fetch the current description of the AutoML job and print the raw response.
job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
print(job)

{'AutoMLJobName': 'automl-dm-05-00-53-23', 'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:835319576252:automl-job/automl-dm-05-00-53-23', 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix', 'S3Uri': 's3://sagemaker-us-east-1-835319576252/feature-store/amazon-reviews/csv/balanced-raw-with-header/train/data.csv'}}, 'TargetAttributeName': 'is_positive_sentiment'}], 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot'}, 'RoleArn': 'arn:aws:iam::835319576252:role/service-role/AmazonSageMaker-ExecutionRole-20191006T135881', 'AutoMLJobConfig': {'CompletionCriteria': {'MaxCandidates': 3, 'MaxRuntimePerTrainingJobInSeconds': 600, 'MaxAutoMLJobRuntimeInSeconds': 3600}}, 'CreationTime': datetime.datetime(2020, 3, 5, 0, 53, 25, 854000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2020, 3, 5, 1, 11, 56, 365000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2020, 3, 5, 1, 11, 56, 393000, tzinfo=tzlocal()),

### Let's copy all of the generated resources including the two notebooks.

In [31]:
# S3 URI of the data exploration notebook, located at
# <generated resources root>/notebooks/<notebook file>.
data_exploration_notebook_uri = job['AutoMLJobArtifacts']['DataExplorationNotebookLocation']

# Drop the last two path components ('notebooks' and the file name) to get the
# root. str.rstrip() must not be used for this: its argument is a set of
# characters, not a suffix, so it can also eat trailing characters of the
# directory id itself (e.g. a trailing 'a', 'b' or 'e').
generated_resources = data_exploration_notebook_uri.rsplit('/', 2)[0]
generated_resources

's3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot/automl-dm-05-00-53-23/sagemaker-automl-candidates/pr-1-01858f1904d043ecbd9122ce8a6897d540e9715fe1bb46f084070f1520'

In [32]:
!rm -rf ./generated_module
!rm -rf ./notebooks
!aws s3 cp --recursive $generated_resources .

download: s3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot/automl-dm-05-00-53-23/sagemaker-automl-candidates/pr-1-01858f1904d043ecbd9122ce8a6897d540e9715fe1bb46f084070f1520/generated_module/MANIFEST.in to generated_module/MANIFEST.in
download: s3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot/automl-dm-05-00-53-23/sagemaker-automl-candidates/pr-1-01858f1904d043ecbd9122ce8a6897d540e9715fe1bb46f084070f1520/generated_module/candidate_data_processors/trainer.py to generated_module/candidate_data_processors/trainer.py
download: s3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot/automl-dm-05-00-53-23/sagemaker-automl-candidates/pr-1-01858f1904d043ecbd9122ce8a6897d540e9715fe1bb46f084070f1520/generated_module/candidate_data_processors/sagemaker_serve.py to generated_module/candidate_data_processors/sagemaker_serve.py
download: s3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot/automl-dm-05-00-53-23/sagemaker-automl-

### In the file view, open the `notebooks/` and `generated_module/` folders.  Lots of useful information in there!

## Deploying the best candidate
Now that we have successfully completed the AutoML job on our dataset and visualized the trials, we can create a model from any of the trials with a single API call and then deploy that model for online or batch prediction using [Inference Pipelines](https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipelines.html). For this notebook, we deploy only the best performing trial for inference.

The best candidate is the one we're really interested in.

In [33]:
# Take the best candidate from the job description.
job_description = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
best_candidate = job_description['BestCandidate']
best_candidate_identifier = best_candidate['CandidateName']

# Name and value of the candidate's final objective metric.
best_candidate_metric = best_candidate['FinalAutoMLJobObjectiveMetric']

print(f'Candidate name: {best_candidate_identifier}')
print(f"Metric name: {best_candidate_metric['MetricName']}")
print(f"Metric value: {best_candidate_metric['Value']}")

Candidate name: tuning-job-1-2504f97bd82442a9a9-001-0067a3c3
Metric name: validation:binary_classification_accuracy
Metric value: 0.5003120303153992


In [34]:
# Display the full best-candidate record as the cell output.
best_candidate

{'CandidateName': 'tuning-job-1-2504f97bd82442a9a9-001-0067a3c3',
 'FinalAutoMLJobObjectiveMetric': {'MetricName': 'validation:binary_classification_accuracy',
  'Value': 0.5003120303153992},
 'ObjectiveStatus': 'Succeeded',
 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::ProcessingJob',
   'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:835319576252:processing-job/db-1-bdc6a20ab17f44758d34e3211a26f0c948066b0b5e844e55accd37c8f0',
   'CandidateStepName': 'db-1-bdc6a20ab17f44758d34e3211a26f0c948066b0b5e844e55accd37c8f0'},
  {'CandidateStepType': 'AWS::SageMaker::TrainingJob',
   'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:835319576252:training-job/automl-dm--dpp2-1-daec0ef2bcec4674803e30502374e376eb27de2111034',
   'CandidateStepName': 'automl-dm--dpp2-1-daec0ef2bcec4674803e30502374e376eb27de2111034'},
  {'CandidateStepType': 'AWS::SageMaker::TransformJob',
   'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:835319576252:transform-job/automl-dm--dpp2-rpb-1-fb6312fe10dc4b

We can see the containers and models composing the Inference Pipeline.

In [35]:
# For each inference container: image URI, model artifact URL, then a separator line.
for container in best_candidate['InferenceContainers']:
    print(container['Image'], container['ModelDataUrl'], '======================', sep='\n')

683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-sklearn-automl:0.1.0-cpu-py3
s3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot/automl-dm-05-00-53-23/data-processor-models/automl-dm--dpp2-1-daec0ef2bcec4674803e30502374e376eb27de2111034/output/model.tar.gz
382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest
s3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot/automl-dm-05-00-53-23/tuning/automl-dm--dpp2-ll/tuning-job-1-2504f97bd82442a9a9-001-0067a3c3/output/model.tar.gz
683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-sklearn-automl:0.1.0-cpu-py3
s3://sagemaker-us-east-1-835319576252/models/amazon-reviews/autopilot/automl-dm-05-00-53-23/data-processor-models/automl-dm--dpp2-1-daec0ef2bcec4674803e30502374e376eb27de2111034/output/model.tar.gz


In [36]:
# The model name reuses the timestamp suffix currently held in `timestamp_suffix`.
model_name = f'automl-dm-model-{timestamp_suffix}'

# Register a model made of the best candidate's inference containers.
# Note that `model_arn` holds the whole create_model response, not just the ARN.
model_arn = sm.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    Containers=best_candidate['InferenceContainers'],
)

print('Best candidate model ARN: ', model_arn['ModelArn'])

Best candidate model ARN:  arn:aws:sagemaker:us-east-1:835319576252:model/automl-dm-model-05-00-53-23


Let's deploy the pipeline.

In [37]:
# Take a fresh timestamp; this overwrites the earlier `timestamp_suffix`.
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())

# Endpoint config, endpoint and production variant names share that suffix.
epc_name, ep_name, variant_name = (
    f'automl-dm-{kind}-{timestamp_suffix}' for kind in ('epc', 'ep', 'variant')
)

print(ep_name, variant_name, sep='\n')

automl-dm-ep-05-01-12-37
automl-dm-variant-05-01-12-37


In [38]:
# A single production variant: one ml.m4.xlarge instance serving the model created above.
production_variant = {
    'VariantName': variant_name,
    'ModelName': model_name,
    'InstanceType': 'ml.m4.xlarge',
    'InitialInstanceCount': 1,
}

ep_config = sm.create_endpoint_config(
    EndpointConfigName=epc_name,
    ProductionVariants=[production_variant],
)

# Create the endpoint from that config and print its ARN.
create_endpoint_response = sm.create_endpoint(
    EndpointName=ep_name,
    EndpointConfigName=epc_name,
)
print(create_endpoint_response['EndpointArn'])

arn:aws:sagemaker:us-east-1:835319576252:endpoint/automl-dm-ep-05-01-12-37


In [39]:
%%time
# Block until the 'endpoint_in_service' waiter returns for this endpoint;
# the %%time cell magic reports how long the wait took.
sm.get_waiter('endpoint_in_service').wait(EndpointName=ep_name)

CPU times: user 127 ms, sys: 31.7 ms, total: 159 ms
Wall time: 7min 31s


In [40]:
# Look up the endpoint and report its ARN and current status.
resp = sm.describe_endpoint(EndpointName=ep_name)
status = resp['EndpointStatus']

print(f"Arn: {resp['EndpointArn']}")
print(f'Status: {status}')

Arn: arn:aws:sagemaker:us-east-1:835319576252:endpoint/automl-dm-ep-05-01-12-37
Status: InService


## Scoring the Best Candidate
Let's predict and score the validation set. We'll compute metrics ourselves just for fun.

In [41]:
# Runtime client used by the cells below to call invoke_endpoint on the deployed endpoint.
sm_rt = boto3.Session().client('runtime.sagemaker', region_name=region)

In [42]:
#ep_name = ''

# Review text sent to the endpoint as the request body.
csv_line_predict_positive = """I loved it!  I wish there was a new season..."""

response = sm_rt.invoke_endpoint(
    EndpointName=ep_name,
    ContentType='text/csv',
    Accept='text/csv',
    Body=csv_line_predict_positive,
)

# Decode the response payload and show it as the cell output.
raw_prediction = response['Body'].read()
response_body = raw_prediction.decode("utf-8").strip()
response_body

'0'

In [43]:
# Review text sent to the endpoint as the request body.
csv_line_predict_negative = """This isn't good.  Complete waste of time."""

response = sm_rt.invoke_endpoint(
    EndpointName=ep_name,
    ContentType='text/csv',
    Accept='text/csv',
    Body=csv_line_predict_negative,
)

# Decode the response payload and show it as the cell output.
raw_prediction = response['Body'].read()
response_body = raw_prediction.decode("utf-8").strip()
response_body

'0'

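# TODO: Define the `test` DataFrame (with `review_body` and `is_positive_sentiment` columns) before running the cells below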

In [44]:
# NOTE(review): `test` is not defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel. Presumably it is a DataFrame with a
# `review_body` column - confirm and load it before this cell.
# Print every review text in `test`.
for review_body in test['review_body']:
    print(review_body)

NameError: name 'test' is not defined

In [None]:
# Show the shape of the `review_body` column (requires `test` to be defined first).
test['review_body'].shape

In [None]:
# Send every review to the endpoint, one request per review, and collect the
# integer predictions in order. Commas are replaced by spaces before sending.
responses = []
for review_body in test['review_body']:
    reply = sm_rt.invoke_endpoint(
        EndpointName=ep_name,
        ContentType='text/csv',
        Accept='text/csv',
        Body=review_body.replace(',', ' '),
    )
    responses.append(int(reply['Body'].read().decode("utf-8").strip()))

In [None]:
# Confusion-matrix counters: true/false positives and negatives.
tp = 0
fn = 0
tn = 0
fp = 0

# Pair every actual label with the prediction made for the same row.
# `responses` is expected to hold one prediction per row of `test`, in row order.
actual_labels = test['is_positive_sentiment'].tolist()

for actual, predicted in zip(actual_labels, responses):
    if actual == 1:
        if predicted == 1:
            # True positive
            tp = tp + 1
        else:
            # False negative
            fn = fn + 1
    else:
        # The prediction (not the label again) decides between TN and FP;
        # re-testing the label here would count every negative row as a true
        # negative and never record a false positive.
        if predicted == 0:
            # True negative
            tn = tn + 1
        else:
            # False positive
            fp = fp + 1

In [None]:
#Confusion matrix, printed as two rows: "tn fp" then "fn tp"
print ("%d %d" % (tn, fp))
print ("%d %d" % (fn, tp))

# Every ratio falls back to 0.0 when its denominator is zero (for example when
# the model never predicts the positive class), instead of raising
# ZeroDivisionError.
total = tp + tn + fp + fn
accuracy  = (tp + tn) / total if total else 0.0
precision = tp / (tp + fp) if (tp + fp) else 0.0
# Recall is the share of actual positives that were found: tp / (tp + fn).
recall    = tp / (tp + fn) if (tp + fn) else 0.0
f1        = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0

print ("accuracy:\t%.4f\nprecision:\t%.4f\nrecall:\t\t%.4f\nf1:\t\t%.4f" % (accuracy, precision, recall, f1))