## UFO Sightings Evaluation and Optimization Lab

### Goal: Run the AWS Console-tuned hyperparameters in a notebook training job.

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

import boto3
from sagemaker import get_execution_role
import sagemaker

In [None]:
# Execution role returned by the SageMaker SDK; handed to the estimator later on.
role = get_execution_role()

# S3 bucket used for the training/validation inputs and the model output.
# NOTE(review): current S3 naming rules do not allow underscores in bucket
# names -- confirm this bucket exists or substitute your own bucket name.
bucket = 'ml_ufo_sightings'

Pipe Mode protobuf files were saved on S3 previously.

In [None]:
# Object names of the recordIO protobuf files that were saved on S3 previously.
# NOTE(review): the 'validatioin' spelling is kept on purpose -- it has to match
# the key of the object actually stored in S3; confirm before "fixing" it.
train_file = 'ufo_sightings_train_recordIO_protobuf.data'
validation_file = 'ufo_sightings_validatioin_recordIO_protobuf.data'

# Full S3 URI of the training channel.
training_recordIO_protobuf_location = f's3://{bucket}/algorithms_lab/linearlearner_train/{train_file}'
print(f'The Pipe mode recordIO protobuf training data: {training_recordIO_protobuf_location}')

# Full S3 URI of the validation channel.
validate_recordIO_protobuf_location = f's3://{bucket}/algorithms_lab/linearlearner_validation/{validation_file}'
print(f'The Pipe mode recordIO protobuf validation data: {validate_recordIO_protobuf_location}')

Retrieve the Linear Learner container image from the ECR repository.

In [None]:
from sagemaker.amazon.amazon_estimator import get_image_uri
import sagemaker

# Look up the Linear Learner image (version "1") for the region of the
# current boto3 session.
# NOTE(review): get_image_uri is the SageMaker Python SDK v1 helper; SDK v2
# offers sagemaker.image_uris.retrieve instead -- confirm the installed version.
region_name = boto3.Session().region_name
container = get_image_uri(region_name, 'linear-learner', "1")

In [None]:
# Training job name: fixed prefix plus a YYYYmmddHHMMSS timestamp, so each run
# of this cell produces a different name.
job_name = f'ufo-linear-learner-job-optimized-{datetime.now():%Y%m%d%H%M%S}'
print(f'Job name: {job_name}')

# S3 prefix under which the trained model output is written.
output_location = f's3://{bucket}/optimization_evaluation_lab/linearlearner_optimized_output'

In [None]:
%%time
sess = sagemaker.Session()

# Setup the Linear Leaner algorithm from the ECR container
linear = sagemaker.estimator.Estimator(container,
                                       role, 
                                       train_instance_count=1, 
                                       train_instance_type='ml.c4.xlarge',
                                       output_path=output_location,
                                       sagemaker_session=sess,
                                       input_mode='Pipe')

# copied over from AWS console optimizer.
linear.set_hyperparameters( feature_dim=22, 
                            predictor_type='multiclass_classifier',
                            num_classes=3,
                            early_stopping_patience=3,
                            epochs=15,
                            l1=0.064774153906635,
                            learning_rate=0.0932904024421902,
                            loss='auto',
                            mini_batch_size=744,
                            normalize_data='true',
                            normalize_label='auto',
                            num_models='auto',
                            optimizer='auto',
                            unbias_data='auto',
                            unbias_label='auto',
                            use_bias='true',
                            wd=0.000212481391205101
                          )


# Launch training job. This method calls the CreateTrainingJob API
data_channels = {'train': training_recordIO_protobuf_location,
                 'validation': validate_recordIO_protobuf_location
                }

linear.fit(data_channels, job_name=job_name)

- Compare the training time and validation accuracy with those of the baseline model trained without optimization.
- Stop the notebook instance.