In [None]:
import numpy as np
import pandas as pd
# AWS SDK (boto3) and SageMaker SDK; get_execution_role supplies the IAM role used further down
import boto3
import re
import sagemaker
from sagemaker import get_execution_role

In [None]:
# Uploading Data to S3

In [None]:
# Target bucket and the object key under which the RecordIO training file is stored
bucket_name = 'mehrshad-ml-sagemaker'
training_file_key = 'biketrain/bike_train_numeric_columns.recordio'

# Full S3 URIs derived from the bucket name: model output prefix and training file
s3_model_output_location = 's3://' + bucket_name + '/biketrain/model'
s3_training_file_location = 's3://' + bucket_name + '/' + training_file_key

In [None]:
# Echo both S3 URIs, one per line, to confirm they were assembled as intended
print(s3_model_output_location, s3_training_file_location, sep='\n')

In [None]:
def write_to_s3(filename, bucket, key):
    """Upload a local file to S3.

    Args:
        filename: Path of the local file to upload; opened in binary mode.
        bucket: Name of the destination S3 bucket.
        key: Object key to write inside the bucket.

    Returns:
        Whatever ``upload_fileobj`` returns for the upload call.
    """
    with open(filename, 'rb') as source:
        # A fresh boto3 session is created per call; the file handle is
        # streamed to the target object and closed when the block exits.
        s3 = boto3.Session().resource('s3')
        target = s3.Bucket(bucket).Object(key)
        return target.upload_fileobj(source)

In [None]:
# Push the local RecordIO training file to its key in the configured bucket
write_to_s3(filename='bike_train_numeric_columns.recordio',
            bucket=bucket_name,
            key=training_file_key)

In [None]:
## Training Algorithm Docker Image

In [None]:
# Region -> ECR image URI of the PCA algorithm container.
# Each region is served from a different registry account, so the URI is
# assembled from (region, account id) pairs.
containers = {
    region: '{0}.dkr.ecr.{1}.amazonaws.com/pca:latest'.format(account, region)
    for region, account in (
        ('us-west-2', '174872318107'),
        ('us-east-1', '382416733822'),
        ('us-east-2', '404615174143'),
        ('eu-west-1', '438346466558'),
    )
}

In [None]:
# Execution role obtained from the SageMaker SDK; it is handed to the Estimator below.
role = get_execution_role()

In [None]:
# Show the resolved role as a sanity check.
# NOTE(review): the output presumably contains an account-specific role ARN —
# consider clearing this cell's output before sharing the notebook.
print(role)

In [None]:
## Building Model

In [None]:
# SageMaker session object; passed to the Estimator as `sagemaker_session`.
sess = sagemaker.Session()

In [None]:
# Configure the training job:
#   - algorithm container image registered for the current boto3 session's region
#     (a region missing from `containers` raises KeyError here)
#   - number and type of instances to train on
#   - S3 location where the trained model artifacts are stored
estimator = sagemaker.estimator.Estimator(
    containers[boto3.Session().region_name],
    role,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path=s3_model_output_location,
    sagemaker_session=sess,
    base_job_name='pca-biketrain-v1',
)

In [None]:
# Hyperparameters for the training algorithm.
# feature_dim=4 matches the four values per row sent to the endpoint at prediction time.
estimator.set_hyperparameters(
    feature_dim=4,
    num_components=3,
    subtract_mean=False,
    algorithm_mode='regular',
    mini_batch_size=200,
)

In [None]:
# Display the hyperparameters currently set on the estimator (bare expression -> cell output).
estimator.hyperparameters()

In [None]:
# Training the model

In [None]:
# Start training with a single 'train' channel that points at the training file uploaded to S3.
estimator.fit({'train':s3_training_file_location})

In [None]:
# Deploying Model

In [None]:
# Deploy the trained model to a single-instance endpoint with a fixed name.
# NOTE(review): because the endpoint name is fixed, re-running this cell while the
# endpoint still exists will presumably fail — confirm and delete the old endpoint first.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    endpoint_name='pca-biketrain-v1',
)

In [None]:
# Running Predictions

In [None]:
from sagemaker.predictor import csv_serializer, json_deserializer

# Requests are sent as CSV text; responses are parsed with the JSON deserializer.
predictor.serializer = csv_serializer
predictor.deserializer = json_deserializer
predictor.content_type = 'text/csv'

In [None]:
# Send one sample row with four feature values (matching feature_dim=4) to the endpoint;
# the response is parsed by the configured json_deserializer and shown as the cell output.
predictor.predict([[-1.333660693,-1.092736969,0.993213054,1.567753667]])