In [None]:
import pandas as pd

# Load the whitespace-delimited housing file into a DataFrame.
# `sep=r'\s+'` is the documented replacement for `delim_whitespace=True`,
# which is deprecated in recent pandas releases and emits a FutureWarning.
dataset = pd.read_csv('housing.csv', sep=r'\s+')

In [None]:
# Report the (rows, columns) dimensions, then show the first five rows
# as the cell's rich output.
print(dataset.shape)
dataset.head(5)

In [None]:
# Reorder the columns so that 'mdev' comes first; every other column keeps
# its original relative position.
dataset = dataset[['mdev'] + [name for name in dataset.columns if name != 'mdev']]

In [None]:
# Sample 90% of the rows for training with a fixed seed, and keep every row
# whose index label was not sampled as the validation set.
training_dataset = dataset.sample(frac=0.90, random_state=59)
validation_dataset = dataset.drop(index=training_dataset.index)

for split in (training_dataset, validation_dataset):
    print(split.shape)

In [None]:
# Write both splits as bare CSV files: no index column and no header row.
for frame, csv_path in ((training_dataset, 'training_dataset.csv'),
                        (validation_dataset, 'validation_dataset.csv')):
    frame.to_csv(csv_path, index=False, header=False)

In [None]:
import sagemaker

# SageMaker session plus the bucket name it reports as its default.
sess = sagemaker.Session()
bucket = sess.default_bucket()

prefix = 'boston-housing'

# Upload each local CSV under its own key prefix; the value returned by
# upload_data is kept as the data path for that split.
training_data_path = sess.upload_data(
    path='training_dataset.csv',
    key_prefix='{}/input/training'.format(prefix),
)
validation_data_path = sess.upload_data(
    path='validation_dataset.csv',
    key_prefix='{}/input/validation'.format(prefix),
)

for uploaded_path in (training_data_path, validation_data_path):
    print(uploaded_path)

In [None]:
import boto3
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.estimator import Estimator

# Look up the 'linear-learner' container image for the region of the
# default boto3 session.
region = boto3.Session().region_name
container = get_image_uri(region, 'linear-learner')

role = sagemaker.get_execution_role()

# One ml.m5.large training instance; output is written under
# s3://<bucket>/<prefix>/output.
output_location = 's3://{}/{}/output'.format(bucket, prefix)
ll_estimator = Estimator(
    container,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.m5.large',
    output_path=output_location,
)

ll_estimator.set_hyperparameters(predictor_type='regressor', mini_batch_size=32)

In [None]:
# Wrap each uploaded data path as a 'text/csv' input and key the two inputs
# by the names 'train' and 'validation'.
training_data_channel = sagemaker.s3_input(training_data_path, content_type='text/csv')
validation_data_channel = sagemaker.s3_input(validation_data_path, content_type='text/csv')

ll_data = {
    'train': training_data_channel,
    'validation': validation_data_channel,
}

In [None]:
# Run training on the estimator with the inputs collected in ll_data.
ll_estimator.fit(inputs=ll_data)

In [None]:
!aws s3 ls s3://sagemaker-eu-west-1-613904931467/boston-housing/output/linear-learner-2020-04-29-11-31-39-547/output/

In [None]:
from time import gmtime, strftime

# Suffix the endpoint name with the current UTC day-hour-minute-second.
timestamp = strftime('%d-%H-%M-%S', gmtime())

endpoint_name = '-'.join(['linear-learner-demo', timestamp])
print(endpoint_name)

In [None]:
# Deploy the estimator to a one-instance ml.t2.medium endpoint under the
# generated endpoint name; the returned object is kept as the predictor.
ll_predictor = ll_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name=endpoint_name,
)

In [None]:
# One CSV row holding 13 comma-separated values (no label column).
test_sample = (
    '0.00632,18.00,2.310,0,0.5380,6.5750,65.20,'
    '4.0900,1,296.0,15.30,396.90,4.98'
)

In [None]:
from sagemaker.predictor import csv_deserializer, csv_serializer

# Configure the predictor to send and receive CSV.
ll_predictor.serializer = csv_serializer
ll_predictor.deserializer = csv_deserializer
ll_predictor.content_type = 'text/csv'

# Send the single sample row and show what comes back.
response = ll_predictor.predict(test_sample)
print(response)

In [None]:
# Two CSV rows sent to the endpoint in a single predict call.
test_samples = [
    '0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98',
    '0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,396.90,9.14',
]

response = ll_predictor.predict(test_samples)
print(response)

In [None]:
# Call the same endpoint through a boto3 runtime client instead of the
# predictor object, sending the single sample row as 'text/csv'.
runtime = boto3.Session().client(service_name='runtime.sagemaker')

response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='text/csv',
    Body=test_sample,
)

# The body is a stream; read it fully before printing.
payload = response['Body'].read()
print(payload)

In [None]:
# Delete the endpoint that was created under endpoint_name.
sess.delete_endpoint(endpoint_name)