In [22]:
import pandas as pd

dataset = pd.read_csv('housing.csv')

In [23]:
print(dataset.shape)
dataset[:5]

(506, 13)


Unnamed: 0,crim,zn,indus,chas,nox,age,rm,dis,rad,tax,ptratio,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,5.33,36.2


In [24]:
# Move 'medv' column to front
dataset = pd.concat([dataset['medv'], dataset.drop(['medv'], axis=1)], axis=1)

In [25]:
from sklearn.model_selection import train_test_split
training_dataset, validation_dataset = train_test_split(dataset, test_size=0.1)

print(training_dataset.shape)
print(validation_dataset.shape)

(455, 13)
(51, 13)


In [26]:
training_dataset.to_csv('training_dataset.csv', index=False, header=False)
validation_dataset.to_csv('validation_dataset.csv', index=False, header=False)

In [27]:
import sagemaker

print(sagemaker.__version__)

sess = sagemaker.Session()
bucket = sess.default_bucket()

prefix = 'boston-housing'
training_data_path = sess.upload_data(path='training_dataset.csv', key_prefix=prefix + '/input/training')
validation_data_path = sess.upload_data(path='validation_dataset.csv', key_prefix=prefix + '/input/validation')

print(training_data_path)
print(validation_data_path)

2.88.1
s3://sagemaker-us-east-1-607098578469/boston-housing/input/training/training_dataset.csv
s3://sagemaker-us-east-1-607098578469/boston-housing/input/validation/validation_dataset.csv


In [28]:
import boto3
from sagemaker import image_uris

region = boto3.Session().region_name    
container = image_uris.retrieve('linear-learner', region)

print(container)

from sagemaker.estimator import Estimator

role = sagemaker.get_execution_role() 

ll_estimator = Estimator(container,
    role=role, 
    instance_count=1,
    instance_type='ml.m5.large',
    output_path='s3://{}/{}/output'.format(bucket, prefix)
)

ll_estimator.set_hyperparameters(predictor_type='regressor', mini_batch_size=32)

382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1


In [29]:
training_data_channel   = sagemaker.TrainingInput(s3_data=training_data_path, content_type='text/csv')
validation_data_channel = sagemaker.TrainingInput(s3_data=validation_data_path, content_type='text/csv')

ll_data = {'train': training_data_channel, 'validation': validation_data_channel}

In [30]:
ll_estimator.fit(ll_data)

2022-05-17 20:26:39 Starting - Starting the training job...
2022-05-17 20:27:02 Starting - Preparing the instances for trainingProfilerReport-1652819198: InProgress
.........
2022-05-17 20:28:22 Downloading - Downloading input data......
2022-05-17 20:29:31 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[05/17/2022 20:29:39 INFO 140283694016320] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'a

In [34]:
%%bash -s "$ll_estimator.output_path"
aws s3 ls --recursive $1

2022-05-17 19:42:12        916 boston-housing/output/linear-learner-2022-05-17-19-38-40-996/output/model.tar.gz
2022-05-17 19:42:14          0 boston-housing/output/linear-learner-2022-05-17-19-38-40-996/profiler-output/framework/training_job_end.ts
2022-05-17 19:42:01      23979 boston-housing/output/linear-learner-2022-05-17-19-38-40-996/profiler-output/system/incremental/2022051719/1652816400.algo-1.json
2022-05-17 19:42:10     181224 boston-housing/output/linear-learner-2022-05-17-19-38-40-996/profiler-output/system/incremental/2022051719/1652816460.algo-1.json
2022-05-17 19:42:10      27550 boston-housing/output/linear-learner-2022-05-17-19-38-40-996/profiler-output/system/incremental/2022051719/1652816520.algo-1.json
2022-05-17 19:42:14          0 boston-housing/output/linear-learner-2022-05-17-19-38-40-996/profiler-output/system/training_job_end.ts
2022-05-17 19:43:00     329725 boston-housing/output/linear-learner-2022-05-17-19-38-40-996/rule-output/ProfilerReport-1652816320/pr

In [45]:
from time import strftime, gmtime
timestamp = strftime('%d-%H-%M-%S', gmtime())

endpoint_name = 'linear-learner-demo-'+timestamp
print(endpoint_name)

linear-learner-demo-17-21-01-10


In [48]:
ll_predictor = ll_estimator.deploy(endpoint_name=endpoint_name, 
                        initial_instance_count=1, 
                        instance_type='ml.t2.medium')

---------!

In [49]:
test_sample = '0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,4.98'

In [50]:
ll_predictor.content_type = 'text/csv'
ll_predictor.serializer = sagemaker.serializers.CSVSerializer()
ll_predictor.deserializer = sagemaker.deserializers.CSVDeserializer()

response = ll_predictor.predict(test_sample)
print(response)

AttributeError: can't set attribute

In [51]:
test_samples = ['0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,4.98',
                '0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,9.14']

response = ll_predictor.predict(test_samples)
print(response)

ParamValidationError: Parameter validation failed:
Invalid type for parameter Body, value: ['0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,4.98', '0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,9.14'], type: <class 'list'>, valid types: <class 'bytes'>, <class 'bytearray'>, file-like object

In [52]:
runtime = boto3.Session().client(service_name='runtime.sagemaker') 

response = runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                  ContentType='text/csv', 
                                  Body=test_sample)

print(response['Body'].read())

b'{"predictions": [{"score": 30.345577239990234}]}'


In [54]:
ll_predictor.delete_endpoint()

ClientError: An error occurred (ValidationException) when calling the DeleteEndpointConfig operation: Could not find endpoint configuration "arn:aws:sagemaker:us-east-1:607098578469:endpoint-config/linear-learner-demo-17-21-01-10".