In [1]:
import pandas as pd
dataset = pd.read_csv('housing.csv')
print(dataset.shape)

(506, 13)


In [2]:
dataset[:5]

Unnamed: 0,crim,zn,indus,chas,nox,age,rm,dis,rad,tax,ptratio,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,5.33,36.2


In [3]:
dataset = pd.concat([dataset['medv'], dataset.drop(['medv'], axis=1)], axis=1)

In [4]:
from sklearn.model_selection import train_test_split
training_dataset,validation_dataset = train_test_split(dataset,test_size=0.1)


In [5]:
training_dataset.to_csv('training_dataset.csv', index = False, header = False)
validation_dataset.to_csv('validation_dataset.csv', index=False, header=False)

In [10]:
# https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html
import sagemaker
from sagemaker.xgboost.estimator import XGBoost
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput

sess = sagemaker.Session()
bucket = sess.default_bucket()                    # Set a default S3 bucket

prefix = 'boston-housing'
training_data_path = sess.upload_data( path='training_dataset.csv', key_prefix = prefix + '/input/training')

validation_data_path = sess.upload_data(path='validation_dataset.csv', key_prefix=prefix + '/input/validation')
print(training_data_path)
print(validation_data_path)




s3://sagemaker-us-east-1-027893685092/boston-housing/input/training/training_dataset.csv
s3://sagemaker-us-east-1-027893685092/boston-housing/input/validation/validation_dataset.csv


In [12]:
import boto3 
from sagemaker import image_uris
region = boto3.Session().region_name 
container = image_uris.retrieve('xgboost', region, version = "latest")


print(training_data_path)
print(validation_data_path)

hyperparameters = {
       # "max_depth":"5",
      #  "eta":"0.2",
      #  "gamma":"4",
      #  "min_child_weight":"6",
     #   "subsample":"0.7"
      #  "verbose":"1",
        "objective":"reg:linear",
        "num_round":"200",
        "early_stopping_rounds" : "10"}


    
estimator = sagemaker.estimator.Estimator(
                    hyperparameters = hyperparameters,
                    image_uri = container, 
                    framework_version='1.2-1',
                    role=sagemaker.get_execution_role(),
                    instance_count=1,
                    instance_type='ml.m5.large',
                    output_path= 's3://{}/{}/output'.format(bucket, prefix))  
    
train_data_channel = sagemaker.TrainingInput(s3_data=training_data_path, content_type='text/csv')
validation_data_channel = sagemaker.TrainingInput(s3_data=validation_data_path, content_type='text/csv')
estimator.fit({'train':train_data_channel, 'validation':validation_data_channel})

s3://sagemaker-us-east-1-027893685092/boston-housing/input/training/training_dataset.csv
s3://sagemaker-us-east-1-027893685092/boston-housing/input/validation/validation_dataset.csv
2020-11-26 20:15:13 Starting - Starting the training job...
2020-11-26 20:15:16 Starting - Launching requested ML instances.........
2020-11-26 20:16:47 Starting - Preparing the instances for training...
2020-11-26 20:17:27 Downloading - Downloading input data...
2020-11-26 20:18:15 Training - Training image download completed. Training in progress..[34mArguments: train[0m
[34m[2020-11-26:20:18:15:INFO] Running standalone xgboost training.[0m
[34m[2020-11-26:20:18:15:INFO] File size need to be processed in the node: 0.04mb. Available memory size in the node: 249.07mb[0m
[34m[2020-11-26:20:18:15:INFO] Determined delimiter of CSV input is ','[0m
[34m[20:18:15] S3DistributionType set as FullyReplicated[0m
[34m[20:18:15] 455x12 matrix with 5460 entries loaded from /opt/ml/input/data/train?format=csv&

In [13]:
from time import strftime, gmtime
timestamp = strftime('%d-%H-%M-%S', gmtime()) 
endpoint_name = 'linear-lerner-demo-' + timestamp
print(endpoint_name)

linear-lerner-demo-26-20-20-03


In [None]:
ll_predictor = estimator.deploy(endpoint_name=endpoint_name, initial_instance_count=1, instance_type='ml.t2.medium')


----------------

In [39]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import CSVSerializer

#ll_predictor.content_type = 'text/csv'
ll_predictor.serializer = sagemaker.serializers.CSVSerializer()
ll_predictor.desarializer = sagemaker.deserializers.CSVDeserializer()

In [40]:
test_sample = '0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,4.98'

In [41]:
response = ll_predictor.predict(test_sample)
print(response)

b'{"predictions": [{"score": 30.447486877441406}]}'


In [None]:
test_sample = ['0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,4.98','0.02731,0.0,7.07,0,0.469,6.4210,78.9,4.9671,2,242.0,17.8,9.14']
response = ll_predictor.predict(test_sample)
print(response)

In [None]:
ll_predictor.delete_endpoint()

echo "# lab10" >> README.md
git init
git add README.md
git commit -m "first commit"
git branch -M main
git remote add origin https://github.com/jferrer21/lab10.git
git push -u origin main


git remote add origin https://github.com/jferrer21/lab10.git
git branch -M main
git push -u origin main
                