In [25]:
import json
import os

import joblib
import numpy as np
import pandas as pd
from sagemaker.sklearn.estimator import SKLearn

In [15]:
# Locate the local training CSV under ./input_data, relative to the
# notebook's current working directory.
cur_path = os.getcwd()
training_data_path = os.path.join(cur_path, 'input_data')

# os.listdir() returns entries in arbitrary order and also lists hidden
# files (e.g. .DS_Store, .ipynb_checkpoints). Keep only CSV files and sort
# them so that training_files[0] is deterministic across machines and runs.
training_files = sorted(
    name for name in os.listdir(training_data_path)
    if name.lower().endswith('.csv')
)
if not training_files:
    raise FileNotFoundError(
        "No .csv training file found in {}".format(training_data_path)
    )

# Full local path of the file that is uploaded to S3 further down.
file = os.path.join(training_data_path, training_files[0])

In [14]:
# Display the resolved local path of the training CSV selected above.
file

'/Users/kenhung/Google Drive/Booking_pridiction_liner/dynamic_pricing/bring_your_own_script/input_data/greenx_container_booking_raw.csv'

## Prepare the SageMaker session

In [None]:
import sagemaker
from sagemaker import get_execution_role
import boto3

# SageMaker session object; passed to the SKLearn estimator below.
sagemaker_session = sagemaker.Session()

# Get a SageMaker-compatible role used by this Notebook Instance.
# NOTE(review): presumably this only resolves when run on a SageMaker
# notebook instance; confirm behavior when the notebook is run locally.
role = get_execution_role()

## Upload the data for training

In [17]:
import boto3

In [18]:
# Destination S3 bucket for the training data.
s3_bucket = 'bluex-booking-pridict'
# Key prefix inside the bucket under which the training file is stored.
s3_prefix = 'train_data'

In [19]:
# Display the S3 key prefix configured above.
s3_prefix

'train_data'

In [23]:
# Upload the local training CSV to s3://<s3_bucket>/<s3_prefix>/training.csv.
# S3 object keys always use '/' as the separator; os.path.join would insert a
# backslash on Windows, so the key is built explicitly instead.
training_key = '{}/training.csv'.format(s3_prefix)
boto3.Session().resource('s3').Bucket(s3_bucket).Object(training_key).upload_file(file)

## Create SageMaker Scikit Estimator 
To run our Scikit-learn training script on SageMaker, we construct a `sagemaker.sklearn.estimator.SKLearn` estimator, which accepts several constructor arguments:

- `entry_point`: The path to the Python script SageMaker runs for training and prediction.
- `role`: Role ARN.
- `train_instance_type` (optional): The type of SageMaker instances for training. Note: Because Scikit-learn does not natively support GPU training, SageMaker Scikit-learn does not currently support training on GPU instance types.
- `sagemaker_session` (optional): The session used to train on SageMaker.
- `hyperparameters` (optional): A dictionary passed to the train function as hyperparameters.

In [None]:
from sagemaker.sklearn.estimator import SKLearn

# Version of the SageMaker Scikit-learn container to train and serve with.
FRAMEWORK_VERSION = "0.23-1"
# Training/inference entry point, resolved relative to the notebook directory.
script_path = 'train.py'
# S3 URI of the object uploaded in the "Upload the data for training" step;
# built from the same bucket/prefix so the two cells cannot drift apart.
s3_training_data = "s3://{}/{}/training.csv".format(s3_bucket, s3_prefix)


sklearn = SKLearn(
    entry_point=script_path,
    framework_version=FRAMEWORK_VERSION,
    py_version='py3',
    # `train_instance_type` is the SageMaker Python SDK v1 argument name.
    train_instance_type="ml.c4.xlarge",
    role=role,
    # Training-job names may only contain alphanumerics and hyphens; an
    # underscore in the base name makes CreateTrainingJob reject the request.
    base_job_name="Kneighbor-regression-byo",
    sagemaker_session=sagemaker_session,
    # NOTE(review): 'n_neighborsint' looks like a typo for 'n_neighbors';
    # left unchanged because it must match whatever train.py parses - confirm.
    hyperparameters={'n_neighborsint': 5}
)

# Channel name -> S3 location. The original referenced the undefined name
# `s3_train_data`, which raised NameError before the job could start.
inputs = {'training': s3_training_data}
# wait=False returns immediately; the job is polled in a later cell.
sklearn.fit(inputs=inputs, wait=False)

In [None]:
# Name of the most recent training job launched from this estimator; used
# below to poll the job status.
job_name = sklearn.latest_training_job.name
job_name

In [None]:
import time
sm = boto3.client('sagemaker')

# Seconds to wait between two status checks.
TRAINING_POLL_SECONDS = 20
# Statuses after which the training job will not change state any more.
TRAINING_TERMINAL_STATUSES = ('Completed', 'Failed', 'Stopped')

# Poll until the job reaches a terminal state. The original loop only exited
# on 'Completed' (so a failed or stopped job made it spin forever), compared
# integers with `is not`, and slept once more after the job had finished.
while True:
    training_description = sm.describe_training_job(TrainingJobName=job_name)
    training_status = training_description['TrainingJobStatus']
    print("job status:{}".format(training_status))
    if training_status in TRAINING_TERMINAL_STATUSES:
        break
    time.sleep(TRAINING_POLL_SECONDS)

# Surface failures instead of silently continuing to the deploy step.
if training_status != 'Completed':
    raise RuntimeError(
        "Training job {} ended with status {}: {}".format(
            job_name,
            training_status,
            training_description.get('FailureReason', 'no failure reason reported'),
        )
    )
print('all job completed')

## Deploy your trained model

In [None]:
# Instance type hosting the real-time endpoint. Defined here so the cell runs
# on a fresh kernel (the name was referenced without ever being assigned);
# adjust to the instance size/cost you need.
deploy_instance_type = 'ml.m4.xlarge'

# NOTE(review): it is not visible from this notebook what, if anything, reads
# `sklearn.name`; the value also contains an underscore - confirm it is needed.
sklearn.name = 'Kneighbor-regression_byo'
# wait=False returns immediately; the endpoint status is polled in a later cell.
my_predictor = sklearn.deploy(initial_instance_count=1, instance_type=deploy_instance_type, wait=False)

In [None]:
# Name of the endpoint behind the predictor; used below to poll the endpoint
# status and, at the end of the notebook, to delete the endpoint.
endpoint_name = my_predictor.endpoint
endpoint_name

In [None]:
# check endpoint status
# Seconds to wait between two status checks.
ENDPOINT_POLL_SECONDS = 20

# Poll until the endpoint is either serving or has failed. The original loop
# only exited on 'InService' (so a failed deployment made it spin forever),
# compared integers with `is not`, and slept once more after the endpoint
# was already in service.
while True:
    endpoint_description = sm.describe_endpoint(EndpointName=endpoint_name)
    endpoint_status = endpoint_description['EndpointStatus']
    print("job status:{}".format(endpoint_status))
    if endpoint_status in ('InService', 'Failed'):
        break
    time.sleep(ENDPOINT_POLL_SECONDS)

# Surface a failed deployment instead of continuing to the prediction step.
if endpoint_status != 'InService':
    raise RuntimeError(
        "Endpoint {} ended with status {}: {}".format(
            endpoint_name,
            endpoint_status,
            endpoint_description.get('FailureReason', 'no failure reason reported'),
        )
    )
print('all endpoints in service')

## Prediction on Test Dataset

In [None]:
from sagemaker.predictor import csv_serializer 

# Send request records to the endpoint as CSV text.
my_predictor.content_type = 'text/csv'
my_predictor.serializer = csv_serializer
# No deserializer is configured; predict() below calls .decode('utf-8') on
# the response and parses it as JSON itself.
my_predictor.deserializer = None

In [28]:
# One CSV-style record per request row; fields, in order:
# fbx_diff, d_quote_search_amount, conversion_rate, peak_season
_dummy_rows = (
    '-190,150,95,1',
    '-190,120,90,1',
    '-190,110,80,1',
    '-190,100,70,1',
    '-500,10,30,0',
)

# Values are kept as strings, one inner list per record.
dummy_data = [record.split(',') for record in _dummy_rows]

# Tabular view of the same records (default integer column labels).
dummy_df = pd.DataFrame(data=dummy_data)

In [30]:
# Show the records as a NumPy array, the form later passed to predict().
dummy_df.to_numpy()

array([['-190', '150', '95', '1'],
       ['-190', '120', '90', '1'],
       ['-190', '110', '80', '1'],
       ['-190', '100', '70', '1'],
       ['-500', '10', '30', '0']], dtype=object)

In [27]:
def predict(data, rows=500):
    """Score `data` against the deployed endpoint in batches.

    The records are split into batches of at most `rows` records, each batch
    is sent to `my_predictor`, and the "predictions" entries of the JSON
    responses are concatenated in request order.

    Parameters
    ----------
    data : numpy.ndarray
        Records to score, one record per row along the first axis.
    rows : int, optional
        Maximum number of records sent per request. The original code read an
        undefined global `rows`; it is now a parameter with a default so that
        existing `predict(data)` calls work.

    Returns
    -------
    numpy.ndarray
        Flat array with the predictions of all batches. Empty when `data`
        has no records (no request is sent in that case).

    Raises
    ------
    ValueError
        If `rows` is not positive.
    """
    if rows <= 0:
        raise ValueError("rows must be a positive number, got {!r}".format(rows))

    # Seed with an empty float array, mirroring the `np.append([], ...)`
    # accumulation of the original so the result dtype is promoted identically.
    collected = [np.empty(0)]

    n_records = data.shape[0]
    if n_records == 0:
        return collected[0]

    # Smallest number of batches that keeps every batch at or below `rows`.
    n_batches = int(np.ceil(n_records / float(rows)))

    for batch in np.array_split(data, n_batches):
        # The predictor is configured without a deserializer, so the response
        # is decoded and parsed as JSON here.
        payload = my_predictor.predict(batch).decode('utf-8')
        batch_predictions = json.loads(payload)["predictions"]
        # np.append flattened its inputs; ravel keeps that behavior.
        collected.append(np.ravel(np.array(batch_predictions)))

    # Single concatenation instead of re-allocating the result per batch.
    return np.concatenate(collected)

In [None]:
# Send the dummy records to the endpoint and keep the returned predictions.
result_out = predict(dummy_df.to_numpy())

In [None]:
# Display the predictions collected from the endpoint.
result_out

## Delete endpoints

In [None]:
import boto3
# Fresh SageMaker client so this cleanup cell can run on its own, provided
# `endpoint_name` is still defined.
sm = boto3.client('sagemaker')
# Delete the endpoint created by the deploy step.
# NOTE(review): only the endpoint itself is deleted here; confirm whether the
# endpoint configuration and model should be cleaned up as well.
sm.delete_endpoint(EndpointName=endpoint_name)