In [1]:
# import libraries
import boto3, re, sys, math, json, os, sagemaker, urllib.request
from sagemaker import get_execution_role
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
from IPython.display import display
from time import gmtime, strftime
from sagemaker.predictor import csv_serializer

# Define IAM role
role = get_execution_role()
prefix = 'xgboost-as-a-built-in-algo'
my_region = boto3.session.Session().region_name # set the region of the instance

# this line automatically looks for the XGBoost image URI and builds an XGBoost container.
#xgboost_container = sagemaker.image_uris.retrieve("xgboost", my_region, "latest")

#print("Success - the MySageMakerInstance is in the " + my_region + " region. You will use the " + xgboost_container + " container for your SageMaker endpoint.")

In [3]:
bucket_name = 'financepriceyahoo2'
s3 = boto3.resource('s3')
try:
    if  my_region == 'us-east-1':
      s3.create_bucket(Bucket=bucket_name)
    else: 
      s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={ 'LocationConstraint': my_region })
    print('S3 bucket created successfully')
except Exception as e:
    print('S3 error: ',e)

S3 bucket created successfully


## 2. Create train and validation csv

In [4]:
!pip install yfinance
import pandas as pd
from datetime import datetime
import yfinance as yf

#initialize parameters
start_date =datetime(2019,1,1)
end_date =datetime(2021,1,1)

# get the data
df_data =yf.download('AAPL',start=start_date,end=end_date)

df_data.reset_index(inplace=True)

df_data

Collecting yfinance
  Downloading yfinance-0.1.70-py2.py3-none-any.whl (26 kB)
Collecting multitasking>=0.0.7
  Downloading multitasking-0.0.10.tar.gz (8.2 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: multitasking
  Building wheel for multitasking (setup.py) ... [?25ldone
[?25h  Created wheel for multitasking: filename=multitasking-0.0.10-py3-none-any.whl size=8488 sha256=f34fd08b2ec455e73f80252994b14abaaab48c791e44707369b580ad345ab132
  Stored in directory: /home/ec2-user/.cache/pip/wheels/15/e6/fa/f4bf8d84e804547b3c1b1d4b09a671768502b32ca33ec60651
Successfully built multitasking
Installing collected packages: multitasking, yfinance
Successfully installed multitasking-0.0.10 yfinance-0.1.70
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-01-02,38.722500,39.712502,38.557499,39.480000,38.277523,148158800
1,2019-01-03,35.994999,36.430000,35.500000,35.547501,34.464798,365248800
2,2019-01-04,36.132500,37.137501,35.950001,37.064999,35.936085,234428400
3,2019-01-07,37.174999,37.207500,36.474998,36.982498,35.856091,219111200
4,2019-01-08,37.389999,37.955002,37.130001,37.687500,36.539616,164101200
...,...,...,...,...,...,...,...
500,2020-12-24,131.320007,133.460007,131.100006,131.970001,130.994522,54930100
501,2020-12-28,133.990005,137.339996,133.509995,136.690002,135.679642,124486200
502,2020-12-29,138.050003,138.789993,134.339996,134.869995,133.873077,121047300
503,2020-12-30,135.580002,135.990005,133.399994,133.720001,132.731598,96452100


### Extract,Load and Transform

In [5]:
df_data.drop(axis=1,columns=['Adj Close','Date'],inplace=True)
# Takeout the last row
df_data_features=df_data.iloc[:-1,:]
# get the first column and skip first row, rename Targets
df_data_targets = df_data.iloc[1:,0].rename("Targets")
# Combine everything together to create our final data output.
df_data_features['Target']=list(df_data_targets)

first_column = df_data_features.pop('Target')
df_data_features.insert(0, 'Target',first_column)

df_data_final=df_data_features
df_data_final.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,Target,Open,High,Low,Close,Volume
0,35.994999,38.7225,39.712502,38.557499,39.48,148158800
1,36.1325,35.994999,36.43,35.5,35.547501,365248800
2,37.174999,36.1325,37.137501,35.950001,37.064999,234428400
3,37.389999,37.174999,37.2075,36.474998,36.982498,219111200
4,37.822498,37.389999,37.955002,37.130001,37.6875,164101200


In [6]:
#random sample
df_randomized = df_data_final.sample(frac=1,random_state=123)
train_data,test_data=np.split(df_randomized,[int(0.8*len(df_randomized))])

print(train_data.shape,test_data.shape)

(403, 6) (101, 6)


### Set a path and upload dataset to S3 bucket

In [8]:
#train_csv_path ='s3://{}//{}/{}/{}'.format(bucket_name,prefix,'train','train.csv')
#test_csv_path ='s3://{}//{}/{}/{}'.format(bucket_name,prefix,'test','test.csv')
#Upload the files over to the buckets
#train_data.to_csv(train_csv_path,index=False,header=False)
#test_data.to_csv(test_csv_path,index=False,header=False)
train_data.to_csv('train.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')
train_input = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')

In [9]:
test_data.to_csv('test.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'test/test.csv')).upload_file('test.csv')
test_input = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/test'.format(bucket_name, prefix), content_type='csv')

## 3. Build XGBoost

we will use XGBoost as built in algorithm

In [10]:
# this line automatically looks for the XGBoost image URI and builds an XGBoost container.
# specify the repo_version depending on your preference.
#find out the image that is suitable for your own application, I am using 1.2-2
xgboost_container = sagemaker.image_uris.retrieve("xgboost", my_region, "latest")

print("Success - the MySageMakerInstance is in the " + my_region + " region. You will use the " + xgboost_container + " container for your SageMaker endpoint.")

Success - the MySageMakerInstance is in the us-west-2 region. You will use the 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest container for your SageMaker endpoint.


**Initialize hyperparameters**

There are 2 types of parameters, booter parameters ans Learning Task Parameters

In [16]:
# initialize hyperparameters
hyperparameters = {
        "max_depth":"5",
        "eta":"0.2",
        "gamma":"4",
        "min_child_weight":"6",
        "subsample":"0.7",
        "objective":"reg:linear",
        "early_stopping_rounds":10,
        "num_round":1000}

Set an output path where the trained model will be saved

In [12]:
# first {} is bucket name
# second {} is prefix
# output folder
output_path = 's3://{}/{}/{}/output'.format(bucket_name, prefix, 'output')

print(output_path)

s3://financepriceyahoo2/xgboost-as-a-built-in-algo/output/output


**Construct a Sagemeker estimator that calls the xgboost-container**

In [17]:
sess = sagemaker.Session()
# construct a SageMaker estimator that calls the xgboost-container
estimator = sagemaker.estimator.Estimator(image_uri=xgboost_container, 
                                          hyperparameters=hyperparameters,
                                          role=role,
                                          instance_count=1, 
                                          instance_type='ml.m4.xlarge', 
                                          volume_size=5, # 5 GB 
                                          output_path=output_path,
                                          sagemaker_session=sess,
                                          use_spot_instances=True,# Shared instances amount of time we can wait for instance to be available
                                          max_run=300,
                                          max_wait=600)

### Execute the XGBoost training job

In [18]:
estimator.fit({'train':train_input,'validation':test_input})

2022-02-24 15:56:34 Starting - Starting the training job...
2022-02-24 15:56:57 Starting - Launching requested ML instancesProfilerReport-1645718193: InProgress
.........
2022-02-24 15:58:17 Starting - Preparing the instances for training.........
2022-02-24 16:00:00 Downloading - Downloading input data...
2022-02-24 16:00:35 Training - Training image download completed. Training in progress..[34mArguments: train[0m
[34m[2022-02-24:16:00:36:INFO] Running standalone xgboost training.[0m
[34m[2022-02-24:16:00:36:INFO] File size need to be processed in the node: 0.05mb. Available memory size in the node: 8340.5mb[0m
[34m[2022-02-24:16:00:36:INFO] Determined delimiter of CSV input is ','[0m
[34m[16:00:36] S3DistributionType set as FullyReplicated[0m
[34m[16:00:36] 403x5 matrix with 2015 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2022-02-24:16:00:36:INFO] Determined delimiter of CSV input is ','[0m
[34m[16:00:36] S3DistributionT

[Deploy and test Lambda-ready Model Endpoint](https://youtu.be/LbeWBcXEW7s)

## Deploy trained xgb model as Endpoints

1. Environment

    * When SageMaker - Serialization by User
    * Outside SageMaker - Serialization by Endpoint
    
2. Method to invoke the endpoint
    * **API - Single Prediction**
    * s3 Bucket - Batch Prediction
    
3. Data type tested on method
    * **API -JSON**
    * s3 Bucket -CSV

To host a model through Amazon EC2 using Amazon Sagemaker, deploy the model that you trained in Create and Run a Training job by calling the **deploy method of the xgb_model estimator**.

When you call the deploy method, few key things that you need to specify:
    

In [19]:
from sagemaker.serializers import CSVSerializer

xgb_predictor = estimator.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge',serializer=CSVSerializer())

-------!

In [20]:
xgb_predictor.endpoint_name

'xgboost-2022-02-24-16-20-48-958'

**Make prediction with the use of Endpoints**

In [21]:
#initialize parameters
start_date =datetime(2021,1,4)
end_date =datetime(2021,1,5)

# get the data
df_data =yf.download('AAPL',start=start_date,end=end_date)

df_data.reset_index(inplace=True)

df_data

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-01-04,133.520004,133.610001,126.760002,129.410004,128.45343,143301900


In [22]:
df_data.drop(axis=1,columns=['Adj Close','Date'],inplace=True)
# Takeout the last row
data_features_array=df_data.values
data_features_array

array([[1.33520004e+02, 1.33610001e+02, 1.26760002e+02, 1.29410004e+02,
        1.43301900e+08]])

### Serialize data

**Inference - Serialized Input by Sagemaker Function**

This is within the Sagemaker

In [37]:
# Make predictions
# 'utf-8' changes bytes to strings
Y_pred_Fcn = xgb_predictor.predict(data_features_array).decode('utf-8') # predict!
Y_pred_Fcn

'131.989990234375'

**Inference - Serialized Input by built-in function (Lambda function friendly)**

In [38]:
Input = data_features_array.tolist()*3
Serialized_Input=','.join(map(str,Input[0]))#take the first element int list of list and convert the to string

Y_pred=xgb_predictor.predict(Serialized_Input).decode('utf-8')
Y_pred

'131.989990234375'

In [35]:
(data_features_array.tolist()*3)[0]

[133.52000427246094,
 133.61000061035156,
 126.76000213623047,
 129.41000366210938,
 143301900.0]

## 5. Lamdba Function handler

Reference:

https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html 

https://docs.aws.amazon.com/lambda/latest/dg/python-context.html

### Inference- Lambda function(Base)

In [43]:
{'data':
    data_features_array.tolist()*3
            
             }

{'data': [[133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0],
  [133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0],
  [133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0]]}

In [42]:
import boto3

ENDPOINT_NAME = 'xgboost-2022-02-24-16-20-48-958'

# connect to the sagemaker runtime
runtime = boto3.client('runtime.sagemaker')

# define lambda handler function

def lambda_handler(event,context):
    inputs = event['data']
    
    # in case you are being provided list of list, then 
    # Use for loop to provide multiple inference
    result =[]
    for input in inputs:
        # serialize input and passit on to our sagemaker
        serialized_input=','.join(map(str,input))
    
        response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                      ContentType='text/csv',
                                      Body=serialized_input)
    
        result.append(response['Body'].read().decode() )# decode byte to string type
    return result

Input_json = {'data':
    data_features_array.tolist()*3
            
             }
#context, no information passed, because it is information from the runtime.
result = lambda_handler(Input_json,__)
result

['131.989990234375', '131.989990234375', '131.989990234375']

Next Set up the Lambda and then copy and paste it to lambda functions in AWS Lambda console.

[Create Lambda function to invoke Endpoint](https://youtu.be/vvjcGWnb0Os)

Give a role and policy to the lambda function to accept the sagemaker endpoints.

Under IAM Role, See myFunction... role , attach a policy AmazonSagemakerFullAccess to Lambda function

In [44]:
# How it is set in Lambda test event

{
    "data": [
        [133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0],
  [133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0],
  [133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0]
   ]
    
}

{'data': [[133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0],
  [133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0],
  [133.52000427246094,
   133.61000061035156,
   126.76000213623047,
   129.41000366210938,
   143301900.0]]}

## Send results via email

[Publish response in email with SNS and Lambda](https://youtu.be/i9dmpj7X-6U)

Using Amazon SNS we are Using Application-to-person(A2P):

**Application-to-person (A2P)**

In A2P, Amazon SNS lets you send push notifications to mobile apps, text messages to mobile phone numbers, and plain-text emails to email addresses. You can fan out messages with a topic, or publish to mobile endpoints directly

In this example, we are sending emails to someone/people.

* Provide Topic ARN to Lambda push function. e.g arn:aws:sns:eu-central-1:######:MyTopic

[SNS Client publish](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sns.html)

In [45]:
import boto3

ENDPOINT_NAME = 'xgboost-2022-02-24-16-20-48-958'

# connect to the sagemaker runtime
runtime = boto3.client('runtime.sagemaker')
email_client = boto3.client('sns')#Initial the SNS service
# define lambda handler function

def lambda_handler(event,context):
    inputs = event['data']
    
    # in case you are being provided list of list, then 
    # Use for loop to provide multiple inference
    result =[]
    for input in inputs:
        # serialize input and passit on to our sagemaker
        serialized_input=','.join(map(str,input))
    
        response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                      ContentType='text/csv',
                                      Body=serialized_input)
    
        result.append(response['Body'].read().decode() )# decode byte to string type
    
    
    response_sns = email_client.publish(
        TopicArn='arn:aws:sns:eu-central-1:####:MyTopic',
        Message='Prediction is '+str(result),
        Subject='Apple Stock Price Daily Predictions')
    
    return result

I Encountered error with sns with lambda due to TopicArn.


{
  "errorMessage": "An error occurred (InvalidParameter) when calling the Publish operation: Invalid parameter: TopicArn",
  "errorType": "InvalidParameterException",
  "requestId": "######",
  "stackTrace": [
    "  File \"/var/task/lambda_function.py\", line 27, in lambda_handler\n    response_sns = email_client.publish(\n",
    "  File \"/var/runtime/botocore/client.py\", line 386, in _api_call\n    return self._make_api_call(operation_name, kwargs)\n",
    "  File \"/var/runtime/botocore/client.py\", line 705, in _make_api_call\n    raise error_class(parsed_response, operation_name)\n"
  ]
}

may be because of different regions,  us-east-1 and eu-central-1. Thats a guess.

## Build, deploy and test an API Gateway endpoint for the REST API

Refrence: [Build and deploy a REST API with API Gateway](https://youtu.be/D4v_wTvn51I)

Using Amazon API Gateway.

 Invoke URL: https://####.execute-api.us-west-2.amazonaws.com/xgbmodel

In [49]:
import requests

# defining the api -endpoint
API_ENDPOINT="https://4vi30eq07i.execute-api.us-west-2.amazonaws.com/xgbmodel"
# data to be sent to the api
json = {"data":data_features_array.tolist()*3}

# sending post request and saving response as response object
r = requests.post(url=API_ENDPOINT,json=json)

With the lambda integration the Lambda function output function is returned as a 200 OK response.

HTTP status codes reference: https://aws.amazon.com/blogs/compute/error-handling-patterns-in-amazon-api-gateway-and-aws-lambda/

In [50]:
print(f"Status Code:{r.status_code},Response: {r.json()}")

Status Code:200,Response: ['131.989990234375', '131.989990234375', '131.989990234375']


# Success

## Close and Terminate

In [51]:
sagemaker.Session().delete_endpoint(xgb_predictor.endpoint)

#or
#xgb_predictor.delete_endpoint(delete_endpoint_config=True)

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [52]:
bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)
bucket_to_delete.objects.all().delete()

[{'ResponseMetadata': {'RequestId': 'PPM6Z9K0R8BFGA1H',
   'HostId': 'RYii5B4fXjjXsCElSdcIFBHW1RI4BFzHN+/FiKxGYl4b7VFxZVcBkzL0WEhjXfOtKgCLlv12YfQ=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'RYii5B4fXjjXsCElSdcIFBHW1RI4BFzHN+/FiKxGYl4b7VFxZVcBkzL0WEhjXfOtKgCLlv12YfQ=',
    'x-amz-request-id': 'PPM6Z9K0R8BFGA1H',
    'date': 'Thu, 24 Feb 2022 19:32:35 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'xgboost-as-a-built-in-algo/output/output/xgboost-2022-02-24-15-56-33-915/output/model.tar.gz'},
   {'Key': 'xgboost-as-a-built-in-algo/output/output/xgboost-2022-02-24-15-56-33-915/profiler-output/framework/training_job_end.ts'},
   {'Key': 'xgboost-as-a-built-in-algo/output/output/xgboost-2022-02-24-15-56-33-915/profiler-output/system/training_job_end.ts'},
   {'Key': 'xgboost-as-a-built-in-algo/output/output/xgboost-2022-02-24-15-45-30-310/r

**Delete your SageMaker Notebook:** Stop and delete your SageMaker Notebook.

1. Open the SageMaker console.
2. Under **Notebooks**, choose **Notebook instances**.
3. Choose the notebook instance that you created for this tutorial, then choose **Actions, Stop**. The notebook instance takes up to several minutes to stop. When **Status** changes to **Stopped**, move on to the next step.
4. Choose **Actions,** then **Delete**.
5. Choose **Delete**.**Delete your SageMaker Notebook:** Stop and delete your SageMaker Notebook.

1. Open the SageMaker console.
2. Under **Notebooks**, choose **Notebook instances**.
3. Choose the notebook instance that you created for this tutorial, then choose **Actions, Stop**. The notebook instance takes up to several minutes to stop. When **Status** changes to **Stopped**, move on to the next step.
4. Choose **Actions,** then **Delete**.
5. Choose **Delete**.
6. Delete API Gateway by clicking on API name-> Action-> Delete.
7. Delete Amazon SNS same Procedure as API Gateway, SNS is also Always free.
8. Delete Amazon Lambda same Procedure as API Gateway, but this is an always free service in AWS Free Tier.

Reference [Save Cost](https://youtu.be/nMBSDEYO_BI)