In [4]:
"""
This is the code for the lambda function which is used to get the spot price history for the given instance type, product description, region and time period.
This code is used to get the spot price history for the last 90 days.
This code is used to train the model and make predictions for the next 7 days.
This code is used to deploy the model and make predictions for the next 7 days.

Author: Nirbhay Bagmar
Date Created: 2019-09-17
"""

import boto3
import datetime
import pandas as pd
from sklearn import preprocessing
import datetime as dt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

import dateutil
import itertools


def handler(event, context):
    """
    This function is the entry point for the lambda function.
    It takes the event and context object as input and returns the response.

    @param event: The event object
    @param context: The context object
    @return: The response object
    """
    start_time = event['07:06:30']
    end_time = event['07:06:30']
    region = event['ap-south-1a']
    product_description = event['Amazon-ec2']

    # Create the boto3 client for calling the EC2 API
    client = boto3.client('ec2', region_name=region)

    # Get the spot price history
    response = client.describe_spot_price_history(
        InstanceTypes=event['m4.xlarge'],
        ProductDescriptions=product_description,
        StartTime=start_time,
        EndTime=end_time,
        MaxResults=10000
    )
    return response['SpotPriceHistory']


def wrapper(instanceList, ProductDescriptionList, region, numberOfDays=7):
    """
    This function is the wrapper function for the lambda function.
    It takes the instanceList, ProductDescriptionList, region and numberOfDays as input and returns the predictions.

    @param instanceList: The list of instance types
    @param ProductDescriptionList: The list of product descriptions
    @param region: The region
    @param numberOfDays: The number of days for which the predictions are to be made
    @return: The predictions
    """

    m4_list = []
    for i in range(1, 90):

        # call the lambda function
        output = (handler({
            'instances_list': instanceList,
            'start_time': datetime.datetime.now() - datetime.timedelta(i),
            'end_time': datetime.datetime.now() - datetime.timedelta(i-1),
            'product_description': ProductDescriptionList,
            'region': region
        }, ''))
        for j in range(0, len(output)):
            m4_list.append(output[j])

    # convert the list into dataframe
    df = pd.DataFrame(m4_list)
    df_uni = df.drop_duplicates()
    df_uni.reset_index(drop=True, inplace=True)

    # this is for building the test set while deploying
    availzone = df_uni.AvailabilityZone.unique()

    # feature engineering for training set and test set
    le = preprocessing.LabelEncoder()
    encode_ProductDescription = le.fit_transform(df_uni.ProductDescription)
    encode_InstanceType = le.fit_transform(df_uni.InstanceType)
    encode_AvailabilityZone = le.fit_transform(df_uni.AvailabilityZone)

    df_uni = df_uni.assign(year=df_uni.Timestamp.dt.year)
    df_uni = df_uni.assign(month=df_uni.Timestamp.dt.month)
    df_uni = df_uni.assign(day=df_uni.Timestamp.dt.day)
    df_uni = df_uni.assign(day_of_week=df_uni.Timestamp.dt.weekday)
    df_uni = df_uni.assign(hour=df_uni.Timestamp.dt.hour)
    df_uni = df_uni.assign(minute=df_uni.Timestamp.dt.minute)
    df_uni = df_uni.assign(second=df_uni.Timestamp.dt.second)
    df_uni = df_uni.assign(ProdDescEnc=encode_ProductDescription)
    df_uni = df_uni.assign(AvailZoneEnc=encode_AvailabilityZone)
    df_uni = df_uni.assign(InstanceTypeEnc=encode_InstanceType)

    # Prepare data for model 
    df_uni.sort_values(['Timestamp'], ascending=[True], inplace=True)

    # split the data into train 
    y = df_uni.loc[:, 'SpotPrice']
    X = df_uni.loc[:, ['AvailZoneEnc', 'InstanceTypeEnc', 'ProdDescEnc',
                       'year', 'month', 'day', 'day_of_week', 'hour', 'minute', 'second']]

    # train the model using random forest regressor
    regr = RandomForestRegressor(max_depth=2, random_state=0)
    regr.fit(X, y)

    # create the test set for deployment using the last 7 days
    rng = pd.date_range(start=datetime.datetime.now(), end=datetime.datetime.now(
    ) + datetime.timedelta(numberOfDays), freq='H', tz=dateutil.tz.tzutc(), normalize='False')

    # convert the index into column from feature engineering for test set
    real_test = pd.Series(rng)
    real_test_frame = real_test.to_frame()
    real_test_frame.columns = ['Timestamp']

    real_test_frame['year'] = real_test_frame.Timestamp.dt.year
    real_test_frame['month'] = real_test_frame.Timestamp.dt.month
    real_test_frame['day'] = real_test_frame.Timestamp.dt.day
    real_test_frame['day_of_week'] = real_test_frame.Timestamp.dt.weekday
    real_test_frame['hour'] = real_test_frame.Timestamp.dt.hour
    real_test_frame['minute'] = real_test_frame.Timestamp.dt.minute
    real_test_frame['second'] = real_test_frame.Timestamp.dt.second

    final_deploy = pd.DataFrame()
    [r, c] = real_test_frame.shape

    rowlists = [instanceList, ProductDescriptionList, availzone]
    i = 0
    for combination in (list(itertools.product(*rowlists))):
        final_deploy = final_deploy.append(real_test_frame, ignore_index=True)
        final_deploy.loc[i:i+r-1, 'InstanceType'] = combination[0]
        final_deploy.loc[i:i+r-1, 'ProductDescription'] = combination[1]
        final_deploy.loc[i:i+r-1, 'AvailabilityZone'] = combination[2]
        i = i + r

    # fir transform the test set for deployment using the label encoder from training
    encod_ProductDescription = le.fit_transform(
        final_deploy.ProductDescription)
    encod_InstanceType = le.fit_transform(final_deploy.InstanceType)
    encod_AvailabilityZone = le.fit_transform(final_deploy.AvailabilityZone)

    # assign the encoded values to the test set for deployment
    final_deploy = final_deploy.assign(ProdDescEnc=encod_ProductDescription)
    final_deploy = final_deploy.assign(AvailZoneEnc=encod_AvailabilityZone)
    final_deploy = final_deploy.assign(InstanceTypeEnc=encod_InstanceType)

    test_deploy = final_deploy.loc[:, ['AvailZoneEnc', 'InstanceTypeEnc',
                                       'ProdDescEnc', 'year', 'month', 'day', 'day_of_week', 'hour', 'minute', 'second']]

    # make predictions for the test set and convert the series into dataframe
    future = regr.predict(test_deploy)
    future_series = pd.Series(future)

    # create the final dataframe for predictions
    pretty_predictions = pd.DataFrame()
    pretty_predictions = pretty_predictions.assign(
        Timestamp=final_deploy.Timestamp)
    pretty_predictions = pretty_predictions.assign(
        AvailabilityZone=final_deploy.AvailabilityZone)
    pretty_predictions = pretty_predictions.assign(
        InstanceType=final_deploy.InstanceType)
    pretty_predictions = pretty_predictions.assign(
        ProductDescription=final_deploy.ProductDescription)
    pretty_predictions = pretty_predictions.assign(
        Predicted_SpotPrice=future_series)

    return pretty_predictions


# enter the instance types you need below as list!
# give only one product description for one run
df = wrapper(['m4.xlarge'], ['Linux/UNIX (Amazon VPC)'], 'ap-south-1a')
df

KeyError: '07:06:30'