# Demo Notebook for SageMaker Endpoint

- Demonstrates invoking a SageMaker endpoint that predicts the cause of a forest fire.

In [None]:
import boto3

# Placeholder: name of the deployed SageMaker endpoint to call.
endpoint_name = "sagemaker/endpoint/model"

# Runtime client (us-east-2) used further down to send the inference request.
runtime = boto3.Session().client('sagemaker-runtime', region_name='us-east-2')

## Preprocess the raw data

In [None]:
import pandas as pd
# Placeholder S3 bucket and key of the test set; only the first rows are
# used as samples later on.
bucket = 'your/bucket/name'
data_key = 'the/etl/output/test/set'
data_location = 's3://{}/{}'.format(bucket, data_key)
print(data_location)

# Keep one untouched frame (test_df_orig) and one independent working copy
# (test_df) that the preprocessing cells are free to modify.
test_df_orig = pd.read_csv(data_location)
test_df = test_df_orig.copy()

In [None]:
# First two raw rows, shown later next to their encoded counterparts.
sample_data = test_df_orig.head(2)

In [None]:
# Display the raw sample rows taken from the unprocessed frame.
sample_data.head()

In [None]:
from sklearn import tree, preprocessing
# Preprocessing
# discovery_date is presumably a Julian day number: subtracting the Julian
# date of pd.Timestamp(0) leaves a day offset that to_datetime reads with
# unit='D'.
test_df['DATE'] = pd.to_datetime(test_df['discovery_date'] - pd.Timestamp(0).to_julian_date(), unit='D')
test_df['MONTH'] = pd.DatetimeIndex(test_df['DATE']).month
# Series.dt.weekday_name was removed in pandas 1.0; dt.day_name()
# (pandas >= 0.23) yields the same English day names.
test_df['DAY_OF_WEEK'] = test_df['DATE'].dt.day_name()
# NOTE(review): the encoder is fitted on the test data itself, so the integer
# codes match the training encoding only if both sets contain the same
# values -- confirm against the training pipeline.
le = preprocessing.LabelEncoder()
test_df['STATE'] = le.fit_transform(test_df['state'])
test_df['DAY_OF_WEEK'] = le.fit_transform(test_df['DAY_OF_WEEK'])

def set_label(cat):
    """Map a fire-cause description to a coarse numeric cause class.

    Args:
        cat: A value from the ``stat_cause_descr`` column (normally a string).

    Returns:
        int: 1 for natural causes, 2 for accidental causes, 3 for malicious
        causes, and 4 for everything else -- this includes
        'Missing/Undefined', 'Miscellaneous' and any value not listed below.
    """
    # Checked in order. Membership is tested with ``in`` on tuples so that
    # NaN or unhashable inputs fall through to the default instead of raising.
    labelled_causes = (
        (1, ('Lightning',)),  # natural
        (2, ('Structure', 'Fireworks', 'Powerline', 'Railroad', 'Smoking',
             'Children', 'Campfire', 'Equipment Use',
             'Debris Burning')),  # accidental
        (3, ('Arson',)),  # malicious
    )
    for label, causes in labelled_causes:
        if cat in causes:
            return label
    return 4  # other / unknown
     

# Derive the numeric target from the textual cause. Only test_df is changed;
# the copy taken right after loading (test_df_orig) keeps the raw columns.
test_df['LABEL'] = test_df['stat_cause_descr'].apply(set_label)

# Remove the raw cause text together with the other dropped columns, then
# discard every row that still contains a missing value.
test_df = test_df.drop(
    ['stat_cause_descr', 'state', 'fire_size_class',
     'discovery_date', 'DATE', 'cont_date'],
    axis=1,
).dropna()

# First two processed rows: the labels and the matching feature matrix.
# NOTE(review): sample_data was sliced from test_df_orig before dropna(), so it
# lines up with these rows only if neither of its rows was dropped -- confirm.
sample_test_y = test_df['LABEL'].values[:2]
sample_test_X = test_df.drop(['LABEL'], axis=1).values[:2]

In [None]:
# Raw sample rows again, for comparison with the encoded features printed next.
sample_data.head()

In [None]:
# Encoded feature rows of the two samples; these become the request payload.
print(sample_test_X)

In [None]:
# Expected labels for the two samples. A value of 1 makes sense for a
# 'Lightning' row, because set_label encodes the natural group as 1.
sample_test_y

In [None]:
# Wrap the sample feature rows in a DataFrame so they can be written out as CSV.
df_payload = pd.DataFrame(sample_test_X)

In [None]:
import io
# Serialize the samples to an in-memory CSV containing values only (no header
# row, no index column). to_csv documents these flags as booleans, so pass
# False explicitly instead of relying on None being falsy.
payload_file = io.StringIO()
df_payload.to_csv(payload_file, header=False, index=False)

## Invoke Endpoint

- The SageMaker Scikit-learn model server provides a default implementation of `input_fn`. This function deserializes JSON-, CSV-, or NPY-encoded data into a NumPy array.

In [None]:
# Notes from experimenting with request content types:
# - text/csv reportedly failed because of reshaping (sklearn requires 2 dimensions)
# - application/jsonlines is not available
# - application/json tries to convert the JSON to float
# NOTE(review): the call below still sends text/csv despite the first note --
# confirm whether that note is stale.

response = runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                   ContentType='text/csv', 
                                   Body=payload_file.getvalue())

In [None]:
# Print the decoded response body returned by the endpoint, followed by a
# hard-coded, human-readable reading of it (set_label uses 1 for natural).
# NOTE(review): the body is presumably a one-shot stream, so re-running this
# cell without re-invoking the endpoint may print nothing -- confirm.
print(response['Body'].read().decode())
print("[natural, natural]")

In [None]:
# Print actual labels. The list has to be printed explicitly: a bare
# expression is only echoed by the notebook when it is the last statement of
# the cell, so the original line produced no output.
print(sample_test_y.tolist())
print("[natural, misc.]")