In [None]:
import pandas as pd

# The CSV is read with explicit column names; every column is loaded as a
# plain object (string) and converted later where needed.
column_names = ['timestamp', 'value', 'item']
df = pd.read_csv(
    'item-demand-time.csv',
    names=column_names,
    dtype=object,
)
df.head(3)

In [None]:
# Distinct identifiers present in the 'item' column.
df['item'].unique()

In [None]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

# 'value' was read as object dtype, so turn it into numbers before plotting.
df['value'] = pd.to_numeric(df['value'])

# Reshape to one column per item, indexed by timestamp, and plot every series.
df_plot = df.pivot(columns='item', index='timestamp', values='value')
df_plot.plot(figsize=(40, 10))

In [None]:
# Keep only the rows for 'client_12', then discard everything except 'value'.
is_client_12 = df['item'] == 'client_12'
df = df.loc[is_client_12].drop(columns=['item', 'timestamp'])

In [None]:
# Scale by 100 in float32 and cast to int32 (the cast drops any remaining
# fractional part).
df['value'] = df['value'].astype('float32').mul(100).astype('int32')
df.head(3)

In [None]:
# Plot the frame as it stands now: only the scaled integer 'value' column is left.
df.plot(figsize=(40,10))

In [None]:
# Write the frame to 'electricity.csv' without the index and without a header
# row; this is the file uploaded as training data further down.
df.to_csv('electricity.csv', index=False, header=False)

In [None]:
import boto3
import sagemaker

print(sagemaker.__version__)

# Session, execution role and default bucket used by the rest of the notebook.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sess.default_bucket()
prefix = 'electricity'

# Upload the local CSV under <prefix>/input/training and keep the returned path.
training_data_path = sess.upload_data(
    path='electricity.csv',
    key_prefix='{}/input/training'.format(prefix),
)

# Describe the uploaded file as the training input: CSV with label_size=0,
# distributed with 'ShardedByS3Key'.
training_data_channel = sagemaker.TrainingInput(
    s3_data=training_data_path,
    content_type='text/csv;label_size=0',
    distribution='ShardedByS3Key',
)
rcf_data = dict(train=training_data_channel)

In [None]:
# Show the value returned by upload_data (the same value that is passed as
# s3_data to the training channel).
print(training_data_path)

In [None]:
import boto3
from sagemaker.estimator import Estimator
from sagemaker import image_uris

# Look up the 'randomcutforest' container image for the current boto3 region.
region = boto3.Session().region_name
container = image_uris.retrieve(framework='randomcutforest', region=region)

# Model artifacts are written under <bucket>/<prefix>/output.
output_location = 's3://{}/{}/output'.format(bucket, prefix)

rcf_estimator = Estimator(
    container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=output_location,
)

# The training file has a single column, hence feature_dim=1.
rcf_estimator.set_hyperparameters(feature_dim=1)

In [None]:
# Train the estimator on rcf_data, whose only entry is the 'train' channel.
rcf_estimator.fit(rcf_data)

In [None]:
from time import gmtime, strftime

# Build the endpoint name from a fixed prefix plus the current UTC
# day-hour-minute-second, so repeated runs get different names.
timestamp = strftime('%d-%H-%M-%S', gmtime())
endpoint_name = '-'.join(['rcf-demo', timestamp])

# Deploy the trained model behind a single-instance endpoint.
rcf_predictor = rcf_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name=endpoint_name,
)

In [None]:
# Requests are serialized as CSV and responses parsed as JSON. In SageMaker
# SDK v2 the request content type is taken from the serializer, so the former
# `rcf_predictor.ContentType = 'text/csv'` assignment (an SDK v1 idiom) had no
# effect and has been dropped.
rcf_predictor.serializer = sagemaker.serializers.CSVSerializer()
rcf_predictor.deserializer = sagemaker.deserializers.JSONDeserializer()

# NOTE(review): the values are deliberately sent as a list of strings --
# presumably so that CSVSerializer emits one record per line instead of one
# wide row. Confirm against the serializer docs before changing the type.
values = df['value'].astype('str').tolist()
response = rcf_predictor.predict(values)

In [None]:
from statistics import mean, stdev

# Collect the 'score' field of every entry under response['scores'].
scores = [record['score'] for record in response['scores']]

# Mean and sample standard deviation of the scores; used for the threshold
# line in the score plot.
score_mean = mean(scores)
score_std = stdev(scores)

In [None]:
# Plot the [2000:2500] slice of the values -- the same window that is used for
# the anomaly-score plot.
df[2000:2500].plot(figsize=(40,10))

In [None]:
# Scores for the [2000:2500] window, with a red line at mean + 3 standard
# deviations as the anomaly threshold.
threshold = score_mean + 3 * score_std

fig, ax = plt.subplots(figsize=(40, 10))
ax.plot(scores[2000:2500])
ax.autoscale(tight=True)
ax.axhline(y=threshold, color='red')
plt.show()

In [None]:
# Delete the endpoint behind rcf_predictor (the one created by deploy()).
rcf_predictor.delete_endpoint()