In [None]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from s3fs import S3FileSystem
import json
import boto3
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import time

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Single shared scaler instance: it is fitted by fit_transform_train_data and
# then reused by transform_test_data and the inverse_transform calls below.
mn = MinMaxScaler()

In [None]:
# Raw string literal: in a normal string "\t" and "\b" are parsed as the TAB and
# BACKSPACE escape sequences, which would silently corrupt this Windows-style path.
file_loc=r"path\to\bitcoin.csv"
data=pd.read_csv(file_loc,header=0)
# Rename the two columns to the names the rest of the notebook relies on.
data.columns=["DateTime","Price"]
data.head()

In [None]:
# Positional split: the first 80% of rows become the training set and the
# remaining 20% the test set (rows are not shuffled).
partitions=int(len(data)*0.8)
# Explicit copies: both splits are modified in place further down (index and
# scaled values), which must not act on a slice that still refers to `data`.
#train_data
data_first_n=data.iloc[:partitions].copy()
#test_data
data_last_n=data.iloc[partitions:].copy()

In [None]:
#Fit the MinMaxScaler on the training set and scale it
def fit_transform_train_data(data):
    """Fit the shared scaler ``mn`` on the training frame and return it scaled.

    Parameters
    ----------
    data : pandas.DataFrame
        Two-column frame; the columns are renamed to ``DateTime`` and ``Price``.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``DateTime`` whose ``Price`` column holds the
        output of ``mn.fit_transform``. The frame passed in is left untouched.
    """
    # Work on a copy so the caller's frame is not renamed/re-indexed/scaled
    # behind its back (the function is then safe to call on a slice as well).
    scaled=data.copy()
    scaled.columns=["DateTime","Price"] #rename columns

    #set datetime column as index
    scaled=scaled.set_index("DateTime",drop=True)

    #fit the scaler on the remaining column(s) and replace them with the scaled values
    scaled[scaled.columns]=mn.fit_transform(scaled)

    return scaled

In [None]:
#Scale a test / live frame with the already-fitted MinMaxScaler
def transform_test_data(data):
    """Scale a frame with the shared scaler ``mn`` without refitting it.

    ``mn`` must already have been fitted (see ``fit_transform_train_data``).

    Parameters
    ----------
    data : pandas.DataFrame
        Two-column frame; the columns are renamed to ``DateTime`` and ``Price``.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``DateTime`` whose ``Price`` column holds the
        output of ``mn.transform``. The frame passed in is left untouched.
    """
    # Work on a copy so the caller's frame is not modified in place.
    scaled=data.copy()
    #rename columns
    scaled.columns=["DateTime","Price"]

    #set datetime column as index
    scaled=scaled.set_index("DateTime",drop=True)

    #apply the fitted scaler (transform only, no fit) on the remaining column(s)
    scaled[scaled.columns]=mn.transform(scaled)

    return scaled

In [None]:
#This step builds the model inputs.
#The previous 5 values are used to predict the present value. This can be changed as needed.
previous_days=5
def obtain_models_input(data):
    """Turn the ``Price`` column into sliding-window inputs and next-value targets.

    Sample ``k`` uses ``Price[k : k + previous_days]`` as input and
    ``Price[k + previous_days]`` as target.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Price`` column.

    Returns
    -------
    tuple of numpy.ndarray
        ``(inputs, targets)`` with shapes ``(n, 1, previous_days)`` and
        ``(n, 1)``, where ``n = max(len(data) - previous_days, 0)``.
    """
    #change to numpy
    values=data["Price"].to_numpy()
    n_samples=max(len(values)-previous_days,0)

    # Not enough rows for even one window: keep the documented ranks so callers
    # (e.g. model.predict) receive well-formed, empty arrays instead of shape (0,).
    if n_samples==0:
        return (np.empty((0,1,previous_days),dtype=values.dtype),
                np.empty((0,1),dtype=values.dtype))

    # One row per window of `previous_days` consecutive values.
    windows=np.stack([values[i-previous_days:i] for i in range(previous_days,len(values))])
    # The value immediately following each window (np.array makes a copy).
    targets=np.array(values[previous_days:])

    return windows.reshape(n_samples,1,previous_days),targets.reshape(n_samples,1)

In [None]:
#apply above functions on train and test datasets respectively
# Both splits are derived from the raw `data` frame here (not from the previous
# value of data_first_n / data_last_n), so re-running this cell never feeds an
# already-indexed, already-scaled frame back into the helpers.
# Order matters: the training call fits the shared scaler that the test call reuses.
data_first_n=fit_transform_train_data(data.iloc[:partitions].copy())

data_last_n=transform_test_data(data.iloc[partitions:].copy())

In [None]:
# Build the sliding-window inputs and next-value targets for each split.
X_train, y_train = obtain_models_input(data_first_n)

X_test, y_test = obtain_models_input(data_last_n)

In [None]:
# Sanity check: print the shape of every array, in train-then-test order.
for split_array in (X_train,y_train,X_test,y_test):
    print(np.shape(split_array))

In [None]:
#Creation of LSTM Neural Network Model

# Take the layers from the same Keras implementation as `keras` above
# (tensorflow.keras); mixing in the standalone `keras` package can pair
# incompatible layer/model classes.
from tensorflow.keras.layers import Dense,LSTM,SimpleRNN,TimeDistributed,Input,Dropout,BatchNormalization

model=keras.models.Sequential()
# Each sample is a single timestep carrying `previous_days` features.
model.add(Input(shape=(1,previous_days)))
model.add(LSTM(50,return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(50,return_sequences=True))
model.add(LSTM(30,return_sequences=True))
model.add(Dropout(0.3))
# Last recurrent layer returns only its final state, so the network output is
# (batch, 1) and lines up with the (n, 1) targets instead of being (batch, 1, 1).
# With a single timestep this yields the same values as return_sequences=True.
model.add(LSTM(10))
model.add(Dense(10))
model.add(Dense(1))

In [None]:
# Stop training once the training loss has stopped improving and roll back to
# the best weights. The loss is monitored because "accuracy" carries no meaning
# for a regression target. min_delta is small because prices are scaled to
# [0, 1], so MSE improvements are tiny; min_delta and patience are tunable.
earlystopping=keras.callbacks.EarlyStopping(monitor="loss",mode="min",min_delta=1e-5,patience=5,restore_best_weights=True)

#Compile the model (MSE loss; MAE reported as a more readable regression metric)
model.compile(loss="mse",optimizer=keras.optimizers.SGD(learning_rate=0.01),metrics=["mean_absolute_error"])

#Fit the model; the callback has to be passed here, otherwise it is never used
model.fit(X_train,y_train,epochs=80,batch_size=32,callbacks=[earlystopping])

In [None]:
#prediction helper
def model_predict(X_test):
    """Run the trained ``model`` on ``X_test`` and return one prediction per row.

    The raw output of ``model.predict`` is reshaped into a 2-D array of shape
    ``(number_of_samples, 1)``.
    """
    raw_predictions=model.predict(X_test)
    n_rows=len(raw_predictions)
    # Flatten any extra axes into a single column per sample.
    return raw_predictions.reshape(n_rows,1)

In [None]:
# Predict on the held-out windows first: y_pred was previously used here
# without ever being assigned.
# Both series are mapped back from the [0, 1] scale to price units. The scaled
# y_test is left untouched (result stored in y_true), so re-running this cell
# does not inverse-transform the same array twice.
y_pred=mn.inverse_transform(model_predict(X_test))
y_true=mn.inverse_transform(y_test)
fig, ax=plt.subplots(2)


ax[0].plot(range(len(y_pred)),y_pred,color="orange")
ax[1].plot(range(len(y_true)),y_true,color="blue")
ax[0].set_title("Predicted")
ax[1].set_title("True Value")
# Keep the two stacked panels' titles from overlapping.
fig.tight_layout()
plt.show()



In [None]:
#Reading data from AWS S3 bucket for real time predictions
def read_data_from_s3_bucket(last_read_file,bucket_name='your-bucket-name'):
    """Read every JSON object whose key sorts after ``last_read_file``.

    Each object body is parsed as JSON and its ``"DateTime"`` and ``"Price"``
    fields are collected.

    Parameters
    ----------
    last_read_file : str
        Key of the newest object handled so far; ``""`` reads everything.
    bucket_name : str, optional
        Name of the S3 bucket to scan.

    Returns
    -------
    tuple
        ``(df1, newest_key)``: ``df1`` is a two-column DataFrame
        (``DateTime``, ``Price``) with one row per newly read object, and
        ``newest_key`` is the greatest key read. When there is nothing new,
        ``df1`` is empty and ``newest_key`` equals ``last_read_file``.
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)

    DateTime,Price=[],[]
    # Starts at the previous marker so it is returned unchanged when no newer
    # object exists (indexing an empty key list used to raise IndexError).
    newest_key=last_read_file

    print(last_read_file)
    for obj in bucket.objects.all():
        key = obj.key
        #skip files that were already read (keys not greater than the last analyzed one)
        if key<=last_read_file:
            continue

        print(key)
        # max() keeps the marker from ever moving backwards, whatever the listing order.
        newest_key=max(newest_key,key)

        #Read the json message
        body = json.loads(obj.get()['Body'].read())

        #Store the respective fields in lists so we can convert them into a dataframe
        DateTime.append(body["DateTime"])
        Price.append(body["Price"])

    #build the dataframe column-wise so each column keeps its own dtype
    df1=pd.DataFrame({"DateTime":DateTime,"Price":Price})

    #return the dataframe and the newest key read
    return df1,newest_key

In [None]:
#Initially last_read_file is empty. It gets updated on every poll
last_read_file=""
#seconds to wait between two polls of the bucket (10 minutes)
POLL_INTERVAL_SECONDS=600

# this loop runs indefinitely until it is interrupted externally
while True:
    #receive the data frame and the key of the newest file read
    try:
        df1,last_read_file=read_data_from_s3_bucket(last_read_file)
    except IndexError:
        # Raised by the reader when the bucket holds no new file; treat it as
        # "nothing to do" and keep the previous marker.
        df1=None

    # At least previous_days+1 rows are needed to build a single window/target pair.
    # NOTE: rows of a too-short batch are not carried over to the next poll.
    if df1 is None or len(df1)<=previous_days:
        print("not enough new rows for a prediction, waiting for the next poll")
    else:
        #apply transformations on the dataframe
        df1=transform_test_data(df1)

        #get windows (model inputs) and the matching true values
        X_live,y_live=obtain_models_input(df1)

        #shapes printed for debugging purposes
        print(X_live.shape)
        print(y_live.shape)

        #apply model.predict on the newly obtained data
        y_pred1=model_predict(X_live)

        print(y_pred1.shape)

        #map both series back to price units once, outside the print loop
        actual_prices=mn.inverse_transform(y_live)
        predicted_prices=mn.inverse_transform(y_pred1)

        #print scaled actual, scaled prediction, actual price, predicted price
        for p,q,r,s in zip(y_live,y_pred1,actual_prices,predicted_prices):
            print(p,q,r,s)

    #wait before polling the bucket again
    time.sleep(POLL_INTERVAL_SECONDS)