In [None]:
# First, import the necessary libraries.

import os
import pandas as pd
import numpy as np
import math
import datetime as dt
import matplotlib.pyplot as plt

# Libraries used for evaluation

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

# Libraries used for model building

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM


# Libraries used for plotting

import matplotlib.pyplot as plt
from itertools import cycle

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

#Important library imports...
import seaborn as sns

from sklearn import preprocessing
from numpy import set_printoptions
%matplotlib inline

In [None]:
# Read the raw dataset, drop its first column and name the second one 'DateTime'.
# (The first column is presumably a row index written out with the CSV - TODO confirm.)
#
# Columns are selected and renamed through the pandas API instead of assigning
# into `columns.values`: an Index is immutable, and writing into its backing
# array is unsupported and can leave label lookups stale. Building a new frame
# (no `inplace=True`) also keeps this cell safe to re-run.
raw_crypto_data = pd.read_csv('./complete_dataset.csv')
crypto_data = raw_crypto_data.iloc[:, 1:].copy()
crypto_data = crypto_data.rename(columns={crypto_data.columns[0]: 'DateTime'})
crypto_data.head()

In [None]:
# Ensure the dataset is arranged by cryptocurrency and then chronologically.
# The timestamps are parsed BEFORE sorting and the parsed values are stored in
# the frame; calling pd.to_datetime() without keeping its result leaves the
# column unparsed, so the ordering would not be guaranteed to be chronological.
df = (
    crypto_data
    .assign(DateTime=pd.to_datetime(crypto_data['DateTime']))
    .sort_values(by=['Symbol', 'DateTime'])
)
df.head()

In [None]:
# View the last rows of the sorted frame to inspect the end of the data and its columns.
df.tail()

PREPROCESSING

In [None]:
# Keep only the columns used downstream: timestamp, closing price and ticker symbol.
price_columns = ['DateTime', 'Close', 'Symbol']
closedf = df[price_columns]
print(closedf.head())

# Group the rows by symbol; the grouped object is iterated later, one symbol at a time.
groups = closedf.groupby(by='Symbol')

In [None]:
# for symbol, group_df in groups:
#     fig = px.line(group_df, x=group_df.DateTime, y=group_df.Close,labels={'DateTime':'DateTime','Close':'Close Stock'}, title=f'{symbol} - Time vs Close')
#     fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
#     # fig.update_layout(title_text='Whole period of timeframe of Bitcoin close price 2014-2022', plot_bgcolor='white', 
#     #                 font_size=15, font_color='black')
#     fig.update_xaxes(showgrid=False, type='category')
#     fig.update_yaxes(showgrid=False)
#     #fig.show()

#     #print(group_df.head())

In [None]:
# convert an array of values into a dataset matrix

def create_dataset(dataset, time_step=1):
    """Turn a 2-D array into sliding input windows and next-step targets.

    Only column 0 of ``dataset`` is read. For every start position ``s`` the
    input is ``dataset[s:s + time_step, 0]`` and the target is
    ``dataset[s + time_step, 0]``.

    Args:
        dataset: 2-D NumPy array indexed as ``dataset[row, column]``.
        time_step: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the windows and their targets.
        ``len(dataset) - time_step - 1`` samples are produced, i.e. the last
        possible window of the series is not emitted.
    """
    starts = range(len(dataset) - time_step - 1)
    windows = [dataset[s:s + time_step, 0] for s in starts]
    targets = [dataset[s + time_step, 0] for s in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Symbols for which a model is trained; every other symbol found in `groups`
# is reported and skipped.
filter1Options = [
      'APT-USD',
      'ARB-USD',
      'AVAX-USD',
      'BNB-USD',
      'BTC-USD',
      'BCH-USD',
      'ADA-USD',
      'LINK-USD',
      'DOGE-USD',
      'ETH-USD',
      'ETC-USD',
      'HBAR-USD',
      'LTC-USD',
      'XMR-USD',
      'MATIC-USD',
      'SHIB-USD',
      'SOL-USD',
      'TRX-USD',
      'WBTC-USD',
      'XRP-USD'
    ]

# The trained models are written below this directory; create it up front so
# that model.save() does not fail when the folder is missing.
os.makedirs('./All_Models', exist_ok=True)

# For every selected symbol: scale the close prices, build sliding windows,
# train an LSTM, save it, print evaluation metrics and plot the predictions.
for symbol, group_df in groups:
    if symbol not in filter1Options:
        print(f'{symbol} not in list...\n')
        continue

    try:
        use_symbol = symbol

        # Rebind to a new frame without the grouping column instead of deleting
        # the column from the group's frame in place.
        group_df = group_df.drop(columns=['Symbol'])
        new_group_df = group_df[['DateTime', 'Close']]

        # Close-price history for this symbol. The figure is built but not
        # displayed; `fig` is reused for the comparison plot at the end.
        fig = px.line(new_group_df, x=new_group_df.DateTime, y=new_group_df.Close,
                      labels={'DateTime': 'DateTime', 'Close': 'Close Stock'})
        fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
        fig.update_layout(title_text=f'Whole period of timeframe of {use_symbol} close price 2014-2022',
                          plot_bgcolor='white', font_size=15, font_color='black')
        fig.update_xaxes(showgrid=False)
        fig.update_yaxes(showgrid=False)

        # Scale the close prices to [0, 1]; from here on `new_group_df` is the
        # scaled (n_rows, 1) array.
        # NOTE(review): the scaler is fit on the full series, i.e. before the
        # train/test split, so test-set statistics leak into training. Left
        # unchanged because anything loading the saved models may rely on this
        # scaling - confirm before fitting on the training part only.
        scaler = MinMaxScaler(feature_range=(0, 1))
        new_group_df = scaler.fit_transform(group_df[['Close']].to_numpy().reshape(-1, 1))

        # Chronological split: first 60% for training, remaining 40% for testing.
        training_size = int(len(new_group_df) * 0.60)
        test_size = len(new_group_df) - training_size
        train_data = new_group_df[:training_size, :]
        test_data = new_group_df[training_size:, :1]
        print("train_data: ", train_data.shape)
        print("test_data: ", test_data.shape)

        time_step = 15

        # create_dataset() yields len(data) - time_step - 1 samples, so each
        # split needs at least time_step + 2 rows to produce a single sample.
        min_rows = time_step + 2
        if len(train_data) < min_rows or len(test_data) < min_rows:
            print(f'{use_symbol}: not enough rows for time_step={time_step} '
                  f'(train={len(train_data)}, test={len(test_data)}), skipping...\n')
            continue

        X_train, y_train = create_dataset(train_data, time_step)
        X_test, y_test = create_dataset(test_data, time_step)

        # The LSTM is declared with input_shape=(None, 1): give it explicit
        # (samples, time steps, 1 feature) input rather than relying on Keras
        # to expand the 2-D window arrays implicitly.
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

        print("X_train: ", X_train.shape)
        print("y_train: ", y_train.shape)
        print("X_test: ", X_test.shape)
        print("y_test", y_test.shape)

        model = Sequential()
        model.add(LSTM(10, input_shape=(None, 1), activation="relu"))
        model.add(Dense(1))
        model.compile(loss="mean_squared_error", optimizer="adam")

        history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                            epochs=200, batch_size=32, verbose=1)

        loss = history.history['loss']
        val_loss = history.history['val_loss']
        epochs = range(len(loss))

        # Draw both curves on one explicit figure and close it afterwards; a
        # bare plt.figure() before plt.show() would add an empty extra figure,
        # and unclosed figures pile up across the symbols.
        loss_fig, loss_ax = plt.subplots()
        loss_ax.plot(epochs, loss, 'r', label='Training loss')
        loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
        loss_ax.set_title('Training and validation loss')
        loss_ax.legend(loc=0)
        plt.show()
        plt.close(loss_fig)

        # Predict on both splits and persist the trained model.
        train_predict = model.predict(X_train)
        test_predict = model.predict(X_test)

        model.save(f'./All_Models/{use_symbol}.h5')

        # Transform predictions and targets back to the original price scale.
        train_predict = scaler.inverse_transform(train_predict)
        test_predict = scaler.inverse_transform(test_predict)
        original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
        original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))

        # Evaluation metrics: RMSE, MSE and MAE
        print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain, train_predict)))
        print("Train data MSE: ", mean_squared_error(original_ytrain, train_predict))
        print("Train data MAE: ", mean_absolute_error(original_ytrain, train_predict))
        print("-------------------------------------------------------------------------------------")
        print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest, test_predict)))
        print("Test data MSE: ", mean_squared_error(original_ytest, test_predict))
        print("Test data MAE: ", mean_absolute_error(original_ytest, test_predict))

        print("Train data explained variance regression score:",
              explained_variance_score(original_ytrain, train_predict))
        print("Test data explained variance regression score:",
              explained_variance_score(original_ytest, test_predict))

        print("Train data R2 score:", r2_score(original_ytrain, train_predict))
        print("Test data R2 score:", r2_score(original_ytest, test_predict))

        # The deviance metrics reject non-positive values with a ValueError.
        # Report that explicitly so the comparison plot below is still drawn.
        try:
            print("Train data MGD: ", mean_gamma_deviance(original_ytrain, train_predict))
            print("Test data MGD: ", mean_gamma_deviance(original_ytest, test_predict))
            print("----------------------------------------------------------------------")
            print("Train data MPD: ", mean_poisson_deviance(original_ytrain, train_predict))
            print("Test data MPD: ", mean_poisson_deviance(original_ytest, test_predict))
        except ValueError as deviance_error:
            print(f'{use_symbol}: deviance metrics skipped ({deviance_error})')

        # Shift train predictions for plotting: prediction i belongs to row
        # i + look_back of the full series.
        look_back = time_step
        trainPredictPlot = np.empty_like(new_group_df)
        trainPredictPlot[:, :] = np.nan
        trainPredictPlot[look_back:len(train_predict) + look_back, :] = train_predict
        print("Train predicted data: ", trainPredictPlot.shape)

        # Shift test predictions for plotting. The start offset equals
        # training_size + look_back, because train_predict has
        # training_size - look_back - 1 rows.
        testPredictPlot = np.empty_like(new_group_df)
        testPredictPlot[:, :] = np.nan
        testPredictPlot[len(train_predict) + (look_back * 2) + 1:len(new_group_df) - 1, :] = test_predict
        print("Test predicted data: ", testPredictPlot.shape)

        names = cycle(['Original close price', 'Train predicted close price', 'Test predicted close price'])

        print(group_df.head())

        plotdf = pd.DataFrame({'date': group_df['DateTime'],
                               'original_close': group_df['Close'],
                               'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
                               'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist()})

        fig = px.line(plotdf, x=plotdf['date'],
                      y=[plotdf['original_close'], plotdf['train_predicted_close'],
                         plotdf['test_predicted_close']],
                      labels={'value': 'Stock price', 'date': 'DateTime'})
        fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                          plot_bgcolor='white', font_size=15, font_color='black',
                          legend_title_text='Close Price')
        fig.for_each_trace(lambda t: t.update(name=next(names)))

        fig.update_xaxes(showgrid=False)
        fig.update_yaxes(showgrid=False)
        fig.show()
    except Exception as e:
        # Best effort: a failure for one symbol must not stop the remaining
        # symbols, but report which symbol failed and the kind of error.
        print(f'{symbol}: {type(e).__name__}: {e}')