In [1]:
# Colab-only Google Drive mount, disabled by wrapping it in a bare string literal.
# Nothing in this cell is executed; the string is simply echoed as the cell output.
'''from google.colab import drive
drive.mount('/content/gdrive')
'''

"from google.colab import drive\ndrive.mount('/content/gdrive')\n"

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from pandas.plotting import register_matplotlib_converters
from keras import optimizers
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Reproducibility: a single seed is shared by NumPy and TensorFlow.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Plot setup: register pandas' matplotlib converters, apply the seaborn theme,
# then override the default figure size (after the theme has been applied).
register_matplotlib_converters()
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = (22, 10)


In [None]:
# Earlier Google-Drive based loader, disabled by wrapping it in a bare string literal.
'''df = pd.read_csv(
  "gdrive/My Drive/Data Files/Google_Stock_Price_Train.csv", 
  parse_dates=True, 
  index_col=0
)
df.head()
'''

# Training prices are read from the public GitHub copy of the CSV.
# The first column is used as the index and pandas is asked to parse it as dates.
TRAIN_CSV_URL = "https://raw.githubusercontent.com/D-GithubAccount/RNN_LSTM_GOOGL-StockPricePrediction/main/Google_Stock_Price_Train.csv"
df = pd.read_csv(TRAIN_CSV_URL, index_col=0, parse_dates=True)
df.head()


In [None]:
# Test prices come from the same GitHub repository (the Drive path is no longer used).
# As with the training file, the first column becomes a date-parsed index.
url = 'https://raw.githubusercontent.com/D-GithubAccount/RNN_LSTM_GOOGL-StockPricePrediction/main/Google_Stock_Price_Test.csv'
test_data = pd.read_csv(
    url,
    index_col=0,
    parse_dates=True,
)
test_data.head()

In [None]:
# Both frames are used in full: every row of `df` for training and every row
# of `test_data` for testing.
train_size = len(df)
test_size = len(test_data)
train = df.iloc[:train_size]
test = test_data.iloc[:test_size]
print(len(train), len(test))

In [None]:
# Select the features (columns) to be involved in training and predictions:
# the first five columns of the training frame.
cols = train.columns[:5].tolist()
print(cols)

In [None]:
def _strip_thousands_separators(frame, columns):
    """Return a float copy of `frame` with ',' removed from the given columns.

    Every column is first rendered as text, the listed columns have their
    commas dropped in one vectorised pass, and the whole frame is then cast
    to float. The input frame is not modified.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose values may be numbers or numeric text such as "1,008.64".
    columns : list
        Column labels in which commas should be removed.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index/columns and float values.
    """
    cleaned = frame.astype(str)
    # Column-wise string replace avoids the element-by-element chained
    # assignment (`frame[col][row] = ...`), which is slow and does not write
    # through reliably on newer pandas versions.
    cleaned[columns] = cleaned[columns].apply(
        lambda column: column.str.replace(',', '', regex=False)
    )
    return cleaned.astype(float)


train = _strip_thousands_separators(train, cols)
test = _strip_thousands_separators(test, cols)

In [None]:
# NOTE: plain assignment, not a copy. `train_df` and `train` refer to the same
# DataFrame object, so a column added through `train_df` is also visible on `train`.
train_df = train

In [None]:
# Number the rows 1..N so the price series can be plotted against a simple step axis.
train_df['Timestep'] = np.arange(1, len(train_df) + 1)

# Reshape to long form: one row per (Timestep, price type), with the price in `value`
# and the price type in a column renamed to 'Stock price'.
price_columns = ['Open', 'High', 'Low', 'Close']
training_melt_df = (
    train_df[['Timestep'] + price_columns]
    .melt(id_vars=['Timestep'], value_vars=price_columns)
    .rename(columns={'variable': 'Stock price'})
)

In [None]:
import seaborn as sns

# Draw every price series against the timestep, one hue per series.
price_lines = sns.lineplot(
    data=training_melt_df,
    x="Timestep",
    y="value",
    hue="Stock price",
    linewidth=1,
)
# `ax1` holds whatever `.set(...)` returns, exactly as before.
ax1 = price_lines.set(ylabel='price')
plt.show()

In [None]:
# Closes that sit above the day's High are divided by 2.002
# (original note: split share going from 1000 to 2002).
close_exceeds_high = train['High'] < train['Close']
train['Close'] = np.where(close_exceeds_high, train['Close'] / 2.002, train['Close'])

# Any Close still outside the day's range is clamped: first down to High,
# then up to Low.
still_above_high = train['High'] < train['Close']
train['Close'] = np.where(still_above_high, train['High'], train['Close'])

below_low = train['Low'] > train['Close']
train['Close'] = np.where(below_low, train['Low'], train['Close'])

In [None]:
# Long-form view of the four price columns after the Close repair.
repaired_long = train[['Timestep', 'Open', 'High', 'Low', 'Close']].melt(
    id_vars=['Timestep'],
    value_vars=['Open', 'High', 'Low', 'Close'],
)
training_melt_df = repaired_long.rename(columns={'variable': 'Stock price'})

# Line plot of the prices, one hue per price type.
repaired_lines = sns.lineplot(
    data=training_melt_df,
    x="Timestep",
    y="value",
    hue="Stock price",
    linewidth=1,
)
ax1 = repaired_lines.set(ylabel='Stock price')

In [None]:
# Keep only the price columns: drop the plotting helper 'Timestep' and 'Volume'.
# Re-running this cell without re-creating those columns raises a KeyError.
train = train.drop(columns=['Timestep','Volume'])

In [None]:
# Drop 'Volume' from the test frame as well so it has no volume column either.
test = test.drop(columns=['Volume'])

In [None]:
# Min-max scaling of each price column using that column's own training-set
# minimum and maximum. The extrema are kept in module-level names so they can
# be reused for other frames and for inverting the scaling.

# Open price
open_minimum = train['Open'].min()
maximum_open = train['Open'].max()
train['Open'] = train['Open'].sub(open_minimum).div(maximum_open - open_minimum)

# Low price
minimum_low = train['Low'].min()
maximum_low = train['Low'].max()
train['Low'] = train['Low'].sub(minimum_low).div(maximum_low - minimum_low)

# High price
minimum_high = train['High'].min()
maximum_high = train['High'].max()
train['High'] = train['High'].sub(minimum_high).div(maximum_high - minimum_high)

# Close price
minimum_close = train['Close'].min()
maximum_close = train['Close'].max()
train['Close'] = train['Close'].sub(minimum_close).div(maximum_close - minimum_close)


In [None]:
# The test frame is rescaled with the training extrema (not its own), so no
# statistics from the test data enter the scaling.
test['Open'] = test['Open'].sub(open_minimum).div(maximum_open - open_minimum)
test['Low'] = test['Low'].sub(minimum_low).div(maximum_low - minimum_low)
test['High'] = test['High'].sub(minimum_high).div(maximum_high - minimum_high)
test['Close'] = test['Close'].sub(minimum_close).div(maximum_close - minimum_close)


In [None]:
# Alternative scaling via sklearn's MinMaxScaler, disabled by wrapping it in a
# bare string literal; none of it is executed.
'''
cols = list(train)[0:4]
print(cols)
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler

f_transformer = MinMaxScaler()
train.loc[:, cols] = f_transformer.fit_transform(train[cols].to_numpy())
test.loc[:, cols] = f_transformer.transform(test[cols].to_numpy())

open_transformer =MinMaxScaler()
train['Open'] = open_transformer.fit_transform(train[['Open']])
test['Open'] = open_transformer.transform(test[['Open']])
'''

In [None]:
def create_dataset(X, y, time_steps=1):
    """Build sliding-window samples and their next-step targets.

    For every start position `s` in `range(len(X) - time_steps)` the sample is
    the block of rows `X.iloc[s : s + time_steps]` (as raw values) and the
    target is `y.iloc[s + time_steps]`, i.e. the entry right after the window.

    Parameters
    ----------
    X : object supporting `len` and `.iloc[...]` slicing with `.values`
        Source of the input windows (e.g. a pandas DataFrame).
    y : object supporting `.iloc[...]` integer indexing
        Source of the targets (e.g. a pandas Series).
    time_steps : int, default 1
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        `(windows, targets)` converted with `np.array`.
    """
    n_windows = len(X) - time_steps
    windows = [
        X.iloc[start:start + time_steps].values
        for start in range(n_windows)
    ]
    targets = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Stack the test rows underneath the training rows so that windows can run
# across the train/test boundary.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the supported
# equivalent and keeps each frame's original index labels.
train_test = pd.concat([train, test])
train_test

In [None]:

window_size = 40
data = train_test[['Open', 'High', 'Low', 'Close']].values
n = len(data)

# Every window holds window_size + 1 consecutive rows: the first window_size
# rows are the model input and the final row is the target to predict.
data_window = np.array([
    data[start:start + window_size + 1, :]
    for start in range(n - window_size)
])

# Split each window into inputs (all rows but the last) and target (last row).
x_full = data_window[:, :-1, :]
y_full = data_window[:, -1, :]


In [None]:
# Splitting into train, validation and test data.
#
# Window k of x_full/y_full has its target at row k + window_size of the
# stacked train+test data, so the first window whose target is a test row is
# k = len(train) - window_size. The validation set is the block of len(test)
# windows immediately before that, and everything earlier is training data.
#
# The previous upper bound for the validation slice was `len(test) - 40`,
# which is a negative index and only coincided with the intended boundary
# when len(test) == 20; the boundaries are now computed explicitly.
test_start = len(train) - window_size
val_start = test_start - len(test)
assert val_start > 0, "not enough training rows for the requested window/validation sizes"

x_train = x_full[:val_start, :, :]
y_train = y_full[:val_start, :]

x_val = x_full[val_start:test_start, :, :]
y_val = y_full[val_start:test_start, :]

x_test = x_full[test_start:, :, :]
y_test = y_full[test_start:, :]

In [None]:
# Sanity check: print the shapes of the inputs/targets for the train, validation and test splits.
print(x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape)

In [None]:
# Materialise each split as its own NumPy array under the names used by the
# model cells (note the capitalised X_* / Y_val spellings).
X_train = np.array(x_train)
y_train = np.array(y_train)
X_val = np.array(x_val)
Y_val = np.array(y_val)
X_test = np.array(x_test)
y_test = np.array(y_test)

In [None]:
# Earlier single-target windowing via create_dataset, disabled by wrapping it
# in a bare string literal; none of it is executed.
'''
time_steps = 1

# reshape to [samples, time_steps, n_features]

X_train, y_train = create_dataset(train, train['Open'], time_steps)
X_test, y_test = create_dataset(test, test['Open'], time_steps)

print(X_train.shape, y_train.shape)
'''

In [None]:
# Learning rate for the Adam optimizer. It was previously defined but never
# passed on; it is now wired into the optimizer (1e-3 is also Adam's default,
# so training behaviour is unchanged until this value is edited).
lr = 1e-3
# One output unit per target column.
n_outputs = y_train.shape[1]

# Bidirectional LSTM (64 units per direction) -> dropout -> linear dense head
# that predicts all target columns at once.
model = keras.Sequential()
model.add(
  keras.layers.Bidirectional(
    keras.layers.LSTM(
      units=64,
      input_shape=(X_train.shape[1], X_train.shape[2])
    )
  )
)
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(n_outputs, activation='linear'))
model.compile(
    loss='mean_squared_error',
    optimizer=keras.optimizers.Adam(learning_rate=lr),
)

In [None]:
# Train for a fixed 1000 epochs (no early-stopping callback is configured).
# shuffle=False keeps the samples in their original order within each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=1000,
    shuffle=False,
    validation_data=(X_val, Y_val),
)

In [None]:
# Training vs. validation loss per epoch.
for metric_key, legend_label in (('loss', 'train'), ('val_loss', 'validation')):
    plt.plot(history.history[metric_key], label=legend_label)
plt.legend();

In [None]:
# Run the trained model on the test windows; values are still in scaled units.
y_pred = model.predict(X_test)

In [None]:
# Slice covering every row of the test frame as loaded from the CSV
# (`test_data`, not the scaled `test` frame).
test_original = test_data.iloc[0:len(test_data)]

In [None]:
# Wrap the raw predictions in a DataFrame with one labelled column per output.
# NOTE(review): the labels assume the model outputs are ordered Open, High, Low, Close — confirm against the target column order.
predicted_df = pd.DataFrame(y_pred,columns=['Predicted_Open','Predicted_High','Predicted_Low','Predicted_Close'])


In [None]:
# Undo the min-max scaling on each predicted column (value * range + minimum)
# using the extrema stored for the corresponding price column.
predicted_df['Predicted_Open'] = (
    predicted_df['Predicted_Open'].mul(maximum_open - open_minimum).add(open_minimum)
)
predicted_df['Predicted_High'] = (
    predicted_df['Predicted_High'].mul(maximum_high - minimum_high).add(minimum_high)
)
predicted_df['Predicted_Low'] = (
    predicted_df['Predicted_Low'].mul(maximum_low - minimum_low).add(minimum_low)
)
predicted_df['Predicted_Close'] = (
    predicted_df['Predicted_Close'].mul(maximum_close - minimum_close).add(minimum_close)
)

In [None]:
# Step axis 1..N for plotting the predictions.
predicted_df['Timestep'] = np.arange(start=1, stop=len(predicted_df) + 1, step=1)

# Give the actual test prices the same index as predicted_df so that later
# Series-to-Series operations between the two frames align row by row.
test_original = test_original.set_index(predicted_df.index)

# Copy the actual Open prices in by position. (A previous direct Series
# assignment was made before the re-indexing above, so its date labels did not
# match predicted_df's index and it only produced NaNs that were then
# overwritten; that dead step has been removed.)
predicted_df['Open'] = test_original['Open'].to_numpy()

In [None]:
# Display the predictions next to the actual Open price.
predicted_df

In [None]:
# Long-form view holding the actual and the predicted Open price per timestep.
comparison_long = predicted_df[['Timestep', 'Open', 'Predicted_Open']].melt(
    id_vars=['Timestep'],
    value_vars=['Open', 'Predicted_Open'],
)
predicted_df_melt = comparison_long.rename(columns={'variable': 'Stock price'})

# Line plot comparing the two series.
comparison_lines = sns.lineplot(
    data=predicted_df_melt,
    x="Timestep",
    y="value",
    hue="Stock price",
    linewidth=1,
)
ax1 = comparison_lines.set(ylabel='Stock price')

In [None]:
def rmse(y_true, y_pred):
    """Root mean squared error between two equally shaped value collections.

    Parameters
    ----------
    y_true, y_pred : array-like supporting `-`, `**` and `.mean()`
        Reference values and predictions (e.g. NumPy arrays or pandas Series).

    Returns
    -------
    float
        Square root of the mean of the squared differences.
    """
    residuals = y_pred - y_true
    mean_squared = (residuals ** 2).mean()
    return np.sqrt(mean_squared)

In [None]:
# RMSE between the actual and the predicted Open price, in price units.
# Both arguments are pandas Series, so the subtraction inside rmse aligns them on their index.
rootMeanSquareError = rmse(test_original['Open'],predicted_df['Predicted_Open'])
print('RMSE = {}'.format(rootMeanSquareError))