> **Importing Libraries**

In [None]:
!pip install ta

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf 
from tensorflow import keras
sns.set_style("whitegrid")
plt.style.use("fivethirtyeight")
%matplotlib inline

from datetime import datetime

from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense, LSTM
import ta
import warnings
warnings.filterwarnings("ignore")
from datetime import date

> **Read File**

In [None]:
# Load the GOOG price history. read_csv already returns a DataFrame, so the
# result is used directly without re-wrapping it.
data = pd.read_csv('/kaggle/input/google-stock-prediction/GOOG.csv')
data

> **Preprocessing The Data**

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes.
data.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the frame.
data.describe()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# The 'symbol' column is not needed for modelling.
# errors='ignore' keeps this cell safe to re-run once the column is already gone.
data = data.drop(columns=['symbol'], errors='ignore')

In [None]:
# Keep only the text before the first space of each 'date' string (the date
# part, without the time part), then parse it into pandas datetime values.
date_text = data['date'].str.split(' ', n = 1).str[0]
data['date'] = pd.to_datetime(date_text)
data

In [None]:
# Use the 'date' column as the index.
# Reassigning (rather than inplace=True) and checking that the column is still
# present makes this cell safe to re-run after the index has already been set.
if 'date' in data.columns:
    data = data.set_index('date')
data

In [None]:
# Keep just the 'close' column in its own single-column dataframe
data1 = data.filter(items=['close'])

# Same values as a numpy array
dataset = data1.to_numpy()

# Number of leading rows used to train the model: 80% of the rows, rounded up
training_data_len = int(np.ceil(0.8 * dataset.shape[0]))

training_data_len

In [None]:
# Scale the data
# The scaler is fitted on the training rows only, so the min/max of the
# held-out rows do not leak into training. The fitted transform is then applied
# to the whole series; held-out values may therefore fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

scaled_data

In [None]:
# Scaled rows used for training: the first `training_data_len` rows
train = scaled_data[:training_data_len]

> **Creating Training Data Before Modelling**

In [None]:
time_step = 30  # number of previous values in each input sequence

# Build the training sequences from `train`

x_train = []  # input sequences
y_train = []  # value that follows each input sequence

for target_idx in range(time_step, len(train)):
    window_start = target_idx - time_step

    # The `time_step` values before `target_idx` form the input ...
    x_train.append(train[window_start:target_idx, 0])
    # ... and the value at `target_idx` is the matching output
    y_train.append(train[target_idx, 0])

    # Show what has been collected during the first two iterations
    if target_idx <= time_step + 1:
        print("Current iteration:", target_idx)
        print("x_train at this point:", x_train)
        print("y_train at this point:", y_train)
        print()

In [None]:
# Turn the collected lists into numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
# Add a trailing axis of length 1: (d0, d1) -> (d0, d1, 1)
n_windows, window_len = x_train.shape[0], x_train.shape[1]
x_train = x_train.reshape(n_windows, window_len, 1)

In [None]:
# Testing rows: everything from `time_step` rows before the end of the
# training rows onward, so the first test sequence has a full window
test = scaled_data[training_data_len-time_step:, :]

# Targets are the unscaled values that come after the training rows
y_test = np.array(dataset[training_data_len:, :])

# One input sequence of `time_step` values per position from `time_step` onward
x_test = np.array([test[i-time_step:i, 0] for i in range(time_step, len(test))])

# Add a trailing axis of length 1
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

> **LSTM Model**

In [None]:
# Build the LSTM model: two stacked LSTM layers followed by two dense layers

model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

# Compile the model
# NOTE: 'accuracy' is not a meaningful metric for a regression target; it is
# kept only because the training-curve plot reads it from the fit history.
model.compile(optimizer='adam', loss='mean_squared_error',metrics=['accuracy'])

# y_train holds scaled values while y_test holds unscaled values, so the
# validation targets are passed through the same scaler. Without this,
# val_loss is computed on a different scale from loss.
y_val_scaled = scaler.transform(y_test)

# Train the model
MODEL = model.fit(x_train, y_train, validation_data=(x_test, y_val_scaled), batch_size=64, epochs=10)

In [None]:
# Predict on the test sequences, then map the scaled outputs back through the scaler
scaled_predictions = model.predict(x_test)
predictions = scaler.inverse_transform(scaled_predictions)

> **Evaluation**

In [None]:
def plot_training_curves(history_df):
    """Plot per-epoch training and validation curves.

    Parameters
    ----------
    history_df : pandas.DataFrame
        One row per epoch. Must contain the columns 'loss' and 'val_loss'.
        When 'accuracy' and 'val_accuracy' are both present, a second panel
        is drawn for them; otherwise only the loss panel is shown.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    epochs = range(1, len(history_df) + 1)
    has_accuracy = {'accuracy', 'val_accuracy'}.issubset(history_df.columns)

    # squeeze=False keeps `axes` two-dimensional for both the one- and two-panel layouts
    fig, axes = plt.subplots(1, 2 if has_accuracy else 1,
                             figsize = (13, 4), dpi = 70, squeeze = False)

    loss_ax = axes[0, 0]
    loss_ax.plot(epochs, history_df['loss'], marker = '.', label = 'Training Loss')
    loss_ax.plot(epochs, history_df['val_loss'], marker = '^', label = 'Validation Loss')
    loss_ax.set_xlabel('Epoch')
    # The model in this notebook is compiled with loss='mean_squared_error'
    loss_ax.set_ylabel('Mean Squared Error')
    # grid(True) turns the grid on explicitly; a bare grid() toggles the current state
    loss_ax.grid(True)
    loss_ax.legend()

    if has_accuracy:
        acc_ax = axes[0, 1]
        acc_ax.plot(epochs, history_df['accuracy'], marker = '.', label = 'Training Accuracy')
        acc_ax.plot(epochs, history_df['val_accuracy'], marker = '^', label = 'Validation Accuracy')
        acc_ax.set_xlabel('Epoch')
        acc_ax.set_ylabel('Accuracy')
        acc_ax.grid(True)
        acc_ax.legend()

    plt.show()

plot_training_curves(pd.DataFrame(MODEL.history))

In [None]:
from sklearn.metrics import mean_squared_error,r2_score

# R2 of the predictions against the test targets, printed with two decimals
r2Score_test = r2_score(y_true=y_test, y_pred=predictions)
print('test R2 Score: %.2f ' % r2Score_test)



In [None]:
# Root mean squared error (RMSE) between the predictions and the test targets
squared_errors = np.square(predictions - y_test)
rmse = np.sqrt(squared_errors.mean())
rmse

> **Prediction Plot**

In [None]:
# Split the frame at the same row that was used for the train/test split.
# NOTE: this rebinds `train`, which earlier in the notebook held the scaled
# training array; re-run the earlier cells before rebuilding the sequences.
train = data[:training_data_len]
# .copy() so that adding a column does not write into a slice of `data`
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions

# Visualize the data
plt.figure(figsize=(16,6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
# x-values come from each slice's own date index instead of a hard-coded row
# number, so the plot stays aligned if the split point changes
plt.plot(train.index, train['close'])
plt.plot(valid.index, valid[['close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()