# Stock Prediction

## Data preparation

In [None]:
# Imports the pandas library — a powerful Python tool for working with structured data (like tables, spreadsheets, and time series)
import pandas as pd

# Import the math module, which provides standard mathematical functions and constants.
import math

# Import NumPy, which provides large multi-dimensional arrays and matrices plus a large collection of high-level mathematical functions that operate on them.
import numpy as np

# Import MinMaxScaler for data preprocessing
from sklearn.preprocessing import MinMaxScaler

# Load the raw price history from the csv file
data = pd.read_csv('Uber.csv')

# Reset the index (the previous index is kept as a regular column)
data = data.reset_index()

# Drop the rows whose 'Close' value is missing
data = data[data['Close'].notna()]

# Keep only the Close price, as a single-column Data Frame
data_frame = data[['Close']].copy()

# Convert the Data frame into a 2D numpy array (one row per day, one column)
dataset = data_frame.values

# Number of trailing rows held out for testing
TEST_SIZE = 50

# Number of rows to train the model on: everything except the held-out tail.
# (len() already returns an int, so no rounding is required.)
training_data_len = len(dataset) - TEST_SIZE

# Fit the scaler on the training rows ONLY, then apply it to the whole series.
# Fitting on the full dataset would let the min/max of the held-out rows leak
# into the training data. As a consequence, scaled test values may fall
# slightly outside [0, 1]; inverse_transform still maps them back correctly.
min_max_scaler = MinMaxScaler(feature_range=(0, 1))
min_max_scaler.fit(dataset[:training_data_len])
scaled_data = min_max_scaler.transform(dataset)


## Extract the training data

In [None]:

# Training slice: the first `training_data_len` rows of the scaled dataset
training_data = scaled_data[:training_data_len, :]

# Build the supervised samples with a 10-day look back: every input window
# holds the 10 values that immediately precede its target value.
window_ends = range(10, len(training_data))
x_train = np.array([training_data[end - 10:end, 0] for end in window_ends])
y_train = np.array([training_data[end, 0] for end in window_ends])

# Add a trailing axis of size 1 so the inputs form the 3D array used by the LSTM
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)

## Extract the testing data

In [None]:

# Testing slice: starts 10 rows before the split so that the first test
# target still has a full 10-day look-back window available.
testing_data = scaled_data[training_data_len - 10:, :]

# Build the supervised samples: 10 preceding values as input, current value as target
window_ends = range(10, len(testing_data))
x_test = np.array([testing_data[end - 10:end, 0] for end in window_ends])
y_test = np.array([testing_data[end, 0] for end in window_ends])

# Add a trailing axis of size 1 so the inputs form the 3D array used by the LSTM
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Models

## LSTM (50 Units)

In [None]:
# Import ML model dependencies
from keras.models import Sequential
from keras.layers import Dense, LSTM, InputLayer
from keras.optimizers import Adam
from keras.callbacks import CSVLogger
import time

### Build the LSTM model

In [None]:
# Assemble the LSTM network: look-back window -> 50-unit LSTM -> single output value
model_lstm = Sequential(
    [
        InputLayer(shape=(x_train.shape[1], 1)),
        LSTM(units=50, return_sequences=False),
        Dense(units=1),
    ]
)

# Compile with the Adam optimizer (learning rate 0.01) and mean squared error loss
model_lstm.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=0.01),
)


### Initialize the CSV logger callback

In [None]:
# Callback that logs the training history to a CSV file; append=False means
# the log is not appended to an existing file.
csv_logger_lstm = CSVLogger(filename="model_history_log_lstm.csv", append=False)

### Train the LSTM (50 units)

In [None]:

# Record the time just before training starts
lstm_training_start_time = time.time()

# Train the model; the scaled rows after the training split are the validation targets
model_lstm.fit(
    x_train,
    y_train,
    batch_size=10,
    epochs=40,
    validation_data=(x_test, scaled_data[training_data_len:, :]),
    callbacks=[csv_logger_lstm],
)

# Record the time right after training finishes
lstm_training_end_time = time.time()

# Elapsed training time in seconds
lstm_total_training_time = lstm_training_end_time - lstm_training_start_time

# Print the total LSTM training time
print(lstm_total_training_time)


### Perform Prediction

In [None]:
# Predict the (scaled) price values for the test windows
lstm_prediction = model_lstm.predict(x_test)

# Undo min max scaling to get predictions in the original price units
lstm_prediction = min_max_scaler.inverse_transform(lstm_prediction)

# Get the target values in the original price units.
# reshape(-1, 1) turns y_test into a single column whatever its length, so this
# cell keeps working if the size of the test split changes (previously the
# length was hard-coded as 50).
lstm_prediction_target_values = min_max_scaler.inverse_transform(np.reshape(y_test, (-1, 1)))

# Calculate the RMSE between predictions and targets
rmse_lstm = np.sqrt(np.mean((lstm_prediction - lstm_prediction_target_values) ** 2))

# Print the RMSE
print(rmse_lstm)

## LSTM (1 Unit)

### Build the LSTM model

In [None]:
# Build the single-unit LSTM model: look-back window -> 1-unit LSTM -> single output value.
# The LSTM layer uses units=1 so that this model matches its "LSTM (1 Unit)" section
# and is distinct from the 50-unit model (it was previously built with units=50).
model_lstm_single_unit = Sequential()
model_lstm_single_unit.add(InputLayer(shape=(x_train.shape[1], 1)))
model_lstm_single_unit.add(LSTM(units=1, return_sequences=False))
model_lstm_single_unit.add(Dense(units=1))

# Compile the model with the Adam optimizer (learning rate 0.01) and mean squared error loss
model_lstm_single_unit.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

### Initialize the CSV logger callback

In [None]:
# Callback that logs the single-unit model's training history to its own CSV file.
# NOTE(review): this rebinds the name `csv_logger_lstm`, which the 50-unit LSTM
# section also uses; the training cell below reads the logger through this name.
csv_logger_lstm = CSVLogger(filename="model_history_log_lstm_single_unit.csv", append=False)

### Train the LSTM (1 unit)

In [None]:

# Record the time just before training starts
lstm_single_unit_training_start_time = time.time()

# Train the model; the scaled rows after the training split are the validation targets
model_lstm_single_unit.fit(
    x_train,
    y_train,
    batch_size=10,
    epochs=40,
    validation_data=(x_test, scaled_data[training_data_len:, :]),
    callbacks=[csv_logger_lstm],
)

# Record the time right after training finishes
lstm_single_unit_training_end_time = time.time()

# Elapsed training time in seconds
lstm_single_unit_total_training_time = (
    lstm_single_unit_training_end_time - lstm_single_unit_training_start_time
)

# Print the total single-unit LSTM training time
print(lstm_single_unit_total_training_time)


### Perform Prediction

In [None]:
# Predict the (scaled) price values with the single-unit model.
# (This previously called model_lstm.predict, so the RMSE reported here was
# that of the 50-unit model rather than of the single-unit model.)
lstm_single_unit_prediction = model_lstm_single_unit.predict(x_test)

# Undo min max scaling to get predictions in the original price units
lstm_single_unit_prediction = min_max_scaler.inverse_transform(lstm_single_unit_prediction)

# Get the target values in the original price units.
# reshape(-1, 1) turns y_test into a single column whatever its length,
# instead of hard-coding the test size as 50.
lstm_single_unit_prediction_target_values = min_max_scaler.inverse_transform(np.reshape(y_test, (-1, 1)))

# Calculate the RMSE between predictions and targets
rmse_lstm_single_unit = np.sqrt(np.mean((lstm_single_unit_prediction - lstm_single_unit_prediction_target_values) ** 2))

# Print the RMSE
print(rmse_lstm_single_unit)

## CNN + BiLSTM

In [None]:
# Import dependencies
from keras.layers import (
    Conv1D,
    Bidirectional,
    Dropout
)

### Build the CNN + BiLSTM model

In [None]:
# Assemble the CNN + BiLSTM network:
# look-back window -> 1D convolution -> bidirectional LSTM -> dropout -> single output value
model_cnn_bilstm = Sequential(
    [
        InputLayer(shape=(x_train.shape[1], 1)),
        Conv1D(filters=50, kernel_size=3, padding='same', activation='tanh'),
        Bidirectional(LSTM(units=50, return_sequences=False)),
        Dropout(0.5),
        Dense(units=1),
    ]
)

# Compile with the Adam optimizer (learning rate 0.001) and mean squared error loss
model_cnn_bilstm.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=0.001),
)

### Initialize the CSV logger callback

In [None]:
# Callback that logs the CNN + BiLSTM training history to a CSV file; append=False
# means the log is not appended to an existing file.
csv_logger_cnn_bilstm = CSVLogger(filename="model_history_log_cnn_bilstm.csv", append=False)

### Train the CNN + BiLSTM model

In [None]:

# Record the time just before training starts
cnn_bilstm_training_start_time = time.time()

# Train the model; the scaled rows after the training split are the validation targets
model_cnn_bilstm.fit(
    x_train,
    y_train,
    batch_size=10,
    epochs=40,
    validation_data=(x_test, scaled_data[training_data_len:, :]),
    callbacks=[csv_logger_cnn_bilstm],
)

# Record the time right after training finishes
cnn_bilstm_training_end_time = time.time()

# Elapsed training time in seconds
cnn_bilstm_total_training_time = (
    cnn_bilstm_training_end_time - cnn_bilstm_training_start_time
)

# Print the total CNN + BiLSTM training time
print(cnn_bilstm_total_training_time)

## LSTM + Attention (RELU)

In [None]:
# Import dependencies
from keras.layers import (
    Flatten,
)
from keras_self_attention import SeqSelfAttention
from sklearn.metrics import r2_score

### Build the LSTM + Attention (RELU) model

In [None]:
# Assemble the LSTM + Attention network:
# look-back window -> 50-unit LSTM (full sequence output) -> self-attention
# with ReLU attention activation -> flatten -> single output value
model_lstm_attention = Sequential(
    [
        InputLayer(shape=(x_train.shape[1], 1)),
        LSTM(units=50, return_sequences=True),
        SeqSelfAttention(attention_activation="relu"),
        Flatten(),
        Dense(units=1),
    ]
)

# Compile with the Adam optimizer (learning rate 0.001) and mean squared error loss
model_lstm_attention.compile(
    loss="mean_squared_error",
    optimizer=Adam(learning_rate=0.001),
)

### Initialize the CSV logger callback

In [None]:
# Callback that logs the LSTM + Attention training history to a CSV file; append=False
# means the log is not appended to an existing file.
csv_logger_lstm_attention = CSVLogger(filename="model_history_log_lstm_attention.csv", append=False)

### Train the LSTM + Attention (RELU) model

In [None]:

# Record the time just before training starts
lstm_attention_training_start_time = time.time()

# Train the model; the scaled rows after the training split are the validation targets
model_lstm_attention.fit(
    x_train,
    y_train,
    batch_size=10,
    epochs=40,
    validation_data=(x_test, scaled_data[training_data_len:, :]),
    callbacks=[csv_logger_lstm_attention],
)

# Record the time right after training finishes
lstm_attention_training_end_time = time.time()

# Elapsed training time in seconds
lstm_attention_total_training_time = (
    lstm_attention_training_end_time - lstm_attention_training_start_time
)

# Print the total LSTM + Attention training time
print(lstm_attention_total_training_time)

### Perform Prediction

In [None]:
# Predict the (scaled) price values for the test windows
lstm_attention_prediction = model_lstm_attention.predict(x_test)

# Undo min max scaling to get predictions in the original price units
lstm_attention_prediction = min_max_scaler.inverse_transform(lstm_attention_prediction)

# Get the target values in the original price units.
# reshape(-1, 1) turns y_test into a single column whatever its length,
# instead of hard-coding the test size as 50.
lstm_attention_prediction_target_values = min_max_scaler.inverse_transform(np.reshape(y_test, (-1, 1)))

# Calculate the RMSE between predictions and targets
rmse_lstm_attention = np.sqrt(np.mean((lstm_attention_prediction - lstm_attention_prediction_target_values) ** 2))

# Print the RMSE
print(rmse_lstm_attention)

### Calculate the R-square score

In [None]:
# R-square score of the LSTM + Attention predictions against the
# true target prices (both in original price units)
r_square_score = r2_score(
    y_true=lstm_attention_prediction_target_values,
    y_pred=lstm_attention_prediction,
)
print(r_square_score)