# Simple Stock Price Prediction Model

Created by Marc Zeugin

### Import modules

In [None]:
from numpy.random import seed
seed(1)

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, max_error
from nn_models import lstm_prediction, dnn_prediction
from sklearn.model_selection import train_test_split
from plot_graph import plot_v_models, plot_v_stocks
from sklearn.preprocessing import MinMaxScaler
from get_stock_data import get_stock_data
from get_macro_data import get_macro_data
from models import predict_regression
from prettytable import PrettyTable
import matplotlib.pyplot as plt
from forecast import user_input
plt.style.use('seaborn-whitegrid')
import seaborn as sns
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import math

### Set options

In [None]:
# Regression models to run, given by abbreviation. Available options are:
#   CBR (CatBoostRegressor), SVM (SupportVectorRegressor), HGBR (HistGradientBoostingRegressor),
#   BaR (BaggingRegressor), GBR (GradientBoostingRegressor), ABR (AdaBoostRegressor),
#   GPR (GaussianProcessRegressor), R (Ridge), LL (LassoLars), BR (BayesianRidge),
#   LR (LinearRegression), RF (RandomForestRegressor), XGB (XGBRegressor),
#   KNN (KNeighborsRegressor), L (Lasso), MLP (MLPRegressor), LGBM (LGBMRegressor),
#   ET (ExtraTreeRegressor), XGBRF (XGBRFRegressor), PR (PoissonRegressor),
#   PAR (PassiveAggressiveRegressor)
models = ['BR', 'R', 'RF']
training_data_split = 0.8  # share of rows used for training, as a decimal (passed as train_size)
shift = 2  # lag horizon in days; lagged copies are created for 1 .. shift - 1 rows back
new_stocks = ['AAL', 'AAP', 'AAPL', 'AMZN', 'FTNT', 'GOOG', 'HPE', 'INTC', 'META', 'MSFT']  # symbols of new stocks to include
start_date = "2016-01-01"  # start date of stock data
end_date = "2021-12-29"  # end date of stock data
cv_num = 3  # number of cross-validations to do for hyperparameter tuning
trial_num = 10  # number of trials to find optimal hyperparameters

### Load financial data from the Yahoo Finance API or local folder if available

In [None]:
# Fetch the price history for the configured symbols and date range.
# ticker_hist_list holds one history frame per stock (they are concatenated
# column-wise further down); tickers supplies the symbols used to label them.
# NOTE(review): presumably both sequences share the same order - confirm in get_stock_data.
ticker_hist_list, tickers = get_stock_data(new_stocks, start_date, end_date)

### Display all stock closing price plots

In [None]:
# Plot the loaded histories, one stock at a time (helper from plot_graph).
plot_v_stocks(tickers, ticker_hist_list)

### Create one dataset with all stock data

In [None]:
# Put the per-ticker frames side by side and label every column
# "<field>.<ticker>", e.g. "close.AAPL".
# NOTE(review): assumes each frame carries exactly these five columns in this
# order - confirm against get_stock_data.
col_names = ['open', 'high', 'low', 'close', 'volume']
stock_data = pd.concat(ticker_hist_list, axis=1)
stock_data.columns = [
    f'{field}.{symbol}'
    for symbol in tickers
    for field in col_names
]
stock_data.index.name = 'Date'
print(stock_data.shape)

In [None]:
# Pairwise regression plots of the daily percentage change of every closing
# price column (columns whose name starts with "close").
close_columns = [name for name in stock_data.columns if name.startswith('close')]
plot_df = stock_data[close_columns].pct_change()
sns.pairplot(plot_df, kind='reg')

### Create stock data features

In [None]:
# Derive per-ticker features from the raw price columns.
# The price columns were named "<field>.<ticker>" with lowercase field names
# when stock_data was assembled, so they are looked up with that spelling.
rolling_window = 7  # rows per rolling mean window
for ticker in tickers:
    close = stock_data[f'close.{ticker}']
    # Roll over the close column only; rolling the entire (growing) frame for
    # every ticker just to pick one column out of it is wasted work.
    stock_data[f'weekly_mean.{ticker}'] = close.rolling(rolling_window).mean() / close
    stock_data[f'open_close_ratio.{ticker}'] = stock_data[f'open.{ticker}'] / close
    stock_data[f'high_close_ratio.{ticker}'] = stock_data[f'high.{ticker}'] / close
    stock_data[f'low_close_ratio.{ticker}'] = stock_data[f'low.{ticker}'] / close
    stock_data[f'high_minus_low.{ticker}'] = stock_data[f'high.{ticker}'] - stock_data[f'low.{ticker}']
    stock_data[f'daily_return.{ticker}'] = close.pct_change()
# The first rolling_window - 1 rows have no complete window (NaN rolling mean),
# so they are dropped; this also removes the NaN first row of pct_change.
stock_data = stock_data.iloc[rolling_window - 1:]

In [None]:
# Summary statistics of every stock column (raw prices and derived features).
stock_data.describe()

### Pull macroeconomic data from API

In [None]:
# First argument: the API to use (FRED, IMF, WB or ALL).
# Second argument: whether to load from macrodata.csv or download the data.
# NOTE(review): which boolean value means "load from file" is not visible here - confirm in get_macro_data.
macro_data = get_macro_data('FRED', True)

### Adjust data structure to match stock data

In [None]:
# Keep only the macro rows whose index label also occurs in the stock data.
# All unmatched labels are removed in a single call instead of one in-place
# drop per row, which re-built the frame on every iteration and failed with a
# KeyError when an unmatched label occurred more than once.
in_stock_data = macro_data.index.isin(stock_data.index)
macro_data.drop(index=macro_data.index[~in_stock_data], inplace=True)
print(macro_data.shape)

In [None]:
# Summary statistics of the macro rows that remain after the filtering above.
macro_data.describe()

### Add macroeconomic data to dataset

In [None]:
# Join stock and macro columns side by side; concat aligns the two frames on
# their index labels.
frames_to_join = (stock_data, macro_data)
all_data = pd.concat(frames_to_join, axis='columns')
print(all_data.shape)

### Add lagged features to the dataset

In [None]:
# For every existing column add lagged copies named "<column>-<lag>" for
# lag = 1 .. shift - 1 (the column shifted down by `lag` rows).
# All lag series are built first and attached with one concat, so the frame is
# neither modified while it is being iterated nor grown one column at a time.
lagged_columns = [
    all_data[col].shift(lag).rename(col + str(-lag))
    for col in all_data.columns
    for lag in range(1, shift)
]
all_data = pd.concat([all_data, *lagged_columns], axis=1)

In [None]:
# Shifting leaves NaN in the leading rows of the lag columns; drop one leading
# row for each lag that was created (shift - 1 rows in total).
for _ in range(shift - 1):
    first_label = all_data.index[0]
    all_data.drop(index=first_label, inplace=True)

### Split into train and test

In [None]:
# Target: the closing price of one stock. Price columns are named
# "<field>.<ticker>" with lowercase field names, so the target is "close.AAP"
# (the capitalised "Close.AAP" does not exist in all_data).
target_ticker = 'AAP'
colum_name = f'close.{target_ticker}'

# Features: every other column, including the lagged copies of the target.
X = all_data.drop(columns=[colum_name])
y = all_data[colum_name].copy()

print(f'X shape: {X.shape} & y shape: {y.shape}')

# shuffle=False keeps the row order: the first part of the rows becomes the
# training set and the remainder the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=training_data_split, shuffle=False)

print(f'X_train shape: {X_train.shape} & X_test shape: {X_test.shape}')
print(f'y_train shape: {y_train.shape} & y_test shape: {y_test.shape}')

### Apply MinMaxScaler to dataset

In [None]:
# Scale features and target to the range seen in the TRAINING data.
# The scalers are fitted on the training split only and then applied to the
# test split; re-fitting them on the test split would scale the two splits
# differently and leak test-set statistics into the evaluation.
scaler_x = MinMaxScaler()
X_train = scaler_x.fit_transform(X_train)
X_test = scaler_x.transform(X_test)
print(f'X_train shape: {X_train.shape} & X_test shape: {X_test.shape}')
scaler_y = MinMaxScaler()
# MinMaxScaler expects 2-D input, hence the reshape to a single column.
y_train = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
y_test = scaler_y.transform(y_test.values.reshape(-1, 1))
print(f'y_train shape: {y_train.shape} & y_test shape: {y_test.shape}')

### Reshape input for LSTM model to match requirements

In [None]:
# Append a trailing axis of length 1 so the inputs become 3-D:
# (rows, columns) -> (rows, columns, 1).
X_train_nn = X_train.reshape(X_train.shape + (1,))
X_test_nn = X_test.reshape(X_test.shape + (1,))
print(f'X_train_nn shape: {X_train_nn.shape} & X_test_nn shape: {X_test_nn.shape}')
# The targets are used as they are; copies keep the originals untouched.
y_train_nn, y_test_nn = y_train.copy(), y_test.copy()
print(f'y_train_nn shape: {y_train_nn.shape} & y_test_nn shape: {y_test_nn.shape}')

### Fit LSTM model

In [None]:
# Run the LSTM helper from nn_models on the 3-D inputs. It returns a history
# object (its .epoch and .history['loss'] / ['val_loss'] are plotted below)
# and test_predict_lstm, which is compared against y_test further down.
# NOTE(review): presumably test_predict_lstm holds the predictions for X_test_nn - confirm in nn_models.
history_lstm, test_predict_lstm = lstm_prediction(X_train_nn, y_train_nn, X_test_nn, y_test_nn)

In [None]:
print(history_lstm.history.keys())
plt.figure(figsize=(16, 8))

# Loss per epoch: the 'loss' curve next to the 'val_loss' curve
for history_key, curve_label in (('loss', "mse"), ('val_loss', "val mse")):
    plt.plot(history_lstm.epoch, history_lstm.history[history_key], label=curve_label)
plt.title("MSE", fontsize=18)
plt.xlabel("Epochs", fontsize=15)
plt.ylabel("MSE", fontsize=15)
plt.grid(alpha=0.3)
plt.legend()

plt.show()

### Calculate RMSE, MSE and MAE

In [None]:
# Error metrics of the LSTM output against y_test (both on the scaled target).
# The MSE is computed once and reused for the RMSE.
mse_lstm = mean_squared_error(y_test, test_predict_lstm)
mae_lstm = mean_absolute_error(y_test, test_predict_lstm)
print(f'RMSE: {math.sqrt(mse_lstm)}')
print(f'MSE: {mse_lstm}')
print(f'MAE: {mae_lstm}')

### Plot prediction vs actual close price

In [None]:
# Actual (red) vs. LSTM-predicted (green) values of the target.
# NOTE(review): the title names tickers[2], while the target column is chosen
# separately where X and y are built - confirm both refer to the same stock.
lstm_plot_title = f'Ticker Predicted Closing Price Vs Actual Closing Price with LSTM - {tickers[2]}'
plt.figure(figsize=(16, 8))
plt.plot(y_test, color='red', label='Actual Close Price')
plt.plot(test_predict_lstm, color='green', label='Predicted Closing Price')
plt.title(lstm_plot_title)
plt.legend(loc='best')
plt.show()

### Inverse predictions (if needed)

In [None]:
# y_test = scaler_y.inverse_transform(y_test)
# test_predict_lstm = scaler_y.inverse_transform(test_predict_lstm)

### Fit DNN model

In [None]:
# Run the DNN helper from nn_models on the same inputs as the LSTM. It returns
# a history object (its .epoch and .history['loss'] / ['val_loss'] are plotted
# below) and test_predict_dnn, which is compared against y_test further down.
# NOTE(review): presumably test_predict_dnn holds the predictions for X_test_nn - confirm in nn_models.
history_dnn, test_predict_dnn = dnn_prediction(X_train_nn, y_train_nn, X_test_nn, y_test_nn)

In [None]:
print(history_dnn.history.keys())
plt.figure(figsize=(16, 8))

# Loss per epoch: the 'loss' curve next to the 'val_loss' curve
for history_key, curve_label in (('loss', "mse"), ('val_loss', "val mse")):
    plt.plot(history_dnn.epoch, history_dnn.history[history_key], label=curve_label)
plt.title("MSE", fontsize=18)
plt.xlabel("Epochs", fontsize=15)
plt.ylabel("MSE", fontsize=15)
plt.grid(alpha=0.3)
plt.legend()

plt.show()

### Calculate RMSE, MSE and MAE

In [None]:
# Error metrics of the DNN output against y_test (both on the scaled target).
# The MSE is computed once and reused for the RMSE.
mse_dnn = mean_squared_error(y_test, test_predict_dnn)
mae_dnn = mean_absolute_error(y_test, test_predict_dnn)
print(f'RMSE: {math.sqrt(mse_dnn)}')
print(f'MSE: {mse_dnn}')
print(f'MAE: {mae_dnn}')

### Plot prediction vs actual close price

In [None]:
# Actual (red) vs. DNN-predicted (green) values of the target.
# NOTE(review): the title names tickers[2], while the target column is chosen
# separately where X and y are built - confirm both refer to the same stock.
dnn_plot_title = f'Ticker Predicted Closing Price Vs Actual Closing Price with DNN- {tickers[2]}'
plt.figure(figsize=(16, 8))
plt.plot(y_test, color='red', label='Actual Close Price')
plt.plot(test_predict_dnn, color='green', label='Predicted Closing Price')
plt.title(dnn_plot_title)
plt.legend(loc='best')
plt.show()

### Preparation for non-NN Models

In [None]:
# Rebuild X / y from the unscaled data for the regression models.
# Price columns are named "<field>.<ticker>" with lowercase field names, so
# the target is "close.AAP" (the capitalised "Close.AAP" does not exist).
target_column = 'close.AAP'
X = all_data.drop(columns=[target_column])
y = all_data[target_column]

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=training_data_split, shuffle=False)
# Sort every split by its index; the sorted result is re-assigned rather than
# sorting the objects returned by the split in place.
X_train = X_train.sort_index()
X_test = X_test.sort_index()
y_train = y_train.sort_index()
y_test = y_test.sort_index()
print(f'Training sample size: {X_train.shape[0]}, {X_train.shape[1]} and testing sample size: {X_test.shape[0]}, {X_test.shape[1]}.')

### Apply MinMaxScaler to dataset

In [None]:
# Scale features and target to the range seen in the TRAINING data.
# The scalers are fitted on the training split only and then applied to the
# test split; re-fitting them on the test split would scale the two splits
# differently and leak test-set statistics into the evaluation.
scaler_x2 = MinMaxScaler()
scaler_y2 = MinMaxScaler()
X_train = scaler_x2.fit_transform(X_train)
X_test = scaler_x2.transform(X_test)
# MinMaxScaler expects 2-D input, hence the reshape to a single column.
y_train = scaler_y2.fit_transform(y_train.values.reshape(-1, 1))
y_test = scaler_y2.transform(y_test.values.reshape(-1, 1))

### Prediction with selected models

In [None]:
# Fit every model listed in `models`, passing the number of cross-validations
# (cv_num) and hyperparameter trials (trial_num) configured in the options.
# y_pred is a mapping whose .items() are scored against y_test below.
# NOTE(review): the contents of `studies` and `comp_models` are not visible here - see models.predict_regression.
# This step may take a while depending on the models selected, number of models, and your pc specs
y_pred, studies, comp_models = predict_regression(models, X_train, y_train, X_test, cv_num, trial_num)

In [None]:
# One table row per entry of y_pred with its scores against y_test,
# each formatted to four decimals.
x = PrettyTable()
x.field_names = ["Model", "R2 Score", 'RMSE', 'MAE', 'Max Error']

for model_name, prediction in y_pred.items():
    r2 = r2_score(y_test, prediction)
    rmse = math.sqrt(mean_squared_error(y_test, prediction))
    mae = mean_absolute_error(y_test, prediction)
    largest_error = max_error(y_test, prediction)
    x.add_row([model_name, f'{r2:.4f}', f'{rmse:.4f}', f'{mae:.4f}', f'{largest_error:.4f}'])

print(x)

### Plot prediction vs actual close price

In [None]:
# Plot the predictions of the selected models against y_test (helper from plot_graph).
plot_v_models(models, y_test, y_pred)

### Function to predict user selected stock performance

In [None]:
# user_input()