In [49]:
import numpy as np
import pandas as pd
from pathlib import Path

%matplotlib inline

In [50]:
from numpy.random import seed
seed(1)

from tensorflow import random
random.set_seed(2)

In [51]:
# load csv into dataframe
df = pd.read_csv(
    Path('../data/stocks_history.csv'),
    index_col='Unnamed: 0',
    infer_datetime_format=True,
    parse_dates=True
)
df.head()

Unnamed: 0,MSFT_open,MSFT_high,MSFT_low,MSFT_close,AMD_open,AMD_high,AMD_low,AMD_close,TSLA_open,TSLA_high,...,JNJ_low,JNJ_close,REGN_open,REGN_high,REGN_low,REGN_close,GILD_open,GILD_high,GILD_low,GILD_close
2010-06-29,24.13,24.2,23.11,23.31,7.93,7.93,7.41,7.48,19.0,25.0,...,58.68,59.24,23.67,23.95,22.86,22.98,35.4,35.61,34.74,34.97
2010-06-30,23.3,23.68,22.95,23.01,7.58,7.65,7.3,7.32,25.79,30.4192,...,58.94,59.06,23.05,23.47,22.32,22.32,34.83,35.13,34.26,34.28
2010-07-01,23.09,23.32,22.73,23.16,7.35,7.53,7.1,7.39,25.0,25.92,...,58.65,59.07,22.31,22.37,20.45,20.79,34.24,34.27,33.3,34.14
2010-07-02,23.36,23.48,23.05,23.27,7.45,7.48,7.02,7.17,23.0,23.1,...,58.85,59.08,21.06,21.88,20.75,21.61,34.38,35.16,34.18,34.87
2010-07-06,23.7,24.09,23.584,23.82,7.4,7.42,6.96,7.04,20.0,20.0,...,58.669,59.08,22.03,22.03,21.16,21.36,35.11,35.42,34.415,34.77


In [52]:
# drop all columns except closing prices
dropped_columns = [
    'MSFT_open',
    'MSFT_high',
    'MSFT_low',
    'AAPL_open',
    'AAPL_high',
    'AAPL_low',
    'TSLA_open',
    'TSLA_high',
    'TSLA_low',
    'JNJ_open',
    'JNJ_high',
    'JNJ_low',
    'REGN_open',
    'REGN_high',
    'REGN_low',
    'GILD_open',
    'GILD_high',
    'GILD_low'
]
df.drop(columns=dropped_columns, inplace=True)
df.head()

KeyError: "['AAPL_open' 'AAPL_high' 'AAPL_low'] not found in axis"

In [None]:
df.describe()

In [None]:
def window_data(df, window, feature_col_number, target_col_number):
    """
    This function accepts the column number for the features (X) and the target (y).
    It chunks the data up with a rolling window of Xt - window to predict Xt.
    It returns two numpy arrays of X and y.
    """
    X = []
    y = []
    for i in range(len(df) - window - 1):
        features = df.iloc[i : (i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [None]:
# Creating the features (X) and target (y) data using the window_data() function.
window_size = 15 # iterate over this to measure accuracy change vs window size

feature_column = 0 # iterate over this to predict each stock in dataframe
target_column = 0 # iterate over this to predict each stock in dataframe
X, y = window_data(df, window_size, feature_column, target_column)

In [None]:
# Use 70% of the data for training and the remainder for testing
split = int(0.7 * len(X))
X_train = X[: split - 1]
X_test = X[split:]
y_train = y[: split - 1]
y_test = y[split:]

In [None]:
# Use the MinMaxScaler to scale data between 0 and 1.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(X)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
scaler.fit(y)
y_train = scaler.transform(y_train)
y_test = scaler.transform(y_test)

In [None]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [None]:
# Import required Keras modules
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [None]:
# Define the LSTM RNN model.
model = Sequential()

number_units = 15 # should be equal to the size of the time window as selected above
dropout_fraction = 0.25 # means that each epoch will randomly drop x% - iterate over

# Layer 1
model.add(LSTM(
    units=number_units,
#     return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))
# Layer 2
# model.add(LSTM(units=number_units, return_sequences=True))
# model.add(Dropout(dropout_fraction))
# Layer 3
# model.add(LSTM(units=number_units))
# model.add(Dropout(dropout_fraction))
# Output layer
model.add(Dense(1))

In [None]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error")

In [None]:
# Summarize the model
model.summary()

In [None]:
# Train the model
model.fit(X_train, y_train, epochs=30, shuffle=False, batch_size=10, verbose=1)

In [None]:
# Evaluate the model
model.evaluate(X_test, y_test)

In [None]:
# Make some predictions
predicted = model.predict(X_test)

In [None]:
# Recover the original prices instead of the scaled version
predicted_prices = scaler.inverse_transform(predicted)
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

In [None]:
# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
})
stocks.head()

In [None]:
# Plot the real vs predicted prices as a line chart
stocks.plot()

In [None]:
stocks

In [None]:
from sklearn.metrics import mean_squared_error

print(f'MSE:{mean_squared_error(stocks.iloc[:,0],stocks.iloc[:,1], squared=True)}')
print(f'RMSE:{mean_squared_error(stocks.iloc[:,0],stocks.iloc[:,1], squared=False)}')