# Stock price forecasting with LSTM
## 1. Import libraries

In [4]:
import numpy as np
import pandas as pd

import os
import matplotlib.pyplot as plt
import datetime as dt
import plotly.graph_objects as go


from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Stock apis
from polygon import RESTClient

import constants as c

## 2. Preprocessing

In [5]:
# Query the Polygon REST API for daily aggregate bars of a single ticker
client = RESTClient(api_key=c.POLY_APIKEY)
ticker = "AAPL"

bars = client.get_aggs(
    ticker=ticker,
    multiplier=1,
    timespan="day",
    from_="2022-03-01",
    to="2023-03-31",
)

# Tabulate the bars, then derive a datetime column from the millisecond
# timestamps and the open-to-close move of each bar
df = pd.DataFrame(bars).assign(
    date=lambda bars_df: pd.to_datetime(bars_df['timestamp'], unit='ms'),
    change=lambda bars_df: bars_df['close'] - bars_df['open'],
)

# Candlestick chart of the open/high/low/close columns against the bar date
fig = go.Figure(
    data=[
        go.Candlestick(
            x=df['date'],
            open=df['open'],
            high=df['high'],
            low=df['low'],
            close=df['close'],
        )
    ]
)
fig.show()

df

Unnamed: 0,open,high,low,close,volume,vwap,timestamp,transactions,otc,date,change
0,164.695,166.6000,161.9700,163.20,83468865.0,164.1600,1646110800000,749653,,2022-03-01 05:00:00,-1.495
1,164.390,167.3600,162.9500,166.56,79724750.0,165.8095,1646197200000,668260,,2022-03-02 05:00:00,2.170
2,168.470,168.9100,165.5500,166.23,76678441.0,166.9180,1646283600000,651764,,2022-03-03 05:00:00,-2.240
3,164.490,165.5500,162.1000,163.17,83819592.0,163.3980,1646370000000,743696,,2022-03-04 05:00:00,-1.320
4,163.360,165.0200,159.0400,159.30,96418845.0,161.4026,1646629200000,847057,,2022-03-07 05:00:00,-4.060
...,...,...,...,...,...,...,...,...,...,...,...
254,153.785,156.3000,153.4600,153.83,87558028.0,154.6895,1678078800000,691990,,2023-03-06 05:00:00,0.045
255,153.700,154.0299,151.1300,151.60,56136378.0,152.2768,1678165200000,496631,,2023-03-07 05:00:00,-2.100
256,152.810,153.4700,151.8300,152.87,47204791.0,152.6973,1678251600000,405203,,2023-03-08 05:00:00,0.060
257,153.559,154.5350,150.2250,150.59,53833122.0,152.4689,1678338000000,480909,,2023-03-09 05:00:00,-2.969


In [13]:
# Keep only the closing price, indexed by the bar's date
close_df = df.set_index('date')[['close']]
close_df

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2022-03-01 05:00:00,163.20
2022-03-02 05:00:00,166.56
2022-03-03 05:00:00,166.23
2022-03-04 05:00:00,163.17
2022-03-07 05:00:00,159.30
...,...
2023-03-06 05:00:00,153.83
2023-03-07 05:00:00,151.60
2023-03-08 05:00:00,152.87
2023-03-09 05:00:00,150.59


In [30]:
# Normalise the closing prices into the range [0, 1]
# TODO: explore using StandardScaler instead of MinMaxScaler in the future
# NOTE(review): the scaler is fitted on the whole series; if a hold-out period
# is evaluated later on, it should be fitted on the training portion only -- confirm.
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(close_df['close'].values.reshape(-1,1))

# Number of days to base prediction on:
N_PRED_DAYS = 50

# Every sample needs N_PRED_DAYS input values plus one following value as its
# target, so a series that is too short yields no samples at all. Fail early
# with a clear message rather than carrying empty training arrays forward.
n_samples = len(scaled_data) - N_PRED_DAYS
if n_samples <= 0:
    raise ValueError(
        f"Need more than {N_PRED_DAYS} closing prices to build training windows, "
        f"got {len(scaled_data)}"
    )

# Sample i: inputs are rows i .. i+N_PRED_DAYS-1, target is row i+N_PRED_DAYS
x_train = np.array([scaled_data[i:i + N_PRED_DAYS, 0] for i in range(n_samples)])
y_train = scaled_data[N_PRED_DAYS:, 0].copy()

# Add a trailing axis of length 1 -> (n_samples, N_PRED_DAYS, 1)
x_train = x_train.reshape(n_samples, N_PRED_DAYS, 1)

x_train, y_train

(array([[0.70782351, 0.77011494, 0.76399703, ..., 0.59807193, 0.50129774,
         0.54671858],
        [0.77011494, 0.76399703, 0.70726733, ..., 0.50129774, 0.54671858,
         0.39822024],
        [0.76399703, 0.70726733, 0.63552095, ..., 0.54671858, 0.39822024,
         0.32517612],
        ...,
        [0.13366704, 0.12680756, 0.09288098, ..., 0.48220245, 0.53411198,
         0.49276974],
        [0.12680756, 0.09288098, 0.0189099 , ..., 0.53411198, 0.49276974,
         0.51631442],
        [0.09288098, 0.0189099 , 0.08509455, ..., 0.49276974, 0.51631442,
         0.47404524]]),
 array([0.39822024, 0.32517612, 0.40952911, 0.38042269, 0.44901743,
        0.29291806, 0.22858732, 0.23303671, 0.33537264, 0.28439006,
        0.28735632, 0.34779385, 0.45643307, 0.44160178, 0.43919169,
        0.48553949, 0.37745643, 0.39154616, 0.43919169, 0.42528736,
        0.32665925, 0.22450871, 0.12717835, 0.14349277, 0.19299221,
        0.09343715, 0.12124583, 0.20114943, 0.19150908, 0.24564331,
 

## 3. Build LSTM model