In [1]:
import warnings
import tensorflow as tf
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')
import numpy as np
import pandas as pd
import yfinance as yf
from statsmodels.tsa.stattools import adfuller

import sys
sys.path.insert(0, "../Src/")

%autosave 5

Autosaving every 5 seconds


In [2]:
def loadData(ticker):
    data = yf.download(ticker, start="2020-01-01", end="2024-10-13")[['Close','High','Low']].reset_index()

    columnRenameDict = {
        'Date' : 'ds',
        'Close': 'y',
        'High': 'cap',
        'Low': 'floor'
    }
    
    data = data.rename(columns = columnRenameDict)

    data = createBollingerBands(data,20)
    
    return data

In [3]:
def splitData(data,tillDateAsString='2024-03-01'):
    cutoff_date = pd.to_datetime(tillDateAsString)
    train = data[data['ds'] <= cutoff_date]
    test = data[data['ds'] > cutoff_date]
    return train,test

In [4]:
def processDataForLSTM(data, timeStep=20):
    data = np.array(data)
    X, y = [], []
    for i in range(len(data) - timeStep - 1):
        inputValues = list(np.array(data[i:i + timeStep]).reshape(-1, 1))
        if (i + timeStep) < len(data):
            X.append(inputValues)
            y.append(data[i + timeStep])
        else:
            print(f"Index {i + timeStep} is out of bounds for data length {len(data)}")
    return np.array(X), np.array(y)
    

In [5]:
def createBollingerBands(df,n=5,m=2):
    # Using implementation from https://tcoil.info/compute-bollinger-bands-for-stocks-with-python-and-pandas/
    TP = (df['y'] + df['cap'] + df['floor'])/3
    B_MA = pd.Series((TP.rolling(n, min_periods=n).mean()), name='B_MA')
    sigma = TP.rolling(n, min_periods=n).std() 
    BU = pd.Series((B_MA + m * sigma), name='BU')
    BL = pd.Series((B_MA - m * sigma), name='BL')
    
    df = df.join(B_MA)
    df = df.join(BU)
    df = df.join(BL)

    for col in [str(col) for col in df.columns]:
        if col != 'ds':
            mean_value = np.nanmean(df[col])
            # Fill NaN values with the mean
            df[col] = np.where(np.isnan(df[col]), mean_value, df[col])
    
    return df

In [6]:
def main():
    train,test = splitData(loadData("MSFT"))
    display(train)
    display(test)
    XTrain,yTrain = processDataForLSTM(train['y'])
    XTest,yTest = processDataForLSTM(test['y'])
    print('XTrain.shape: ', XTrain.shape)
    print('yTrain.shape: ', yTrain.shape)
    print('XTest.shape: ', XTest.shape)
    print('yTest.shape: ', yTest.shape)

In [7]:
if __name__ == '__main__':
    main()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,ds,y,cap,floor,B_MA,BU,BL
0,2020-01-02,160.619995,160.729996,158.330002,288.113340,303.119547,273.107133
1,2020-01-03,158.619995,159.949997,158.059998,288.113340,303.119547,273.107133
2,2020-01-06,159.029999,159.100006,156.509995,288.113340,303.119547,273.107133
3,2020-01-07,157.580002,159.669998,157.320007,288.113340,303.119547,273.107133
4,2020-01-08,160.089996,160.800003,157.949997,288.113340,303.119547,273.107133
...,...,...,...,...,...,...,...
1043,2024-02-26,407.540009,412.160004,407.359985,408.454665,417.839213,399.070117
1044,2024-02-27,407.480011,408.320007,403.850006,408.381666,417.803418,398.959914
1045,2024-02-28,407.720001,409.299988,405.320007,408.285832,417.704532,398.867132
1046,2024-02-29,413.640015,414.200012,405.920013,408.680000,417.890005,399.469995


Unnamed: 0,ds,y,cap,floor,B_MA,BU,BL
1048,2024-03-04,414.920013,417.350006,412.320007,409.443834,419.081599,399.806069
1049,2024-03-05,402.649994,414.250000,400.640015,409.389501,419.098843,399.680158
1050,2024-03-06,402.089996,405.160004,398.390015,409.210668,419.347360,399.073976
1051,2024-03-07,409.140015,409.779999,402.239990,408.967502,419.064126,398.870878
1052,2024-03-08,406.220001,410.420013,404.329987,408.613669,418.450536,398.776802
...,...,...,...,...,...,...,...
1198,2024-10-07,409.540009,417.109985,409.000000,426.594499,442.866726,410.322272
1199,2024-10-08,414.709991,415.660004,408.170013,426.599666,442.853431,410.345901
1200,2024-10-09,417.459991,420.380005,414.299988,426.525166,442.940958,410.109373
1201,2024-10-10,415.839996,417.350006,413.149994,426.062166,443.200369,408.923963


XTrain.shape:  (1027, 20, 1)
yTrain.shape:  (1027,)
XTest.shape:  (134, 20, 1)
yTest.shape:  (134,)
