In [1]:
import warnings
import tensorflow as tf
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')
import numpy as np
import pandas as pd
import yfinance as yf
from statsmodels.tsa.stattools import adfuller

import sys
sys.path.insert(0, "../Src/")

%autosave 5

Autosaving every 5 seconds


In [2]:
def loadData(ticker):
    data = yf.download(ticker, start="2020-01-01", end="2024-10-13")[['Close','High','Low']].reset_index()

    columnRenameDict = {
        'Date' : 'ds',
        'Close': 'y',
        'High': 'cap',
        'Low': 'floor'
    }
    
    data = data.rename(columns = columnRenameDict)

    data = createRollingAverageDF(data)

    data = createBollingerBands(data,20)
    
    return data

In [3]:
def splitData(data,tillDateAsString='2024-03-01'):
    cutoff_date = pd.to_datetime(tillDateAsString)
    train = data[data['ds'] <= cutoff_date]
    test = data[data['ds'] > cutoff_date]
    return train,test

In [4]:
def processDataForLSTM(data, timeStep=20):
    data = np.array(data)
    X, y = [], []
    for i in range(len(data) - timeStep - 1):
        inputValues = list(np.array(data[i:i + timeStep]).reshape(-1, 1))
        if (i + timeStep) < len(data):
            X.append(inputValues)
            y.append(data[i + timeStep])
        else:
            print(f"Index {i + timeStep} is out of bounds for data length {len(data)}")
    return np.array(X), np.array(y)
    

In [5]:
def createRollingAverageDF(df):
    columns = [str(col) for col in df.columns]
    for col in columns:
        if col != 'ds':
            df[col] = df[col].rolling(window=5).mean()
            mean_value = np.nanmean(df[col])
            # Fill NaN values with the mean
            df[col] = np.where(np.isnan(df[col]), mean_value, df[col])
    return df

In [6]:
def createBollingerBands(df,n=5,m=2):
    # Using implementation from https://tcoil.info/compute-bollinger-bands-for-stocks-with-python-and-pandas/
    TP = (df['y'] + df['cap'] + df['floor'])/3
    B_MA = pd.Series((TP.rolling(n, min_periods=n).mean()), name='B_MA')
    sigma = TP.rolling(n, min_periods=n).std() 
    BU = pd.Series((B_MA + m * sigma), name='BU')
    BL = pd.Series((B_MA - m * sigma), name='BL')
    
    df = df.join(B_MA)
    df = df.join(BU)
    df = df.join(BL)

    for col in [str(col) for col in df.columns]:
        if col != 'ds':
            mean_value = np.nanmean(df[col])
            # Fill NaN values with the mean
            df[col] = np.where(np.isnan(df[col]), mean_value, df[col])
    
    return df

In [7]:
def main():
    train,test = splitData(loadData("MSFT"))
    display(train)
    display(test)
    XTrain,yTrain = processDataForLSTM(train['y'])
    XTest,yTest = processDataForLSTM(test['y'])
    print('XTrain.shape: ', XTrain.shape)
    print('yTrain.shape: ', yTrain.shape)
    print('XTest.shape: ', XTest.shape)
    print('yTest.shape: ', yTest.shape)

In [8]:
if __name__ == '__main__':
    main()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,ds,y,cap,floor,B_MA,BU,BL
0,2020-01-02,288.248272,291.071496,285.172016,287.722930,301.048357,274.397504
1,2020-01-03,288.248272,291.071496,285.172016,287.722930,301.048357,274.397504
2,2020-01-06,288.248272,291.071496,285.172016,287.722930,301.048357,274.397504
3,2020-01-07,288.248272,291.071496,285.172016,287.722930,301.048357,274.397504
4,2020-01-08,159.187997,160.050000,157.634000,287.722930,301.048357,274.397504
...,...,...,...,...,...,...,...
1043,2024-02-26,406.900000,409.525995,404.026001,407.858999,414.093581,401.624416
1044,2024-02-27,407.838000,410.291998,405.194000,408.074999,413.956432,402.193566
1045,2024-02-28,408.946002,411.693994,406.814001,408.240232,414.043234,402.437230
1046,2024-02-29,409.344006,411.967999,406.284003,408.402932,414.116720,402.689145


Unnamed: 0,ds,y,cap,floor,B_MA,BU,BL
1048,2024-03-04,411.852008,413.008002,407.658008,408.789833,414.427955,403.151711
1049,2024-03-05,410.886005,414.194000,407.016010,408.991400,414.598182,403.384618
1050,2024-03-06,409.760004,413.366003,405.630011,409.176367,414.592639,403.760095
1051,2024-03-07,408.860004,412.482001,404.894006,409.233868,414.603686,403.864050
1052,2024-03-08,407.004004,411.392004,403.584003,409.125035,414.559697,403.690372
...,...,...,...,...,...,...,...
1198,2024-10-07,415.992004,421.541998,414.756000,425.704232,442.813101,408.595364
1199,2024-10-08,414.796002,418.977997,412.628003,426.077499,441.832378,410.322620
1200,2024-10-09,414.862000,418.489996,412.146002,426.328699,441.146084,411.511315
1201,2024-10-10,414.721997,418.050000,411.917999,426.411432,440.936753,411.886112


XTrain.shape:  (1027, 20, 1)
yTrain.shape:  (1027,)
XTest.shape:  (134, 20, 1)
yTest.shape:  (134,)
