In [1]:
import warnings
import tensorflow as tf
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')
import numpy as np
import pandas as pd
import yfinance as yf
from statsmodels.tsa.stattools import adfuller

import sys
sys.path.insert(0, "../Src/")

%autosave 5

Autosaving every 5 seconds


In [2]:
def loadData(ticker):
    train = yf.download(ticker, start="2020-01-01", end="2024-03-01")[['Close','High','Low']].reset_index()
    test = yf.download(ticker, start="2024-03-01", end="2024-10-13")[['Close','High','Low']].reset_index()

    columnRenameDict = {
        'Date' : 'ds',
        'Close': 'y',
        'High': 'cap',
        'Low': 'floor'
    }
    
    train = train.rename(columns = columnRenameDict)
    test = test.rename(columns = columnRenameDict)

    train = createRollingAverageDF(train)
    test = createRollingAverageDF(test)
    
    return train,test

In [3]:
def processDataForLSTM(data, timeStep=20):
    data = np.array(data)
    X, y = [], []
    for i in range(len(data) - timeStep - 1):
        inputValues = list(np.array(data[i:i + timeStep]).reshape(-1, 1))
        if (i + timeStep) < len(data):
            X.append(inputValues)
            y.append(data[i + timeStep])
        else:
            print(f"Index {i + timeStep} is out of bounds for data length {len(data)}")
    return np.array(X), np.array(y)
    

In [4]:
def createRollingAverageDF(df):
    columns = [str(col) for col in df.columns]
    for col in columns:
        if col != 'ds':
            df[col] = df[col].rolling(window=5).mean()
            mean_value = np.nanmean(df[col])
            # Fill NaN values with the mean
            df[col] = np.where(np.isnan(df[col]), mean_value, df[col])
    return df

In [5]:
def main():
    train,test = loadData("MSFT")
    display(train)
    display(test)
    XTrain,yTrain = processDataForLSTM(train['y'])
    XTest,yTest = processDataForLSTM(test['y'])
    print('XTrain.shape: ', XTrain.shape)
    print('yTrain.shape: ', yTrain.shape)
    print('XTest.shape: ', XTest.shape)
    print('yTest.shape: ', yTest.shape)

In [6]:
if __name__ == '__main__':
    main()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,ds,y,cap,floor
0,2020-01-02,268.051879,270.786733,265.059167
1,2020-01-03,268.051879,270.786733,265.059167
2,2020-01-06,268.051879,270.786733,265.059167
3,2020-01-07,268.051879,270.786733,265.059167
4,2020-01-08,159.187997,160.050000,157.634000
...,...,...,...,...
1042,2024-02-23,406.203998,408.751996,403.242004
1043,2024-02-26,406.900000,409.525995,404.026001
1044,2024-02-27,407.838000,410.291998,405.194000
1045,2024-02-28,408.946002,411.693994,406.814001


Unnamed: 0,ds,y,cap,floor
0,2024-03-01,423.609829,427.050158,419.983986
1,2024-03-04,423.609829,427.050158,419.983986
2,2024-03-05,423.609829,427.050158,419.983986
3,2024-03-06,423.609829,427.050158,419.983986
4,2024-03-07,408.860004,412.482001,404.894006
...,...,...,...,...
151,2024-10-07,415.992004,421.541998,414.756000
152,2024-10-08,414.796002,418.977997,412.628003
153,2024-10-09,414.862000,418.489996,412.146002
154,2024-10-10,414.721997,418.050000,411.917999


XTrain.shape:  (1026, 20, 1)
yTrain.shape:  (1026,)
XTest.shape:  (135, 20, 1)
yTest.shape:  (135,)
