In [1]:
import warnings
import tensorflow as tf
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')
import numpy as np
import pandas as pd
import yfinance as yf
from statsmodels.tsa.stattools import adfuller

import sys
sys.path.insert(0, "../Src/")

%autosave 5

Autosaving every 5 seconds


In [2]:
def loadData(ticker):
    data = yf.download(ticker, start="2020-01-01", end="2024-10-13")[['Close','High','Low']].reset_index()

    columnRenameDict = {
        'Date' : 'ds',
        'Close': 'y',
        'High': 'cap',
        'Low': 'floor'
    }
    
    data = data.rename(columns = columnRenameDict)

    data = createRollingAverageDF(data)
    
    return data

In [3]:
def splitData(data,tillDateAsString='2024-03-01'):
    cutoff_date = pd.to_datetime(tillDateAsString)
    train = data[data['ds'] <= cutoff_date]
    test = data[data['ds'] > cutoff_date]
    return train,test

In [4]:
def processDataForLSTM(data, timeStep=20):
    data = np.array(data)
    X, y = [], []
    for i in range(len(data) - timeStep - 1):
        inputValues = list(np.array(data[i:i + timeStep]).reshape(-1, 1))
        if (i + timeStep) < len(data):
            X.append(inputValues)
            y.append(data[i + timeStep])
        else:
            print(f"Index {i + timeStep} is out of bounds for data length {len(data)}")
    return np.array(X), np.array(y)
    

In [5]:
def createRollingAverageDF(df):
    columns = [str(col) for col in df.columns]
    for col in columns:
        if col != 'ds':
            df[col] = df[col].rolling(window=5).mean()
            mean_value = np.nanmean(df[col])
            # Fill NaN values with the mean
            df[col] = np.where(np.isnan(df[col]), mean_value, df[col])
    return df

In [6]:
def main():
    train,test = splitData(loadData("MSFT"))
    display(train)
    display(test)
    XTrain,yTrain = processDataForLSTM(train['y'])
    XTest,yTest = processDataForLSTM(test['y'])
    print('XTrain.shape: ', XTrain.shape)
    print('yTrain.shape: ', yTrain.shape)
    print('XTest.shape: ', XTest.shape)
    print('yTest.shape: ', yTest.shape)

In [7]:
if __name__ == '__main__':
    main()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,ds,y,cap,floor
0,2020-01-02,288.248272,291.071496,285.172016
1,2020-01-03,288.248272,291.071496,285.172016
2,2020-01-06,288.248272,291.071496,285.172016
3,2020-01-07,288.248272,291.071496,285.172016
4,2020-01-08,159.187997,160.050000,157.634000
...,...,...,...,...
1043,2024-02-26,406.900000,409.525995,404.026001
1044,2024-02-27,407.838000,410.291998,405.194000
1045,2024-02-28,408.946002,411.693994,406.814001
1046,2024-02-29,409.344006,411.967999,406.284003


Unnamed: 0,ds,y,cap,floor
1048,2024-03-04,411.852008,413.008002,407.658008
1049,2024-03-05,410.886005,414.194000,407.016010
1050,2024-03-06,409.760004,413.366003,405.630011
1051,2024-03-07,408.860004,412.482001,404.894006
1052,2024-03-08,407.004004,411.392004,403.584003
...,...,...,...,...
1198,2024-10-07,415.992004,421.541998,414.756000
1199,2024-10-08,414.796002,418.977997,412.628003
1200,2024-10-09,414.862000,418.489996,412.146002
1201,2024-10-10,414.721997,418.050000,411.917999


XTrain.shape:  (1027, 20, 1)
yTrain.shape:  (1027,)
XTest.shape:  (134, 20, 1)
yTest.shape:  (134,)
