# Retrieve Stock Data and Save to DF

In [59]:
from pandas_datareader import data as web
import os
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns

def get_stock(ticker, start_date, end_date, s_window, l_window):
    try:
        #yf.pdr_override()
        df = yf.download(ticker, start=start_date, end=end_date,auto_adjust=False)
        #print("DF: ",df)
# can use this as well        df = web.get_data_yahoo(ticker, start=start_date, end=end_date)
        df['Return'] = df['Adj Close'].pct_change()
        df['Return'].fillna(0, inplace = True)
        df['Date'] = df.index
        df['Date'] = pd.to_datetime(df['Date'])
        df['Month'] = df['Date'].dt.month
        df['Year'] = df['Date'].dt.year 
        df['Day'] = df['Date'].dt.day
        for col in ['Open', 'High', 'Low', 'Close', 'Adj Close']:
            df[col] = df[col].round(2)
        df['Weekday'] = df['Date'].dt.day_name()
        df['Week_Number'] = df['Date'].dt.strftime('%U')
        df['Year_Week'] = df['Date'].dt.strftime('%Y-%U')
        df['Short_MA'] = df['Adj Close'].rolling(window=s_window, min_periods=1).mean()
        df['Long_MA'] = df['Adj Close'].rolling(window=l_window, min_periods=1).mean()        
        col_list = ['Date', 'Year', 'Month', 'Day', 'Weekday', 
                    'Week_Number', 'Year_Week', 'Open', 
                    'High', 'Low', 'Close', 'Volume', 'Adj Close',
                    'Return', 'Short_MA', 'Long_MA']
        num_lines = len(df)
        df = df[col_list]
        print('read ', num_lines, ' lines of data for ticker: ' , ticker)
        return df
    except Exception as error:
        print(error)
        return None

In [60]:
try:
    ticker='SPY'
    input_dir = os.getcwd()
    output_file = os.path.join(input_dir, ticker + '.csv')
    df = get_stock(ticker, start_date='2000-01-01', end_date='2025-07-08', 
               s_window=14, l_window=50)
    df.to_csv(output_file, index=False)
    print('wrote ' + str(len(df)) + ' lines to file: ' + output_file)
except Exception as e:
    print(e)
    print('failed to get Yahoo stock data for ticker: ', ticker)

[*********************100%***********************]  1 of 1 completed

read  6415  lines of data for ticker:  SPY
wrote 6415 lines to file: /Users/jonathanyan/Desktop/RISE DS/DS-Prac/Stock_market_proj/lstm-atr-prediction/stock-data/SPY.csv



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Return'].fillna(0, inplace = True)


In [61]:
#psuedocode
'''
1. create ATR column
2. create SD column
3. build LSTM columnn
4. split into train and test
5. train lstm on training data for ATR and test
6. train lstm on training data for SD and test
7. plot results and avg. returns based on decisions (buy if delta_volatility<-alpha, sell if delta_volatility>alpha)
'''

'\n1. create ATR column\n2. create SD column\n3. build LSTM columnn\n4. split into train and test\n5. train lstm on training data for ATR and test\n6. train lstm on training data for SD and test\n7. plot results and avg. returns based on decisions (buy if delta_volatility<-alpha, sell if delta_volatility>alpha)\n'

In [62]:
 #.xs returns a cross section; selects only values within the SPY indexes at level=1
df_n= df.xs('SPY',axis=1,level=1)
#cleaned up data

In [63]:
# import math
# #challenge of how to deal with missing values in time series data
# #choose sliding window of length=N (could be 20?) N_lstmtraining>N_atrslidingwindow
# def atr_func(d):
#     running = 0
#     for i in range(len(d)):
#         high = d.loc[i,"High"]
#         low = d.loc[i,"Low"]
#         if i==0:
#             running += high-low
#             continue
#         if i>0:
#             y_close = d.loc[i,"Close"]
        
#         TR = max(high-low,max(math.abs(high-y_close),math.abs(low-y_close)))
#         running+=TR
#     return running/len(d)

# df_n['ATR'] = df_n.rolling(window=14).apply(atr_func)
# df_n

In [64]:
#define ATR columns
high = df_n["High"]
low = df_n["Low"]
close = df_n["Close"]

prev_close = close.shift(1)


tr = pd.concat([
    high - low,
    (high - prev_close).abs(),
    (low - prev_close).abs()
], axis=1).max(axis=1)

df_n["ATR"] = tr.rolling(7).mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_n["ATR"] = tr.rolling(7).mean()


In [65]:
import numpy as np

log_diff = np.log(df_n["Close"]/df_n["Close"].shift(1))
df_n["SD_Log_Close"] = log_diff.rolling(7).std()
df_n["ATR_normalized"] = (df_n["ATR"] - df_n["ATR"].mean())/df_n["ATR"].std()
df_n["SD_normalized"] = (df_n["SD_Log_Close"] - df_n["SD_Log_Close"].mean())/df_n["SD_Log_Close"].std()
#xlb, xle, 
df_n

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_n["SD_Log_Close"] = log_diff.rolling(7).std()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_n["ATR_normalized"] = (df_n["ATR"] - df_n["ATR"].mean())/df_n["ATR"].std()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_n["SD_normalized"] = (df_n["SD_Log_Close"] - df_n["SD_Log_Close"].mean())/d

Price,Open,High,Low,Close,Volume,Adj Close,ATR,SD_Log_Close,ATR_normalized,SD_normalized
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-03,148.25,148.25,143.88,145.44,8164300,92.14,,,,
2000-01-04,143.53,144.06,139.64,139.75,8089800,88.54,,,,
2000-01-05,139.94,141.53,137.25,140.00,12177900,88.70,,,,
2000-01-06,139.62,141.50,137.75,137.75,6227200,87.27,,,,
2000-01-07,140.31,145.75,140.06,145.75,8066500,92.34,,,,
...,...,...,...,...,...,...,...,...,...,...
2025-06-30,617.38,619.22,615.04,617.85,92502500,617.85,5.868571,0.005666,1.151276,-0.589089
2025-07-01,616.36,618.83,615.52,617.65,70030100,617.65,5.398571,0.004345,0.965648,-0.766251
2025-07-02,617.24,620.49,616.61,620.45,66510400,620.45,4.717143,0.003906,0.696516,-0.825029
2025-07-03,622.45,626.28,622.43,625.34,51065800,625.34,4.450000,0.003185,0.591007,-0.921698


In [66]:
import numpy as np
from scipy.stats import linregress

def slope(x):
    idx = np.arange(len(x))
    return linregress(idx, x).slope

df_n['atr_slope_7'] = df_n['ATR'].rolling(window=7).apply(slope, raw=True)
df_n['stock_slope'] = df_n['Open'].rolling(window=7).apply(slope, raw=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_n['atr_slope_7'] = df_n['ATR'].rolling(window=7).apply(slope, raw=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_n['stock_slope'] = df_n['Open'].rolling(window=7).apply(slope, raw=True)


In [67]:
#drop na values (first 6 rows)
df_n = df_n.dropna()
df_n = df_n.reset_index()


In [68]:
df_n["index"] = df_n["index"]%7

KeyError: 'index'

In [None]:
df_n

Price,Date,Open,High,Low,Close,Volume,Adj Close,ATR,SD_Log_Close,ATR_normalized,SD_normalized
0,2000-01-12,144.59,144.59,142.88,143.06,6907700,90.64,4.024286,0.029618,0.422870,2.622685
1,2000-01-13,144.47,145.75,143.28,145.00,5158300,91.87,3.580000,0.024822,0.247398,1.979539
2,2000-01-14,146.53,147.47,145.97,146.97,7437300,93.11,3.321429,0.024942,0.145274,1.995695
3,2000-01-18,145.34,146.62,145.19,145.81,6488500,92.38,3.040000,0.023834,0.034123,1.847051
4,2000-01-19,145.31,147.00,145.00,147.00,6157900,93.13,2.182857,0.011098,-0.304408,0.139264
...,...,...,...,...,...,...,...,...,...,...,...
6403,2025-06-30,617.38,619.22,615.04,617.85,92502500,617.85,5.868571,0.005666,1.151276,-0.589089
6404,2025-07-01,616.36,618.83,615.52,617.65,70030100,617.65,5.398571,0.004345,0.965648,-0.766251
6405,2025-07-02,617.24,620.49,616.61,620.45,66510400,620.45,4.717143,0.003906,0.696516,-0.825029
6406,2025-07-03,622.45,626.28,622.43,625.34,51065800,625.34,4.450000,0.003185,0.591007,-0.921698


In [None]:
# df_n["Delta_Days"] = [(df_n.loc[i,"Date"] - df_n.loc[0,"Date"]).days for i in range(len(df_n))]
# #save clean data
# df_n.to_csv("Clean_Data.csv")

In [None]:
#define lstm model
import torch
from torch.nn import *
import torch.nn.functional as F

class NN_LSTM(Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.lstm = LSTM(input_size=input_size,hidden_size=30)
        self.fc = Linear(30,output_size)
    def activation(self,X):
        return F.relu(X)
    def forward(self,input):
        input,_ = self.lstm(input)
        input = self.fc(input[-1,:,:])
        return input #return the last prediction
#lstm_layer = LSTM(input_size=4,hidden_size=30)

In [None]:
#define PDE loss
def PDE_loss(v_hat,):
    return 0
    #use torch.autograd to get PDE loss

In [None]:
def tt_split(df_n,vol_metric):
    train = df_n.loc[[i<=len(df_n)*4/5 for i in range(len(df_n))]]
    X_train = train[["index","Open","Close","High","Low", vol_metric]].to_numpy()
    y_train = train[vol_metric].to_numpy()

    test = df_n.loc[[i>len(df_n)*4/5 for i in range(len(df_n))]]
    X_test = test[["index","Open","Close","High","Low", vol_metric]].to_numpy()
    y_test = test[vol_metric].to_numpy()
    return X_train,y_train,X_test,y_test

X_train,y_train,X_test,y_test = tt_split(df_n,"ATR_normalized")

#bollinger bands
#try moving median instead of moving average
#try moving quartiles instead of std; q3-q2  *1/2
#take longer time period - five years

#lstm,cnn,and rnn cant either predict directional volatiliy for xl stocks


KeyError: "['index'] not in index"

In [None]:
def make_seq(X_train,y_train,X_test,y_test):
    T = 30  # sequence length (window size)
    X_seq = []
    y_seq = []
    X_seq_test = []
    y_seq_test = []

    for i in range(len(X_train) - T):
        X_seq.append(X_train[i:i+T])  # shape: [T, 6] <- what is wanted in lstm
        y_seq.append(y_train[i+T])    # predict next ATR value
    for i in range(len(X_test)-T):    
        X_seq_test.append(X_test[i:i+T])
        y_seq_test.append(y_test[i+T])


    X_seq = torch.tensor(X_seq, dtype=torch.float32)
    y_seq = torch.tensor(y_seq, dtype=torch.float32).unsqueeze(1)
    X_seq_test = torch.tensor(X_seq_test, dtype=torch.float32)
    y_seq_test = torch.tensor(y_seq_test, dtype=torch.float32).unsqueeze(1)
    
    return X_seq,y_seq,X_seq_test,y_seq_test
X_seq,y_seq,X_seq_test,y_seq_test = make_seq(X_train,y_train,X_test,y_test)

In [None]:
from torch.utils.data import DataLoader, TensorDataset

def create_loaders(X_seq,y_seq,X_seq_test,y_seq_test,batch_size=64):
    dataset = TensorDataset(X_seq, y_seq)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    dataset_test = TensorDataset(X_seq_test,y_seq_test)
    loader_test = DataLoader(dataset_test,batch_size=batch_size,shuffle=True)
    return loader,loader_test
loader,loader_test = create_loaders(X_seq,y_seq,X_seq_test,y_seq_test)

In [None]:
# #training loop
# model = NN_LSTM(input_size=5,output_size=1)
# epochs = 100
# optim = torch.optim.Adam(params = model.parameters())
# crit = MSELoss()
# losses = []
# losses_test = []

# for i in range(epochs):
#     running_loss = 0
#     for x_window,y_atr in loader:
#         #print("Running")
#         input = x_window.permute(1,0,2) #shape = [seq_length,batch_length,4]
#         out = model(input)
#         #print(y_atr.shape)
#         #print(out,y_atr)
#         #break
#         loss = crit(out,y_atr)
#         running_loss+=loss.item()
#         optim.zero_grad()
#         loss.backward()
#         optim.step()
#             # could try loss += (i+1)/period/sum(j/period for j in range(period))crit(out,y_train[i]); adds a coeff to give more weigt to recent ones
#     #break
#     running_loss/=(len(loader))
#     #print(f"Training Loss: {running_loss}")
#     losses.append(running_loss)

#     with torch.no_grad():
#         testing_loss = 0
#         for x_window_test,y_atr_test in loader_test:
#             out_test = model(x_window_test.permute(1,0,2))
#             #print(y_atr_test.shape)
#             loss = crit(out_test,y_atr_test)
#             testing_loss+=loss.item()
#         losses_test.append(testing_loss/(len(loader_test)))
    


In [None]:
# import seaborn as sns
# import matplotlib.pyplot as plt

# sns.lineplot(x=[i for i in range(len(losses))],y=losses)
# plt.title("Training Loss of LSTM (ATR_normalized) across 60 epochs")

In [None]:
# sns.lineplot(losses_test)
# plt.title("Testing Loss of LSTM (ATR_normalized) across 60 epochs")

In [None]:
# X_train_sd,y_train_sd,X_test_sd,y_test_sd = tt_split(df_n,vol_metric="SD_normalized")
# X_seq_sd,y_seq_sd,X_seq_test_sd,y_seq_test_sd = make_seq(X_train_sd,y_train_sd,X_test_sd,y_test_sd)
# loader_sd,loader_test_sd = create_loaders(X_seq_sd,y_seq_sd,X_seq_test_sd,y_seq_test_sd)

In [None]:
# #training loop
# model_sd = NN_LSTM(input_size=5,output_size=1)
# epochs = 100
# optim = torch.optim.Adam(params = model_sd.parameters())
# crit = MSELoss()
# losses = []
# losses_test = []

# for i in range(epochs):
#     running_loss = 0
#     for x_window,y_sd in loader_sd:
#         #print("Running")
#         input = x_window.permute(1,0,2) #shape = [seq_length,batch_length,4]
#         out = model_sd(input)
#         #print(y_sd.shape)
#         loss = crit(out,y_sd)
#         running_loss+=loss.item()
#         optim.zero_grad()
#         loss.backward()
#         optim.step()
#             # could try loss += (i+1)/period/sum(j/period for j in range(period))crit(out,y_train[i]); adds a coeff to give more weigt to recent ones
#     running_loss/=(len(loader_sd))
#     #print(f"Training Loss: {running_loss}")
#     losses.append(running_loss)

#     with torch.no_grad():
#         testing_loss = 0
#         for x_window_test,y_sd_test in loader_test_sd:
#             out_test = model_sd(x_window_test.permute(1,0,2))
#             #print(y_sd_test.shape)
#             loss = crit(out_test,y_sd_test)
#             testing_loss+=loss.item()
#         losses_test.append(testing_loss/(len(loader_test_sd)))


In [None]:
# sns.lineplot(losses)
# plt.title("Training Loss of LSTM (SD__normalized) across 60 epochs")

In [None]:
# sns.lineplot(losses_test)
# plt.title("Testing Loss of LSTM (SD_normalized) across 60 epochs")

In [None]:
#use past atrs
#try transformations of atr log(atr)

In [None]:
def pipeline(ticker,start,end,metric):
    df = get_stock(ticker,start_date=start,end_date=end,s_window=14,l_window=50)
    df_n= df.xs(ticker,axis=1,level=1)
    
        #define ATR columns
    high = df_n["High"]
    low = df_n["Low"]
    close = df_n["Close"]

    prev_close = close.shift(1)


    tr = pd.concat([
        high - low,
        (high - prev_close).abs(),
        (low - prev_close).abs()
    ], axis=1).max(axis=1)

    df_n["ATR"] = tr.rolling(7).mean()
    
    log_diff = np.log(df_n["Close"]/df_n["Close"].shift(1))
    df_n["SD_Log_Close"] = log_diff.rolling(7).std()
    df_n["ATR_normalized"] = (df_n["ATR"] - df_n["ATR"].mean())/df_n["ATR"].std()
    df_n["SD_normalized"] = (df_n["SD_Log_Close"] - df_n["SD_Log_Close"].mean())/df_n["SD_Log_Close"].std()

    df_n = df_n.dropna()
    df_n = df_n.reset_index()
    df_n["index"] = df_n.index%7 
    print(df_n)

    X_train,y_train,X_test,y_test = tt_split(df_n, metric)

    X_seq,y_seq,X_seq_test,y_seq_test = make_seq(X_train,y_train,X_test,y_test)

    loader,loader_test = create_loaders(X_seq,y_seq,X_seq_test,y_seq_test)

    #training loop
    
    model = NN_LSTM(input_size=6,output_size=1)
    epochs = 100
    optim = torch.optim.Adam(params = model.parameters())
    crit = MSELoss()
    losses = []
    losses_test = []

    for i in range(epochs):
        running_loss = 0
        for x_window,y_atr in loader:
            #print("Running")
            input = x_window.permute(1,0,2) #shape = [seq_length,batch_length,4]
            out = model(input)
            #print(y_atr.shape)
            #print(out,y_atr)
            #break
            loss = crit(out,y_atr)
            running_loss+=loss.item()
            optim.zero_grad()
            loss.backward()
            optim.step()
                # could try loss += (i+1)/period/sum(j/period for j in range(period))crit(out,y_train[i]); adds a coeff to give more weigt to recent ones
        #break
        running_loss/=(len(loader))
        #print(f"Training Loss: {running_loss}")
        losses.append(running_loss)

        with torch.no_grad():
            testing_loss = 0
            for x_window_test,y_atr_test in loader_test:
                out_test = model(x_window_test.permute(1,0,2))
                #print(y_atr_test.shape)
                loss = crit(out_test,y_atr_test)
                testing_loss+=loss.item()
            losses_test.append(testing_loss/(len(loader_test)))

    sns.lineplot(x=[i for i in range(len(losses))],y=losses)
    plt.title(f"Training Loss of LSTM ({metric}) across 60 epochs for {ticker}")
    plt.show()

    sns.lineplot(losses_test)
    plt.title(f"Testing Loss of LSTM ({metric}) across 60 epochs for {ticker}")
    plt.show()

    return min(losses),min(losses_test)

    

In [None]:
start = "2000-01-01"
end = "2025-07-14"
#pipeline("SPY",start,end,metric="ATR_normalized")

In [None]:
#pipeline("XLP",start,end,metric="SD_normalized")
import pandas as pd

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
etfs = ['XLB','XLE','XLF','XLI','XLP','XLV','XLY','XLU']
metrics = ["ATR_normalized","SD_normalized"]
df = pd.DataFrame({"etf":[],"metric":[],"train_loss":[],"test_loss":[]})
for etf in etfs[:2]:
    for metric in metrics:
        train_loss_atr, test_loss_atr = pipeline(etf,start,end,metric=metric)
        df.loc[len(df)] = [etf,metric,train_loss_atr,test_loss_atr]


[*********************100%***********************]  1 of 1 completed


read  6419  lines of data for ticker:  XLB
Price       Date   Open   High    Low  Close   Volume  Adj Close       ATR  \
0     2000-01-12  26.61  27.03  26.61  26.64   135300      15.04  0.640000   
1     2000-01-13  26.89  26.97  26.62  26.75    45900      15.11  0.627143   
2     2000-01-14  26.52  26.72  26.44  26.56    76000      15.00  0.515714   
3     2000-01-18  26.22  26.31  25.69  25.92    34400      14.64  0.504286   
4     2000-01-19  25.78  25.95  25.42  25.42   125700      14.36  0.510000   
...          ...    ...    ...    ...    ...      ...        ...       ...   
6407  2025-07-07  90.90  91.22  90.01  90.49  5880800      90.49  1.254286   
6408  2025-07-08  90.68  91.52  90.57  91.23  8173800      91.23  1.224286   
6409  2025-07-09  91.47  91.88  90.92  91.78  6717800      91.78  1.235714   
6410  2025-07-10  92.00  92.86  91.81  92.27  6432800      92.27  1.297143   
6411  2025-07-11  91.28  91.68  90.81  91.52  5781600      91.52  1.108571   

Price  SD_Log_Close 

: 

In [None]:
df

Price,Date,Year,Month,Day,Weekday,Week_Number,Year_Week,Open,High,Low,Close,Volume,Adj Close,Return,Short_MA,Long_MA
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,SPY,SPY,SPY,SPY,SPY,SPY,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2000-01-03,2000-01-03,2000,1,3,Monday,01,2000-01,148.25,148.25,143.88,145.44,8164300,92.14,,92.140000,92.140000
2000-01-04,2000-01-04,2000,1,4,Tuesday,01,2000-01,143.53,144.06,139.64,139.75,8089800,88.54,-0.039106,90.340000,90.340000
2000-01-05,2000-01-05,2000,1,5,Wednesday,01,2000-01,139.94,141.53,137.25,140.00,12177900,88.70,0.001789,89.793333,89.793333
2000-01-06,2000-01-06,2000,1,6,Thursday,01,2000-01,139.62,141.50,137.75,137.75,6227200,87.27,-0.016071,89.162500,89.162500
2000-01-07,2000-01-07,2000,1,7,Friday,01,2000-01,140.31,145.75,140.06,145.75,8066500,92.34,0.058076,89.798000,89.798000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-06-30,2025-06-30,2025,6,30,Monday,26,2025-26,617.38,619.22,615.04,617.85,92502500,617.85,0.004781,603.100714,579.810400
2025-07-01,2025-07-01,2025,7,1,Tuesday,26,2025-26,616.36,618.83,615.52,617.65,70030100,617.65,-0.000324,604.268571,581.666200
2025-07-02,2025-07-02,2025,7,2,Wednesday,26,2025-26,617.24,620.49,616.61,620.45,66510400,620.45,0.004533,605.758571,583.827800
2025-07-03,2025-07-03,2025,7,3,Thursday,26,2025-26,622.45,626.28,622.43,625.34,51065800,625.34,0.007881,607.427857,585.820600


In [72]:
df_n

Price,Date,Open,High,Low,Close,Volume,Adj Close,ATR,SD_Log_Close,ATR_normalized,SD_normalized,atr_slope_7,stock_slope
0,2000-01-20,146.97,146.97,143.81,144.75,5800100,91.71,2.370000,0.012651,-0.230495,0.347628,-0.368776,0.206786
1,2000-01-21,145.50,145.50,144.06,144.44,6244800,91.51,2.182857,0.011798,-0.304408,0.233226,-0.324388,0.232500
2,2000-01-24,145.66,145.84,139.41,140.34,7896900,88.92,2.857143,0.015872,-0.038097,0.779455,-0.182704,0.112143
3,2000-01-25,140.52,141.94,139.00,141.94,9942500,89.93,2.892857,0.015525,-0.023991,0.732997,-0.058980,-0.614286
4,2000-01-26,141.00,141.55,140.09,140.81,5158100,89.21,2.804286,0.013730,-0.058973,0.492267,0.042857,-0.853929
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6398,2025-06-30,617.38,619.22,615.04,617.85,92502500,617.85,5.868571,0.005666,1.151276,-0.589089,-0.111990,3.476429
6399,2025-07-01,616.36,618.83,615.52,617.65,70030100,617.65,5.398571,0.004345,0.965648,-0.766251,-0.221327,3.393929
6400,2025-07-02,617.24,620.49,616.61,620.45,66510400,620.45,4.717143,0.003906,0.696516,-0.825029,-0.318673,2.286429
6401,2025-07-03,622.45,626.28,622.43,625.34,51065800,625.34,4.450000,0.003185,0.591007,-0.921698,-0.308265,2.271429


In [82]:
import math
    

In [89]:

def run_sim(df_n):
    money = 200
    shares = 0
    for i in range(len(df_n['Open'])):
        price = money/df_n['Open'].loc[i]
        if (df_n['atr_slope_7'].loc[i] < 0) & (df_n['stock_slope'].loc[i] > 0):
            max_shares = math.floor(price)
            shares += max_shares
            money -= (max_shares * price)
        elif (df_n['atr_slope_7'].loc[i] > 0) & (df_n['stock_slope'].loc[i] < 0):
            money+= (shares*price)
            shares = 0
    money+= (shares*price)
    shares = 0

    return money

run_sim(df_n)



    

np.float64(133.23982522451678)