In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

In [2]:
# read in data 
df = pd.read_csv("After_Pivoting_dataset.csv")

In [3]:
df.columns

Index(['Date', 'Ticker', 'Bollinger_High', 'Bollinger_Low', 'Bollinger_MAVG',
       'Close', 'Close_scaled', 'High_scaled', 'Low_scaled', 'MACD_Hist',
       'MACD_Line', 'MACD_Signal', 'OBV', 'Open_scaled', 'RSI_14', 'Volume'],
      dtype='object')

In [4]:
dates = df["Date"]
train = dates < "2023-01-01"
test = dates >= "2023-01-01"

train1 = df[train].copy()
test1 = df[test].copy()

# ✅ Shift Close_scaled backward to make target = t+1 day's Close_scaled
train1["target"] = train1["Close_scaled"].shift(-1)
test1["target"] = test1["Close_scaled"].shift(-1)

# ✅ Drop the last row in each set (since its t+1 target is NaN)
train1.dropna(subset=["target"], inplace=True)
test1.dropna(subset=["target"], inplace=True)

# ✅ Feature sets
X_train = train1[['Bollinger_High', 'Bollinger_Low', 'Bollinger_MAVG',
                  'High_scaled', 'Low_scaled', 'MACD_Hist',
                  'MACD_Line', 'MACD_Signal', 'OBV', 'Open_scaled',
                  'RSI_14', 'Volume', 'Close_scaled']]

y_train = train1["target"]

X_test = test1[['Bollinger_High', 'Bollinger_Low', 'Bollinger_MAVG',
                'High_scaled', 'Low_scaled', 'MACD_Hist',
                'MACD_Line', 'MACD_Signal', 'OBV', 'Open_scaled',
                'RSI_14', 'Volume', 'Close_scaled']]

y_test = test1["target"]

In [5]:
model = RandomForestRegressor(n_estimators = 250, max_depth = 5)
model.fit(X_train, y_train)

In [6]:
y_pred = model.predict(X_test)

In [7]:
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [8]:
print(rmse)
print(mae)
print(r2)

0.7399191393757985
0.5275071085162235
0.2525459405385657


In [9]:
for ticker in df['Ticker'].unique():
    mask = df['Ticker'] == ticker
    X_ticker, y_ticker = X_test[mask], y_test[mask]
    r2 = r2_score(y_ticker, model.predict(X_ticker))
    print(f"{ticker}: R² = {r2:.2f}")

AGG: R² = 0.84
BND: R² = -0.80
DBC: R² = -0.31
DIA: R² = 0.54
DVY: R² = 0.18
EEM: R² = 0.56
EFA: R² = 0.30
EMB: R² = 0.30
EWA: R² = 0.63


  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]


EWC: R² = 0.49
EWG: R² = 0.21
EWH: R² = -0.00
EWJ: R² = 0.29
EWU: R² = 0.05
GLD: R² = -0.20
HYG: R² = 0.04
IAK: R² = 0.35
IAT: R² = -0.57
IAU: R² = -0.37
IBB: R² = 0.29
ICF: R² = 0.43
IDU: R² = 0.02
IEF: R² = -0.56


  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]


IGV: R² = -0.09
IHE: R² = 0.24
IHF: R² = 0.14
IHI: R² = 0.28
IJJ: R² = 0.75
IJK: R² = 0.64
IJS: R² = 0.81
IJT: R² = 0.48
ITB: R² = 0.36
ITOT: R² = 0.73
IUSG: R² = 0.41
IUSV: R² = 0.64
IVV: R² = 0.83
IWB: R² = 0.65


  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]


IWD: R² = 0.35
IWF: R² = 0.38
IWM: R² = 0.80
IWN: R² = 0.76
IWO: R² = 0.74
IWP: R² = 0.68
IWS: R² = 0.56
IYC: R² = -0.14
IYE: R² = 0.27
IYF: R² = 0.83
IYG: R² = 0.21
IYH: R² = 0.35
IYK: R² = 0.10
IYM: R² = 0.36


  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]


IYR: R² = 0.03
IYW: R² = -0.06
LQD: R² = 0.23
PFF: R² = 0.09
QQQ: R² = 0.19
REM: R² = 0.53
SCHA: R² = 0.68
SCHB: R² = 0.37
SCHE: R² = 0.55
SCHF: R² = 0.53
SCHX: R² = -0.01
SCHZ: R² = -0.59
SDY: R² = -0.05
SHY: R² = -0.12


  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]


SLV: R² = -0.10
SOXX: R² = 0.59
SPY: R² = -0.09
TLT: R² = -0.30
UNG: R² = -0.69
USO: R² = -0.14
VB: R² = 0.79
VBK: R² = 0.73
VBR: R² = 0.50
VEU: R² = 0.50
VIG: R² = 0.42
VNQ: R² = 0.58
VO: R² = 0.78
VOE: R² = 0.55


  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]


VOO: R² = 0.69
VOT: R² = 0.78
VTI: R² = 0.65
VTV: R² = 0.31
VUG: R² = 0.27
VWO: R² = 0.28
VYM: R² = 0.59
XLB: R² = 0.23
XLE: R² = 0.23
XLF: R² = 0.56
XLI: R² = 0.32
XLK: R² = -0.32
XLP: R² = 0.30


  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]


XLU: R² = 0.08
XLV: R² = 0.15
XLY: R² = -0.18


  X_ticker, y_ticker = X_test[mask], y_test[mask]
  X_ticker, y_ticker = X_test[mask], y_test[mask]


In [10]:
# saving results
df_new = df[df["Date"] >= "2023-01-01"].copy()
df_new = df_new.drop(['Bollinger_High', 'Bollinger_Low', 'Bollinger_MAVG', 'Close', 
              'High_scaled', 'Low_scaled', 'MACD_Hist', 'MACD_Line', 'MACD_Signal', 'OBV', 'Open_scaled', 'RSI_14', 'Volume'], axis=1)

In [11]:
df_new["Pred"] = y_pred

ValueError: Length of values (50729) does not match length of index (50730)

In [None]:
df_new

Unnamed: 0,Date,Ticker,Close_scaled,Pred
208905,2023-01-03,AGG,1.781440,1.514439
208906,2023-01-03,BND,1.565326,1.514439
208907,2023-01-03,DBC,-2.180214,-1.126866
208908,2023-01-03,DIA,-0.078607,0.166733
208909,2023-01-03,DVY,-0.084591,-0.117087
...,...,...,...,...
259630,2025-02-19,XLK,-0.053708,0.089330
259631,2025-02-19,XLP,0.847979,0.838597
259632,2025-02-19,XLU,0.404123,0.428472
259633,2025-02-19,XLV,1.202650,0.875571


In [None]:
# save data 
df_new.to_pickle("DF_pred_n250_depth5_closeScaled.pickle")