In [1]:
# Import standard library modules
import sys

# Set the relative path to the project root directory
# NOTE(review): relative `sys.path` entries are resolved against the current
# working directory, so this presumably assumes the kernel is started from the
# notebook's own folder — confirm.
relative_path_to_root = "../../"

# Add the project root to the system path for importing in-house modules
sys.path.append(relative_path_to_root)

# Import in-house modules from the 'utilities' package
from utilities import load_data, save_data
# NOTE(review): `save_data` is already imported on the line above, so this
# second import is redundant.
from utilities import save_data
# NOTE(review): a later cell defines its own `split_dataset_by_date`, which
# replaces this imported name for the rest of the notebook.
from utilities import split_dataset_by_date

In [2]:
# Data manipulation and analysis
import pandas as pd

In [3]:
# Location of the raw dataset, given without a file extension.
# NOTE(review): the load message printed by this cell reports
# `updated_w_nas.csv.bz2` read from `updated_w_nas.zip`, so `load_data`
# presumably resolves the extension/archive itself — confirm in `utilities`.
file_name = "updated_w_nas"
file_path = f"../../data/raw_data/{file_name}"

# Load the full raw dataset through the in-house helper.
raw_data = load_data(file_path)

[1m[36m╔═══════════════════════════════════════════════════════════════╗[0m
[1m[36m║[0m[1m[96m File `updated_w_nas.csv.bz2` loaded from `updated_w_nas.zip`  [0m[1m[36m║[0m
[1m[36m╚═══════════════════════════════════════════════════════════════╝[0m


In [4]:
# Sanity-check the load: report (rows, columns), then preview the first rows.
print("Shape:", raw_data.shape)
raw_data.head()

Shape: (2121735, 18)


Unnamed: 0,Date,Ticker,Adjusted Close,Today to Tomorrow,Yesterday to Today,Next Day Close,Previous Day Close,Return,Volatility,RSI,SMA_50,SMA_100,SMA_200,Upper Band,Lower Band,Support,Resistance,Action
0,2008-01-02,A,23.256388,-1.0,-1.0,23.025745,23.538284,-0.011976,0.015704,48.827595,23.314173,23.299887,23.564934,24.72725,22.540232,21.392027,24.351938,short
1,2008-01-02,AAPL,5.876342,1.0,-1.0,5.879056,5.974059,-0.016357,0.018937,59.067417,5.518483,4.939064,4.19763,6.135834,5.403559,4.637375,6.026838,buy
2,2008-01-02,ABBV,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,2008-01-02,ABNB,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,2008-01-02,ABT,18.130213,-1.0,-1.0,18.01976,18.24065,-0.006054,0.010484,34.67727,18.138458,17.62825,17.709028,19.233109,18.221802,16.775555,19.134012,short


In [5]:
def split_dataset_by_date(raw_data: pd.DataFrame, start_date: str, end_date: str):
    """Return the rows of `raw_data` whose `Date` lies within a date window.

    Args:
        raw_data: Frame containing a `Date` column.
        start_date: Lower bound of the window (inclusive).
        end_date: Upper bound of the window (inclusive).

    Returns:
        The matching rows of `raw_data`, with their original index labels.
    """
    # `between` is inclusive on both ends by default, i.e. start <= Date <= end
    in_requested_window = raw_data["Date"].between(start_date, end_date)

    return raw_data.loc[in_requested_window]

In [6]:
# The date filter keeps rows with start_date <= Date <= end_date, so using the
# same value for both bounds selects a single day.
start_date = "2024-10-25"
end_date = "2024-10-25"

todays_data = split_dataset_by_date(raw_data, start_date, end_date)

# Compare the date range actually present in the slice with the requested one.
print("Start date:", todays_data["Date"].min())
print("End date:", todays_data["Date"].max())
print("Requested start date:", start_date)
print("Requested end date:", end_date)

Start date: 2024-10-25
End date: 2024-10-25
Requested start date: 2024-10-25
Requested end date: 2024-10-25


In [7]:
# Report (rows, columns) of the single-day slice, then preview its first rows.
print("Shape:", todays_data.shape)
todays_data.head()

Shape: (501, 18)


Unnamed: 0,Date,Ticker,Adjusted Close,Today to Tomorrow,Yesterday to Today,Next Day Close,Previous Day Close,Return,Volatility,RSI,SMA_50,SMA_100,SMA_200,Upper Band,Lower Band,Support,Resistance,Action
2120733,2024-10-25,A,130.19,1.0,-1.0,131.62,130.69,-0.003826,0.012138,18.89193,140.33304,136.65565,137.9486,152.14601,129.75839,130.19,148.244,buy
2120734,2024-10-25,AAPL,231.41,1.0,1.0,233.49,230.57,0.003643,0.01367,66.38572,226.8016,221.79678,200.76889,237.80995,222.31105,216.32,236.48,hold
2120735,2024-10-25,ABBV,187.85,1.0,-1.0,189.74,189.65,-0.009491,0.008503,38.957226,192.70592,183.02982,174.24835,197.08025,185.91818,186.54,197.77069,buy
2120736,2024-10-25,ABNB,134.58,1.0,1.0,135.82,132.75,0.013785,0.014979,59.338783,124.9472,133.4197,143.61612,139.94774,124.726265,114.28,137.19,hold
2120737,2024-10-25,ABT,114.22,-1.0,-1.0,114.13,116.54,-0.019907,0.010363,54.498634,114.14958,109.39779,109.50995,119.50028,111.01314,110.2504,119.39,short


In [8]:
def prepare_data_v2(main_data: pd.DataFrame) -> pd.DataFrame:
    """Add calendar features and index the frame by (`Date`, `Ticker`).

    The input frame is not modified; all work happens on a new frame.

    Args:
        main_data: Frame containing at least `Date` and `Ticker` columns.

    Returns:
        A new frame whose `Date` values are parsed to datetimes, with `Year`,
        `Month` and `Day` columns appended, indexed by `Date` and `Ticker`.
    """
    return (
        # Discard the incoming index; `reset_index` returns a new frame
        main_data.reset_index(drop=True)
        .assign(
            # Keyword arguments are evaluated in order, so the calendar
            # columns below read the already-parsed `Date` column
            Date=lambda frame: pd.to_datetime(frame["Date"]),
            Year=lambda frame: frame["Date"].dt.year,
            Month=lambda frame: frame["Date"].dt.month,
            Day=lambda frame: frame["Date"].dt.day,
        )
        .set_index(["Date", "Ticker"])
    )

In [9]:
# Build the prediction frame for the selected day. `prepare_data_v2` works on
# a new frame of its own, so `todays_data` is left untouched and no explicit
# copy is needed beforehand.
predict_data_raw = prepare_data_v2(todays_data)

print("Shape:", predict_data_raw.shape)
predict_data_raw

Shape: (501, 19)


Unnamed: 0_level_0,Unnamed: 1_level_0,Adjusted Close,Today to Tomorrow,Yesterday to Today,Next Day Close,Previous Day Close,Return,Volatility,RSI,SMA_50,SMA_100,SMA_200,Upper Band,Lower Band,Support,Resistance,Action,Year,Month,Day
Date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-10-25,A,130.19,1.0,-1.0,131.620,130.69,-0.003826,0.012138,18.891930,140.333040,136.65565,137.94860,152.14601,129.758390,130.1900,148.24400,buy,2024,10,25
2024-10-25,AAPL,231.41,1.0,1.0,233.490,230.57,0.003643,0.013670,66.385720,226.801600,221.79678,200.76889,237.80995,222.311050,216.3200,236.48000,hold,2024,10,25
2024-10-25,ABBV,187.85,1.0,-1.0,189.740,189.65,-0.009491,0.008503,38.957226,192.705920,183.02982,174.24835,197.08025,185.918180,186.5400,197.77069,buy,2024,10,25
2024-10-25,ABNB,134.58,1.0,1.0,135.820,132.75,0.013785,0.014979,59.338783,124.947200,133.41970,143.61612,139.94774,124.726265,114.2800,137.19000,hold,2024,10,25
2024-10-25,ABT,114.22,-1.0,-1.0,114.130,116.54,-0.019907,0.010363,54.498634,114.149580,109.39779,109.50995,119.50028,111.013140,110.2504,119.39000,short,2024,10,25
2024-10-25,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-25,XYL,130.42,1.0,-1.0,130.835,130.65,-0.001760,0.009297,39.080486,133.510680,134.41902,130.84924,138.39810,129.943900,126.7100,137.53000,buy,2024,10,25
2024-10-25,YUM,133.04,1.0,-1.0,134.870,133.16,-0.000901,0.006858,34.395523,134.611070,133.44423,134.00830,139.50783,130.664170,129.7100,139.92000,buy,2024,10,25
2024-10-25,ZBH,102.35,1.0,-1.0,103.550,104.00,-0.015865,0.010598,50.276680,107.366486,107.96990,115.37455,107.72132,101.312680,101.7700,115.91237,buy,2024,10,25
2024-10-25,ZBRA,360.09,1.0,-1.0,363.530,362.05,-0.005414,0.009781,43.893780,355.893200,336.89170,311.04196,379.99265,359.854340,320.7700,377.68000,buy,2024,10,25


In [10]:
# List the distinct target labels present for the selected day.
# NOTE(review): the output contains 0.0 alongside 1.0 and -1.0, while the
# prediction cell rewrites every predicted 0 to -1. Rows whose true label is 0
# can therefore never be counted as correct — confirm what 0 encodes.
predict_data_raw["Today to Tomorrow"].unique()

array([ 1., -1.,  0.])

In [11]:
# Drop columns that are not needed for the model
select_features_to_drop = ["Action", "Next Day Close", "Previous Day Close", "Resistance", "Upper Band", "SMA_50", "SMA_200"]
log_select_to_drop = ["Action", "Next Day Close", "Previous Day Close", "SMA_50", "Resistance", "Upper Band", "SMA_200","Next Day Close", "Adjusted Close", "Support", "Lower Band", "SMA_100"]

select_target_to_drop = ["Today to Tomorrow"]

predict_data_clean = predict_data_raw.drop(columns=[*select_features_to_drop, *select_target_to_drop])
log_predict_data_clean = predict_data_raw.drop(columns=[*log_select_to_drop, *select_target_to_drop])

predict_data_clean

Unnamed: 0_level_0,Unnamed: 1_level_0,Adjusted Close,Yesterday to Today,Return,Volatility,RSI,SMA_100,Lower Band,Support,Year,Month,Day
Date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-10-25,A,130.19,-1.0,-0.003826,0.012138,18.891930,136.65565,129.758390,130.1900,2024,10,25
2024-10-25,AAPL,231.41,1.0,0.003643,0.013670,66.385720,221.79678,222.311050,216.3200,2024,10,25
2024-10-25,ABBV,187.85,-1.0,-0.009491,0.008503,38.957226,183.02982,185.918180,186.5400,2024,10,25
2024-10-25,ABNB,134.58,1.0,0.013785,0.014979,59.338783,133.41970,124.726265,114.2800,2024,10,25
2024-10-25,ABT,114.22,-1.0,-0.019907,0.010363,54.498634,109.39779,111.013140,110.2504,2024,10,25
2024-10-25,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-25,XYL,130.42,-1.0,-0.001760,0.009297,39.080486,134.41902,129.943900,126.7100,2024,10,25
2024-10-25,YUM,133.04,-1.0,-0.000901,0.006858,34.395523,133.44423,130.664170,129.7100,2024,10,25
2024-10-25,ZBH,102.35,-1.0,-0.015865,0.010598,50.276680,107.96990,101.312680,101.7700,2024,10,25
2024-10-25,ZBRA,360.09,-1.0,-0.005414,0.009781,43.893780,336.89170,359.854340,320.7700,2024,10,25


In [12]:
import joblib

# Path of the saved scaler used for the `predict_data_clean` feature set.
# Note: `file_name` / `file_path` are reused here, replacing the raw-data path
# set in the loading cell.
file_name = "X_scaler.pkl"
file_path = f"../../models/christian's_models/{file_name}"

# NOTE(review): `joblib.load` unpickles the file, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
X_scaler = joblib.load(file_path)
print(f"Loaded {file_name} from {file_path}")

Loaded X_scaler.pkl from ../../models/christian's_models/X_scaler.pkl


In [13]:
# Path of the saved scaler used for the reduced `log_predict_data_clean`
# feature set (the one passed to the logistic-regression model).
file_name = "X_scaler_log.pkl"
file_path = f"../../models/christian's_models/{file_name}"

# Relies on `joblib` having been imported by the previous cell.
# NOTE(review): `joblib.load` unpickles the file, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
X_scaler_log = joblib.load(file_path)
print(f"Loaded {file_name} from {file_path}")

Loaded X_scaler_log.pkl from ../../models/christian's_models/X_scaler_log.pkl


In [14]:
# Scale the full feature set with the loaded scaler.
# NOTE(review): presumably the columns of `predict_data_clean` must match, in
# name and order, the columns the scaler was fitted on — confirm against the
# training notebook.
X_scaled = X_scaler.transform(predict_data_clean)

# Peek at the first scaled row
X_scaled[:1]

array([[ 0.21131333, -1.05245118, -0.20083421, -0.50805717, -2.04946246,
         0.26606012,  0.25283036,  0.28358385,  1.63653275,  1.03295107,
         1.0600341 ]])

In [15]:
# Scale the reduced feature set with its own loaded scaler.
# NOTE(review): presumably the columns of `log_predict_data_clean` must match,
# in name and order, the columns this scaler was fitted on — confirm against
# the training notebook.
X_scaled_log = X_scaler_log.transform(log_predict_data_clean)

# Peek at the first scaled row
X_scaled_log[:1]

array([[-1.05245118, -0.20083421, -0.50805717, -2.04946246,  1.63653275,
         1.03295107,  1.0600341 ]])

In [16]:
import pickle
from pathlib import Path

# File names of the pickled classifiers to load
model_names = [
    "clf_XGB_v2.pkl",
    "random_forest_classifier_v2.pkl",
    "logistic_regression_v1.pkl"
]

# Maps model name (file name without its extension) -> loaded model object
models = {}

for model_file in model_names:
    model_path = f"../../models/christian's_models/{model_file}"

    # SECURITY: `pickle.load` can execute arbitrary code embedded in the file;
    # only load model files that come from a trusted source.
    # Keep the file open only for as long as the load itself needs it.
    with open(model_path, "rb") as file:
        model = pickle.load(file)

    # Strip only the trailing extension, and use a separate loop variable so
    # the name being iterated over is never overwritten mid-iteration.
    model_name = Path(model_file).stem
    models[model_name] = model
    print(f"Loaded {model_name} from {model_path}")


Loaded clf_XGB_v2 from ../../models/christian's_models/clf_XGB_v2.pkl
Loaded random_forest_classifier_v2 from ../../models/christian's_models/random_forest_classifier_v2.pkl
Loaded logistic_regression_v1 from ../../models/christian's_models/logistic_regression_v1.pkl


In [17]:
# The XGBoost and random-forest models score the full scaled feature set;
# the logistic regression scores the reduced one
y_XGB = models["clf_XGB_v2"].predict(X_scaled)
y_RFC = models["random_forest_classifier_v2"].predict(X_scaled)
y_LR = models["logistic_regression_v1"].predict(X_scaled_log)

# Relabel every predicted `0` as `-1`, in place, in each prediction array
for predicted_labels in (y_XGB, y_RFC, y_LR):
    predicted_labels[predicted_labels == 0] = -1

# Preview the first five predictions of each model
y_XGB[:5], y_RFC[:5], y_LR[:5]

(array([1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1]), array([1., 1., 1., 1., 1.]))

In [18]:
# Keep only the identifying columns, the price and the actual target; the
# `.copy()` gives an independent frame, so later column additions do not touch
# `todays_data`.
todays_data_clean = todays_data[["Date", "Ticker", "Adjusted Close", "Today to Tomorrow"]].copy()

todays_data_clean

Unnamed: 0,Date,Ticker,Adjusted Close,Today to Tomorrow
2120733,2024-10-25,A,130.19,1.0
2120734,2024-10-25,AAPL,231.41,1.0
2120735,2024-10-25,ABBV,187.85,1.0
2120736,2024-10-25,ABNB,134.58,1.0
2120737,2024-10-25,ABT,114.22,-1.0
...,...,...,...,...
2121229,2024-10-25,XYL,130.42,1.0
2121230,2024-10-25,YUM,133.04,1.0
2121231,2024-10-25,ZBH,102.35,1.0
2121232,2024-10-25,ZBRA,360.09,1.0


In [19]:
# Place each model's predicted direction next to the actual target. `assign`
# returns a new frame, so `todays_data_clean` is not modified.
todays_data_predict = todays_data_clean.assign(
    **{
        "XGB Today to Tomorrow": y_XGB,
        "RanFC Today to Tomorrow": y_RFC,
        # The logistic-regression predictions are floats; store them as integers
        "Log_R Today to Tomorrow": y_LR.astype(int),
    }
)

# Show which labels each model actually produced
for prediction_column in (
    "XGB Today to Tomorrow",
    "RanFC Today to Tomorrow",
    "Log_R Today to Tomorrow",
):
    print(todays_data_predict[prediction_column].unique())

todays_data_predict


[ 1 -1]
[ 1 -1]
[ 1 -1]


Unnamed: 0,Date,Ticker,Adjusted Close,Today to Tomorrow,XGB Today to Tomorrow,RanFC Today to Tomorrow,Log_R Today to Tomorrow
2120733,2024-10-25,A,130.19,1.0,1,1,1
2120734,2024-10-25,AAPL,231.41,1.0,1,1,1
2120735,2024-10-25,ABBV,187.85,1.0,1,1,1
2120736,2024-10-25,ABNB,134.58,1.0,1,1,1
2120737,2024-10-25,ABT,114.22,-1.0,1,1,1
...,...,...,...,...,...,...,...
2121229,2024-10-25,XYL,130.42,1.0,1,1,1
2121230,2024-10-25,YUM,133.04,1.0,1,1,1
2121231,2024-10-25,ZBH,102.35,1.0,1,1,1
2121232,2024-10-25,ZBRA,360.09,1.0,1,1,1


In [20]:
# Count how often each model predicted each label
for prediction_column in (
    "XGB Today to Tomorrow",
    "RanFC Today to Tomorrow",
    "Log_R Today to Tomorrow",
):
    display(todays_data_predict[prediction_column].value_counts())

XGB Today to Tomorrow
 1    500
-1      1
Name: count, dtype: int64

RanFC Today to Tomorrow
 1    480
-1     21
Name: count, dtype: int64

Log_R Today to Tomorrow
 1    496
-1      5
Name: count, dtype: int64

In [21]:
# Actual direction that every model's prediction is compared against
actual_direction = todays_data_predict["Today to Tomorrow"]

# Add one boolean column per model (True where prediction equals the actual
# label), then index by (`Date`, `Ticker`). Both steps return new frames, so
# `todays_data_predict` is left unchanged.
correct_predictions = todays_data_predict.assign(
    **{
        "XGB Correct": actual_direction == todays_data_predict["XGB Today to Tomorrow"],
        "RanFC Correct": actual_direction == todays_data_predict["RanFC Today to Tomorrow"],
        "Log_R Correct": actual_direction == todays_data_predict["Log_R Today to Tomorrow"],
    }
).set_index(["Date", "Ticker"])

correct_predictions

Unnamed: 0_level_0,Unnamed: 1_level_0,Adjusted Close,Today to Tomorrow,XGB Today to Tomorrow,RanFC Today to Tomorrow,Log_R Today to Tomorrow,XGB Correct,RanFC Correct,Log_R Correct
Date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-10-25,A,130.19,1.0,1,1,1,True,True,True
2024-10-25,AAPL,231.41,1.0,1,1,1,True,True,True
2024-10-25,ABBV,187.85,1.0,1,1,1,True,True,True
2024-10-25,ABNB,134.58,1.0,1,1,1,True,True,True
2024-10-25,ABT,114.22,-1.0,1,1,1,False,False,False
2024-10-25,...,...,...,...,...,...,...,...,...
2024-10-25,XYL,130.42,1.0,1,1,1,True,True,True
2024-10-25,YUM,133.04,1.0,1,1,1,True,True,True
2024-10-25,ZBH,102.35,1.0,1,1,1,True,True,True
2024-10-25,ZBRA,360.09,1.0,1,1,1,True,True,True


In [22]:
# Tally correct versus incorrect predictions for each model
for correctness_column in ("XGB Correct", "RanFC Correct", "Log_R Correct"):
    display(correct_predictions[correctness_column].value_counts())

XGB Correct
True     356
False    145
Name: count, dtype: int64

RanFC Correct
True     354
False    147
Name: count, dtype: int64

Log_R Correct
True     356
False    145
Name: count, dtype: int64

In [23]:
# Optional export of the prediction table — currently disabled.
# NOTE(review): if re-enabled as written, this would pass a path under
# `data/raw_data/` to `save_data`; confirm that predictions belong next to the
# raw inputs before uncommenting.
# file_name = "todays_data_predict"
# file_path = f"../../data/raw_data/{file_name}"

# save_data(todays_data_predict, file_path)