In [22]:
# Import standard library modules
import sys

# Set the relative path to the project root directory
relative_path_to_root = "../../"

# Add the project root to the system path for importing in-house modules
sys.path.append(relative_path_to_root)

# Import in-house modules from the 'utilities' package
from utilities import clean_historical_data, check_tickers_for_missing_values
from utilities import calc_vif, calc_p_values, calc_correlation, highlight_vif, highlight_p_values, evaluate_regression_model, evaluate_cross_validation, evaluate_classifier_model
from utilities import load_data, save_data
from utilities import calculate_bollinger_bands, calculate_rsi, calculate_daily_volatility
from utilities import print_title, print_label, print_footer
from utilities import generate_trading_signals
from utilities import fetch_and_download_sp500_data, sp500_data_for_today
from utilities import print_dataframe_report, save_data
from utilities import temporal_train_test_split, split_dataset_by_date

In [19]:
# Data manipulation and analysis
import pandas as pd

# Date and time manipulation
from datetime import date

# File and directory manipulation
from pathlib import Path

# Data preprocessing and model selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

In [20]:
# Base name of the raw dataset (given without a file extension) and its
# location relative to this notebook.
file_name = "updated_w_nas"
file_path = f"../../data/raw_data/{file_name}"

# Load the dataset through the in-house `load_data` helper imported from `utilities`.
raw_data = load_data(file_path)

[1m[36m╔═══════════════════════════════════════════════════════════════╗[0m
[1m[36m║[0m[1m[96m File `updated_w_nas.csv.bz2` loaded from `updated_w_nas.zip`  [0m[1m[36m║[0m
[1m[36m╚═══════════════════════════════════════════════════════════════╝[0m


In [21]:
# Display the loaded frame; the notebook renders a truncated preview of it.
raw_data

Unnamed: 0,Date,Ticker,Adjusted Close,Today to Tomorrow,Yesterday to Today,Next Day Close,Previous Day Close,Return,Volatility,RSI,SMA_50,SMA_100,SMA_200,Upper Band,Lower Band,Support,Resistance,Action
0,2008-01-02,A,23.256388,-1.0,-1.0,23.025745,23.538284,-0.011976,0.015704,48.827595,23.314173,23.299887,23.564934,24.727250,22.540232,21.392027,24.351938,short
1,2008-01-02,AAPL,5.876342,1.0,-1.0,5.879056,5.974059,-0.016357,0.018937,59.067417,5.518483,4.939064,4.197630,6.135834,5.403559,4.637375,6.026838,buy
2,2008-01-02,ABBV,0.000000,0.0,0.0,0.000000,0.000000,,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
3,2008-01-02,ABNB,0.000000,0.0,0.0,0.000000,0.000000,,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
4,2008-01-02,ABT,18.130213,-1.0,-1.0,18.019760,18.240650,-0.006054,0.010484,34.677270,18.138458,17.628250,17.709028,19.233109,18.221802,16.775555,19.134012,short
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2121730,2024-10-28,XYL,130.835000,,1.0,,130.420000,0.003182,0.009305,39.970257,133.478420,134.334370,130.949620,138.418910,129.503590,126.710000,137.530000,
2121731,2024-10-28,YUM,134.870000,,1.0,,133.040000,0.013755,0.006897,50.767624,134.574110,133.394600,134.047180,138.692950,130.995060,129.710000,139.920000,
2121732,2024-10-28,ZBH,103.550000,,1.0,,102.350000,0.011725,0.010933,56.671688,107.229390,107.883995,115.284256,107.086190,101.507810,101.770000,115.912370,
2121733,2024-10-28,ZBRA,363.530000,,1.0,,360.090000,0.009553,0.009969,43.505260,356.281000,337.452000,311.584440,380.047030,359.120970,320.770000,377.680000,


In [28]:
# Date used to split the raw data into two frames.
todays_date = "2024-10-28"

# Split via the in-house helper. Presumably rows dated `todays_date` end up in
# `todays_data` and the remaining rows in `historical_data` — verify against
# `split_dataset_by_date`.
historical_data, todays_data = split_dataset_by_date(raw_data, todays_date)

print("Todays Date:", todays_date)

Todays Date: 2024-10-28


In [29]:
# Display the frame returned for `todays_date`.
todays_data

Unnamed: 0,Date,Ticker,Adjusted Close,Today to Tomorrow,Yesterday to Today,Next Day Close,Previous Day Close,Return,Volatility,RSI,SMA_50,SMA_100,SMA_200,Upper Band,Lower Band,Support,Resistance,Action
0,2024-10-28,A,131.620,,1.0,,130.19,0.010984,0.012264,21.425411,140.169080,136.640840,137.961560,151.502270,128.73973,130.1900,148.24400,
1,2024-10-28,AAPL,233.490,,1.0,,231.41,0.008988,0.013784,63.955185,226.950400,222.175250,201.011900,237.876570,222.29343,216.3200,236.48000,
2,2024-10-28,ABBV,189.740,,1.0,,187.85,0.010061,0.008103,45.739376,192.653440,183.300860,174.407040,196.414630,185.96626,186.5400,197.77069,
3,2024-10-28,ABNB,135.820,,1.0,,134.58,0.009214,0.014882,57.457336,125.337400,133.320100,143.597980,140.080760,125.49424,114.2800,137.19000,
4,2024-10-28,ABT,114.130,,-1.0,,114.22,-0.000788,0.010339,50.131912,114.222595,109.517050,109.521515,119.484985,111.09392,110.2504,119.39000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,2024-10-28,XYL,130.835,,1.0,,130.42,0.003182,0.009305,39.970257,133.478420,134.334370,130.949620,138.418910,129.50359,126.7100,137.53000,
497,2024-10-28,YUM,134.870,,1.0,,133.04,0.013755,0.006897,50.767624,134.574110,133.394600,134.047180,138.692950,130.99506,129.7100,139.92000,
498,2024-10-28,ZBH,103.550,,1.0,,102.35,0.011725,0.010933,56.671688,107.229390,107.883995,115.284256,107.086190,101.50781,101.7700,115.91237,
499,2024-10-28,ZBRA,363.530,,1.0,,360.09,0.009553,0.009969,43.505260,356.281000,337.452000,311.584440,380.047030,359.12097,320.7700,377.68000,


In [30]:
def prepare_data_v2(main_data: pd.DataFrame) -> pd.DataFrame:
    """Return a (Date, Ticker)-indexed copy of ``main_data`` with calendar columns.

    The input frame is not modified. Its existing index is discarded, the
    ``Date`` column is parsed to datetimes, and ``Year``, ``Month`` and ``Day``
    columns derived from it are added before ``Date`` and ``Ticker`` are moved
    into the index.

    Args:
        main_data: Frame containing at least ``Date`` and ``Ticker`` columns.

    Returns:
        A new DataFrame indexed by ``["Date", "Ticker"]``.
    """
    # Work on an independent copy with a fresh positional index.
    frame = main_data.copy().reset_index(drop=True)

    # Parse once and reuse the parsed values for every derived column.
    timestamps = pd.to_datetime(frame["Date"])

    enriched = frame.assign(
        Date=timestamps,
        Year=timestamps.dt.year,
        Month=timestamps.dt.month,
        Day=timestamps.dt.day,
    )

    return enriched.set_index(["Date", "Ticker"])

In [33]:
# Index today's rows by (Date, Ticker) and add the Year/Month/Day columns.
# `prepare_data_v2` works on its own copy of the input, so `todays_data`
# is left untouched.
prep_data = prepare_data_v2(todays_data)

prep_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Adjusted Close,Today to Tomorrow,Yesterday to Today,Next Day Close,Previous Day Close,Return,Volatility,RSI,SMA_50,SMA_100,SMA_200,Upper Band,Lower Band,Support,Resistance,Action,Year,Month,Day
Date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-10-28,A,131.620,,1.0,,130.19,0.010984,0.012264,21.425411,140.169080,136.640840,137.961560,151.502270,128.73973,130.1900,148.24400,,2024,10,28
2024-10-28,AAPL,233.490,,1.0,,231.41,0.008988,0.013784,63.955185,226.950400,222.175250,201.011900,237.876570,222.29343,216.3200,236.48000,,2024,10,28
2024-10-28,ABBV,189.740,,1.0,,187.85,0.010061,0.008103,45.739376,192.653440,183.300860,174.407040,196.414630,185.96626,186.5400,197.77069,,2024,10,28
2024-10-28,ABNB,135.820,,1.0,,134.58,0.009214,0.014882,57.457336,125.337400,133.320100,143.597980,140.080760,125.49424,114.2800,137.19000,,2024,10,28
2024-10-28,ABT,114.130,,-1.0,,114.22,-0.000788,0.010339,50.131912,114.222595,109.517050,109.521515,119.484985,111.09392,110.2504,119.39000,,2024,10,28
2024-10-28,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-28,XYL,130.835,,1.0,,130.42,0.003182,0.009305,39.970257,133.478420,134.334370,130.949620,138.418910,129.50359,126.7100,137.53000,,2024,10,28
2024-10-28,YUM,134.870,,1.0,,133.04,0.013755,0.006897,50.767624,134.574110,133.394600,134.047180,138.692950,130.99506,129.7100,139.92000,,2024,10,28
2024-10-28,ZBH,103.550,,1.0,,102.35,0.011725,0.010933,56.671688,107.229390,107.883995,115.284256,107.086190,101.50781,101.7700,115.91237,,2024,10,28
2024-10-28,ZBRA,363.530,,1.0,,360.09,0.009553,0.009969,43.505260,356.281000,337.452000,311.584440,380.047030,359.12097,320.7700,377.68000,,2024,10,28


In [34]:
# Columns excluded from the frame before it is handed to the model
select_columns_to_drop = [
    "Action",
    "Next Day Close",
    "Previous Day Close",
    "Resistance",
    "Upper Band",
    "SMA_50",
    "SMA_200",
]

# Remove the excluded columns from the prepared frame
prep_data = prep_data.drop(columns=select_columns_to_drop)

prep_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Adjusted Close,Today to Tomorrow,Yesterday to Today,Return,Volatility,RSI,SMA_100,Lower Band,Support,Year,Month,Day
Date,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-10-28,A,131.620,,1.0,0.010984,0.012264,21.425411,136.640840,128.73973,130.1900,2024,10,28
2024-10-28,AAPL,233.490,,1.0,0.008988,0.013784,63.955185,222.175250,222.29343,216.3200,2024,10,28
2024-10-28,ABBV,189.740,,1.0,0.010061,0.008103,45.739376,183.300860,185.96626,186.5400,2024,10,28
2024-10-28,ABNB,135.820,,1.0,0.009214,0.014882,57.457336,133.320100,125.49424,114.2800,2024,10,28
2024-10-28,ABT,114.130,,-1.0,-0.000788,0.010339,50.131912,109.517050,111.09392,110.2504,2024,10,28
2024-10-28,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-28,XYL,130.835,,1.0,0.003182,0.009305,39.970257,134.334370,129.50359,126.7100,2024,10,28
2024-10-28,YUM,134.870,,1.0,0.013755,0.006897,50.767624,133.394600,130.99506,129.7100,2024,10,28
2024-10-28,ZBH,103.550,,1.0,0.011725,0.010933,56.671688,107.883995,101.50781,101.7700,2024,10,28
2024-10-28,ZBRA,363.530,,1.0,0.009553,0.009969,43.505260,337.452000,359.12097,320.7700,2024,10,28


In [38]:
# Build the feature matrix by removing the "Today to Tomorrow" column
# (presumably the prediction target — confirm against the training notebook).
X = prep_data.drop(columns=["Today to Tomorrow"])

# NOTE(review): this scaler is fit on the single-day frame being scored, not on
# the data the model was trained with. Year/Month/Day are constant for one
# date, which is why the trailing columns of the output are all 0. If the model
# was trained on features scaled with historical statistics, the training-time
# scaler should be loaded and only `.transform(X)` applied here — TODO confirm.
X_scaler = StandardScaler()

# Standardize every feature column; the result is a NumPy array.
X_scaled = X_scaler.fit_transform(X)

X_scaled


array([[-0.17354956,  0.63822489,  0.44088412, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.02664615,  0.63822489,  0.28147951, ...,  0.        ,
         0.        ,  0.        ],
       [-0.05933169,  0.63822489,  0.36717929, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [-0.22871294,  0.63822489,  0.50004695, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.28220176,  0.63822489,  0.32659604, ...,  0.        ,
         0.        ,  0.        ],
       [-0.07287197,  0.63822489,  0.82376347, ...,  0.        ,
         0.        ,  0.        ]])

In [41]:
import pickle

# File name and notebook-relative location of the pickled model.
# NOTE(review): the traceback recorded for this cell mentions
# '../models/clf_XGB_v1.pkl', which differs from the path built here, so the
# stored output appears to come from an earlier version of the cell — re-run.
model_name = "clf_XGB_v1.pkl"
model_path = f"../../models/{model_name}"

# SECURITY: unpickling can execute arbitrary code; only load model files that
# come from a trusted source.
with open(model_path, 'rb') as file:
    loaded_model = pickle.load(file)

FileNotFoundError: [Errno 2] No such file or directory: '../models/clf_XGB_v1.pkl'

In [None]:
# Load the pickled model from disk and predict on today's scaled features.

import pickle

# File name and notebook-relative location of the pickled model.
model_name = "clf_XGB_v1.pkl"
model_path = f"../../models/{model_name}"

# SECURITY: unpickling can execute arbitrary code; only load model files that
# come from a trusted source.
with open(model_path, 'rb') as file:
    loaded_model = pickle.load(file)

# Use the loaded model to make predictions.
# The original call passed `pivot_data`, a name that is never defined in this
# notebook (NameError on a fresh kernel). The feature matrix prepared by the
# preceding cells is `X_scaled`, so predict on that instead.
predictions = loaded_model.predict(X_scaled)
