In [1]:
# --- Model Tuning Notebook ---
import pandas as pd
import numpy as np
import os
# Project root used as the working directory for the rest of the notebook.
# Set the TIME_SERIES_PRED_ROOT environment variable to run on another machine;
# when it is unset, the original author's local checkout path is used.
PROJECT_ROOT = os.environ.get(
    "TIME_SERIES_PRED_ROOT",
    "/Users/gagigetsadze/Desktop/Python programs/Time_Series_Pred",
)
# NOTE(review): the chdir runs before the project-local imports below,
# presumably so that `features` and `models` resolve from the working
# directory - confirm before moving this line.
os.chdir(PROJECT_ROOT)
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler

from features.build_features import FeatureEngineering
from models.arima import ARIMAModel
from models.prophet import ProphetModel
from models.lstm import LSTMModel
from models.xgboost_model import XGBoostModel

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [None]:
# Data locations are resolved relative to the current working directory,
# so the notebook must be started from (or have changed into) the project root.
project_dir = os.getcwd()
# NOTE(review): "1023" is the name of the raw-data sub-folder; its meaning
# (snapshot date? dataset id?) is not stated anywhere visible - confirm.
_raw_data_parts = ("data", "raw", "1023")
raw_data_dir = os.path.join(project_dir, *_raw_data_parts)

In [None]:
# Load Data
def load_data(filepath, **read_csv_kwargs):
    """Read a CSV file into a DataFrame, reporting failures instead of raising.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file to read.
    **read_csv_kwargs
        Optional keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``parse_dates=["Date"]``). Omitting them reproduces the
        previous behaviour exactly.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame on success. ``None`` when the file does not exist or
        when any other exception is raised while reading; in both cases a
        message is printed rather than the exception being propagated, so
        callers must check for ``None``.
    """
    try:
        df = pd.read_csv(filepath, **read_csv_kwargs)
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
        return None
    except Exception as e:
        # Deliberate best-effort: any read/parse problem is reported and
        # signalled with None so the notebook can keep running.
        print(f"An error occurred loading data: {e}")
        return None
    else:
        print(f"Successfully loaded data from: {filepath}")
        return df

In [None]:
# Locations of the three raw CSV files inside the raw-data directory.
sp500_companies_path = os.path.join(raw_data_dir, "sp500_companies.csv")
sp500_index_path = os.path.join(raw_data_dir, "sp500_index.csv")
sp500_stocks_path = os.path.join(raw_data_dir, "sp500_stocks.csv")

# Each frame is None when its file could not be read (load_data prints why).
sp500_companies_df = load_data(sp500_companies_path)
sp500_index_df = load_data(sp500_index_path)
sp500_stocks_df = load_data(sp500_stocks_path)

# Only convert the Date column when the stocks file actually loaded.
# Subscripting None here would raise TypeError and bypass the None guards
# in the downstream preprocessing/feature/split functions.
if sp500_stocks_df is not None:
    sp500_stocks_df["Date"] = pd.to_datetime(sp500_stocks_df["Date"])

In [None]:
def preprocess_stocks_data(sp500_stocks_df):
    """Drop rows without an adjusted close and order the frame by date.

    Parameters
    ----------
    sp500_stocks_df : pandas.DataFrame or None
        Stock rows containing at least the "Adj Close" and "Date" columns.

    Returns
    -------
    pandas.DataFrame or None
        A new frame with rows whose "Adj Close" is missing removed, sorted by
        "Date" in ascending order and re-indexed from 0. ``None`` is passed
        straight through so a failed load propagates without raising.
    """
    if sp500_stocks_df is None:
        return None
    return (
        sp500_stocks_df
        .dropna(subset=["Adj Close"])
        .sort_values(by="Date")
        .reset_index(drop=True)
    )

In [None]:
def create_and_merge_features(
    sp500_stocks_df, sp500_companies_df, lags=(1, 7, 30), windows=(7, 30, 90)
):
    """Run the FeatureEngineering steps on the stocks frame and merge company data.

    The steps are applied in this order, each on the result of the previous
    one: lag features on "Adj Close", rolling features on "Adj Close",
    calendar features from "Date", then a merge with the company table.
    What each step adds is defined by ``FeatureEngineering`` and is not
    visible here.

    Parameters
    ----------
    sp500_stocks_df : pandas.DataFrame or None
        Stock rows; expected to contain "Adj Close" and "Date".
    sp500_companies_df : pandas.DataFrame or None
        Company table handed as-is to ``merge_company_data``.
    lags : iterable of int, optional
        Lag values passed to ``create_lag_features``. Defaults to the
        previously hard-coded ``[1, 7, 30]``.
    windows : iterable of int, optional
        Window sizes passed to ``create_rolling_features``. Defaults to the
        previously hard-coded ``[7, 30, 90]``.

    Returns
    -------
    pandas.DataFrame or None
        Whatever ``merge_company_data`` returns, or ``None`` when
        ``sp500_stocks_df`` is ``None``.
    """
    if sp500_stocks_df is None:
        return None
    feature_eng = FeatureEngineering()
    # Hand the helpers plain lists, the same type they received when the
    # values were hard-coded, regardless of what iterable the caller passed.
    lags_to_use = list(lags)
    windows_to_use = list(windows)
    sp500_stocks_df = feature_eng.create_lag_features(
        sp500_stocks_df, "Adj Close", lags_to_use
    )
    sp500_stocks_df = feature_eng.create_rolling_features(
        sp500_stocks_df, "Adj Close", windows_to_use
    )
    sp500_stocks_df = feature_eng.create_calendar_features(sp500_stocks_df, "Date")
    sp500_stocks_df = feature_eng.merge_company_data(
        sp500_stocks_df, sp500_companies_df
    )
    return sp500_stocks_df

In [None]:
def split_data(sp500_stocks_df):
    """Split the prepared frame into train, validation and test parts.

    Delegates to ``FeatureEngineering.time_series_split`` using the "Date"
    column; how the boundaries are chosen is decided by that helper.

    Parameters
    ----------
    sp500_stocks_df : pandas.DataFrame or None
        The frame to split.

    Returns
    -------
    tuple
        ``(train, validation, test)``, or ``(None, None, None)`` when the
        input is ``None``.
    """
    if sp500_stocks_df is not None:
        # Unpack explicitly so a helper result that is not exactly three
        # parts fails here rather than at the caller.
        train_part, val_part, test_part = FeatureEngineering().time_series_split(
            sp500_stocks_df, "Date"
        )
        return train_part, val_part, test_part
    return None, None, None

In [None]:
# Data-preparation pipeline: clean the stock rows, add engineered features,
# then split into train / validation / test.
# NOTE: sp500_stocks_df is rebound at every step, so this cell is not
# idempotent - running it a second time feeds the already-featured frame back
# through feature creation. Re-run the loading cell before re-running this one.
sp500_stocks_df = preprocess_stocks_data(sp500_stocks_df)
sp500_stocks_df = create_and_merge_features(sp500_stocks_df, sp500_companies_df)
# split_data returns (None, None, None) when it is given None, so the three
# names below are always defined; the tuning cell checks train_data for None.
train_data, val_data, test_data = split_data(sp500_stocks_df)

In [None]:
# 1. ARIMA hyperparameter tuning
# Planned approach: grid search over the (p, d, q) orders.
if train_data is not None:
    # TODO: not implemented yet. Build folds with TimeSeriesSplit and run a
    # grid search to pick the best parameters for the ARIMA model.
    pass


# 2. Prophet hyperparameter tuning
if train_data is not None:
    # TODO: not implemented yet. Add the tuning code for the Prophet model.
    pass


# 3. LSTM hyperparameter tuning
if train_data is not None:
    # TODO: not implemented yet. Search for the best num_layers, hidden_size,
    # learning_rate and batch_size.
    pass

# 4. XGBoost hyperparameter tuning
if train_data is not None:
    # TODO: not implemented yet. Search for the best XGBoost parameters.
    pass