In [None]:
import pandas as pd
import os
from methods import create_timeseries_for_symbol, prepare_regression_data, evaluate_model
from tuning import tune_xgboost
from sklearn.model_selection import train_test_split

def main():

    # Directory containing the symbol CSV files
    symbol_data_dir = "symbol_data"
    
    # Loop through each CSV file in the directory
    for csv_file in os.listdir(symbol_data_dir):
        if csv_file.endswith('.csv'):
            # Extract symbol_id from filename (assuming format "symbol_X.csv")
            symbol_id = int(csv_file.split('_')[1].split('.')[0])
            print(f"\nProcessing symbol {symbol_id}...")
            
            # Load data for this symbol
            csv_path = os.path.join(symbol_data_dir, csv_file)
            df = pd.read_csv(csv_path)

            print(f"\nTraining and Evaluating for symbol {symbol_id} ...")
            features, responders, target = create_timeseries_for_symbol(df, symbol_id)
            X, y = prepare_regression_data(features, responders, target)

            # Convert inputs to numpy arrays if they're pandas DataFrames
            if isinstance(X, pd.DataFrame):
                X = X.to_numpy()
            if isinstance(y, pd.Series):
                y = y.to_numpy()

            # Tune model
            best_model = tune_xgboost(X, y)
            
            # Evaluate best model
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
            
            evaluate_model(best_model, X_train, X_test, y_train, y_test)

# Entry point: run the per-symbol pipeline only when this module is the
# top-level program, not when it is imported.
if __name__ == "__main__":
    main()