In [None]:
import pandas as pd
from methods import create_timeseries_for_symbol, prepare_regression_data, tune_xgboost, evaluate_model
from sklearn.model_selection import train_test_split

def main(data_path: str = "train.parquet/partition_id=0/part-0.parquet") -> None:
    """Tune and evaluate one model per symbol found in a parquet partition.

    For every distinct ``symbol_id`` in the loaded frame, the per-symbol
    series is built with ``create_timeseries_for_symbol``, converted to a
    regression design matrix with ``prepare_regression_data``, split into
    train/test parts, tuned with ``tune_xgboost`` on the training part only,
    and finally passed to ``evaluate_model``.

    Args:
        data_path: Path of the parquet file to load. Defaults to the first
            partition of the training set, so ``main()`` keeps working
            without arguments.
    """
    # Load and prepare data (with all features and responders)
    df = pd.read_parquet(data_path)
    list_of_symbols = df['symbol_id'].unique()

    print(f"\n Total number of symbols (financial instruments) is {len(list_of_symbols)}")

    # An empty partition simply skips the loop; nothing is indexed up front.
    for symbol_id in list_of_symbols:
        print(f"\nTraining and Evaluating for symbol {symbol_id} ...")
        features, responders, target = create_timeseries_for_symbol(df, symbol_id)
        X, y = prepare_regression_data(features, responders, target)

        # Split BEFORE tuning so the rows later handed to evaluate_model as
        # test data are never seen by tune_xgboost; tuning on the full
        # (X, y) would leak the held-out rows into model selection.
        # NOTE(review): train_test_split shuffles rows by default. If the
        # rows are time-ordered, a chronological split (shuffle=False) may
        # be more appropriate -- confirm against prepare_regression_data.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Tune model on the training portion only
        best_model = tune_xgboost(X_train, y_train)

        # Evaluate best model
        evaluate_model(best_model, X_train, X_test, y_train, y_test)

if __name__ == "__main__":
    # Run the per-symbol pipeline only when executed as a script/cell,
    # not when this module is imported.
    main()