In [1]:
import sys
import pandas as pd
import torch
from pathlib import Path

# 1. Setup Project Root (Adjust based on where you run this)
# When the working directory is the 'notebooks/' folder, the project root is
# its parent; otherwise the working directory itself is taken as the root.
current_dir = Path.cwd()
PROJECT_ROOT = current_dir.parent if current_dir.name == "notebooks" else current_dir

# Put the project root on the import path. The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate entry each time.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# 2. Imports
# Project-local modules. The try body holds only the imports so that nothing
# else can be mistaken for an import failure.
try:
    from src.models.config import CONFIG
    from src.utils.data_loader import load_and_merge_data
    from src.training.runner import run_walk_forward
except ImportError as e:
    # Show the search path so a wrong project root is easy to spot, then stop:
    # nothing below can work without these modules.
    print(f"❌ Import Failed: {e}")
    print(f"Current Sys Path: {sys.path}")
    sys.exit(1)
else:
    print("✅ Imports successful!")


def _load_smoke_data(max_permnos=None):
    """Load the 2010-2016 data slice for the smoke test.

    Args:
        max_permnos: Optional cap on the number of distinct ``permno`` values
            to keep (the first ones encountered). ``None`` keeps every row.

    Returns:
        The loaded DataFrame, or ``None`` if loading failed or produced no
        rows (the reason is printed).
    """
    try:
        # Only load enough years to form ONE split
        # Min required: train_years (5) + val (1) + test (1) = 7 years roughly
        # But for debugging, we can trick the config to be smaller.
        df_main = load_and_merge_data(
            PROJECT_ROOT / "data", start_date="2010-01-01", end_date="2016-12-31"
        )

        if max_permnos is not None:
            if "permno" not in df_main.columns:
                raise ValueError(
                    "max_permnos was given but the data has no 'permno' column."
                )
            # Thin the panel by entity rather than with head(): head() would
            # keep only the first rows and could leave later split windows
            # empty (assumes rows are ordered by date - TODO confirm).
            kept_permnos = df_main["permno"].unique()[:max_permnos]
            df_main = df_main[df_main["permno"].isin(kept_permnos)]

        print(f"   Loaded Data Shape: {df_main.shape}")
        if df_main.empty:
            raise ValueError("Dataframe is empty! Check your data path/dates.")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and give up.
        print(f"❌ Data Load Failed: {e}")
        return None
    return df_main


def _build_debug_config(df_main):
    """Return a copy of CONFIG shrunk for a fast run, with ``num_input_dim`` set.

    ``num_input_dim`` is the number of columns in ``df_main`` that are not in
    the exclusion set below.
    """
    debug_config = CONFIG.copy()
    debug_config.update(
        {
            "batch_size": 16,  # Small batch
            "epochs": 1,  # Single pass
            "num_workers": 0,  # Disable MP for debugging safety
            "train_years": 2,  # Shorten requirement
            "val_years": 1,
            "test_years": 1,
            "start_date": "2010-01-01",
            "end_date": "2015-12-31",  # Short range
            "hidden_dim": 16,  # Tiny model
            "d_model": 16,
            "nhead": 2,
        }
    )

    # Columns that are not counted as input features (Dynamic Injection).
    # A set gives O(1) membership tests while scanning the columns.
    excluded = {
        "date",
        "permno",
        "target",
        "emb_mean",
        "sent_score_mean",
        "sent_pos_mean",
        "sent_neg_mean",
        "sent_score_std",
        "log_n_news",
    }
    debug_config["num_input_dim"] = sum(
        1 for col in df_main.columns if col not in excluded
    )
    return debug_config


def _smoke_test_model(
    step, model_type, df_main, config, *, show_sample=False, empty_hint=""
):
    """Run one walk-forward pass for ``model_type`` and print the outcome.

    Args:
        step: Step number shown in the progress heading.
        model_type: Value passed to ``run_walk_forward`` as ``model_type``.
        df_main: Data passed through to ``run_walk_forward``.
        config: Config passed through to ``run_walk_forward``.
        show_sample: Also print the result shape and the first ``pred`` value.
        empty_hint: Text appended to the empty-result warning.

    Exceptions are caught and printed with a traceback so that one failing
    model does not stop the remaining checks.
    """
    print(f"\n{step}. Testing {model_type} Model...")
    try:
        df_res = run_walk_forward(
            df_main=df_main,
            model_type=model_type,
            config=config,
            project_root=PROJECT_ROOT,
        )

        if df_res.empty:
            print(f"   ⚠️ {model_type} ran but returned empty DataFrame{empty_hint}")
            return

        print(f"   ✅ {model_type} Forward Pass: SUCCESS")
        if show_sample:
            print(f"   Output Shape: {df_res.shape}")
            print(f"   Sample Pred: {df_res.iloc[0]['pred']:.4f}")
    except Exception as e:
        print(f"   ❌ {model_type} Failed: {e}")
        # Helpful traceback
        import traceback

        traceback.print_exc()


def run_smoke_test(max_permnos=None):
    """Smoke-test the walk-forward pipeline for the MAT and Canonical models.

    Loads the 2010-2016 data slice, builds a shrunken copy of CONFIG, then runs
    ``run_walk_forward`` once per model type and prints what happened. Problems
    are reported on stdout rather than raised; the function returns ``None``.

    Args:
        max_permnos: Optional cap on the number of distinct ``permno`` values
            kept from the loaded data. The default ``None`` uses every row,
            as before. The run recorded with this file loaded about 1.3M rows
            and was interrupted, so a small cap is the way to get a quick run.
    """
    print(f"🚀 Starting Smoke Test on {PROJECT_ROOT}")

    # --- A. LOAD TINY DATA SLICE ---
    # We load real data but filter it aggressively to keep it light
    print("1. Loading Data...")
    df_main = _load_smoke_data(max_permnos)
    if df_main is None:
        return

    # --- B. OVERRIDE CONFIG FOR SPEED ---
    # We want this to run in < 30 seconds
    debug_config = _build_debug_config(df_main)
    print(f"   Debug Features detected: {debug_config['num_input_dim']}")

    # --- C. TEST MAT PIPELINE ---
    _smoke_test_model(
        2,
        "MAT",
        df_main,
        debug_config,
        show_sample=True,
        empty_hint=" (Check Splits?)",
    )

    # --- D. TEST CANONICAL PIPELINE ---
    _smoke_test_model(3, "Canonical", df_main, debug_config)


# Run the smoke test only when this code executes as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    run_smoke_test()

✅ Imports successful!
🚀 Starting Smoke Test on /Users/audricsicard/Documents/VSCode/AML Project/Modality-aware-transformer
1. Loading Data...
Reading data from: /Users/audricsicard/Documents/VSCode/AML Project/Modality-aware-transformer/data
Loading datasets...

--- Merging Data ---
Merged Market: (3254401, 12)
Merged Ratios: (3254401, 17)
Merged Macro: (3254401, 24)
Merged Text: (3254401, 31)
Filling NaN values...
Keeping records between 2010-01-01 and 2016-12-31...
Done! Final Data Shape: (1318143, 31)
   Loaded Data Shape: (1318143, 31)
   Debug Features detected: 22

2. Testing MAT Model...
Starting Walk-Forward for MAT on cpu...
Generating split schedule...

PROCESSING YEAR: 2013 (MAT)...
   Train: ('2010-01-01', '2011-12-31')
   Val:   ('2012-01-01', '2012-12-31')
   Test:  ('2013-01-01', '2013-12-31')
Dataset Ready. Samples: 332094 (Filtered by 2010-01-01 to 2011-12-31)
Dataset Ready. Samples: 185479 (Filtered by 2012-01-01 to 2012-12-31)
Dataset Ready. Samples: 187886 (Fil

                                                                              

KeyboardInterrupt: 