In [2]:
# === merge_features.py ===

import pandas as pd
import os

# === CONFIG ===
# All input files and the merged output live in the same working directory.
BASE_DIR = r"C:\Users\flass\OneDrive\AI Financial Model\S&P 500 Chatgpt Version"
FUNDAMENTAL_PATH = os.path.join(BASE_DIR, "df_fundamental_features.csv")
TECHNICAL_PATH = os.path.join(BASE_DIR, "clean_technical_features.csv")
TARGET_PATH = os.path.join(BASE_DIR, "df_labeled.csv")
OUTPUT_PATH = os.path.join(BASE_DIR, "df_features_merged.csv")

# Columns every input is joined on.
KEY_COLS = ["ticker", "date"]

# === Load Features ===
print("📂 Loading technical + fundamental features...")
df_fund = pd.read_csv(FUNDAMENTAL_PATH, parse_dates=["date"])
df_tech = pd.read_csv(TECHNICAL_PATH, parse_dates=["date"])

# Clean keys: strip whitespace and upper-case tickers so both frames use the
# same spelling before joining.
df_fund["ticker"] = df_fund["ticker"].str.strip().str.upper()
df_tech["ticker"] = df_tech["ticker"].str.strip().str.upper()

# Merge features: the inner join keeps only (ticker, date) pairs present in
# both the fundamental and the technical feature files.
print("🔁 Merging features on ['ticker', 'date']...")
df = pd.merge(df_fund, df_tech, on=KEY_COLS, how="inner")
print(f"✅ Feature merge shape: {df.shape}")

# === Load Targets ===
print("🎯 Loading target columns from df_labeled.csv...")
# A callable `usecols` selects the join keys plus every `target_*` column in a
# single pass, so the file does not have to be opened a second time just to
# discover its header.
df_targets = pd.read_csv(
    TARGET_PATH,
    usecols=lambda col: col in KEY_COLS or col.startswith("target_"),
)
df_targets["date"] = pd.to_datetime(df_targets["date"], errors="coerce")
df_targets["ticker"] = df_targets["ticker"].str.strip().str.upper()

# `errors="coerce"` turns unparseable dates into NaT. pandas matches null keys
# to each other when merging, so such rows could attach to feature rows that
# also have a missing date. They can never match a real date; drop them and
# report how many were dropped instead of carrying them along silently.
bad_date_count = int(df_targets["date"].isna().sum())
if bad_date_count:
    print(f"⚠️ Dropping {bad_date_count} target rows with missing/unparseable dates")
    df_targets = df_targets.dropna(subset=["date"])

# Duplicate keys on the target side make the left merge repeat feature rows.
# Surface that instead of letting the row count grow unnoticed; the merge
# result itself is left unchanged.
duplicate_key_count = int(df_targets.duplicated(subset=KEY_COLS).sum())
if duplicate_key_count:
    print(
        f"⚠️ {duplicate_key_count} duplicate (ticker, date) rows in targets; "
        "the left merge will repeat the matching feature rows"
    )

# Merge in targets: the left join keeps every feature row; rows without a
# matching label get NaN in the target columns.
print("➕ Merging targets into dataset...")
df = pd.merge(df, df_targets, on=KEY_COLS, how="left")

# Final output
print(f"💾 Saving final merged dataset with targets to: {OUTPUT_PATH}")
print(f"📊 Final shape: {df.shape}")
df.to_csv(OUTPUT_PATH, index=False)
print("✅ Done.")


📂 Loading technical + fundamental features...
🔁 Merging features on ['ticker', 'date']...
✅ Feature merge shape: (2894127, 46)
🎯 Loading target columns from df_labeled.csv...
➕ Merging targets into dataset...
💾 Saving final merged dataset with targets to: C:\Users\flass\OneDrive\AI Financial Model\S&P 500 Chatgpt Version\df_features_merged.csv
📊 Final shape: (2894127, 124)
✅ Done.
