In [1]:
"""
FE Notebook B — Build FE Base Table
Input  : direction_df_autoN.csv, train_monthly_v2.csv
Output : fe_v1.csv
"""

import numpy as np
import pandas as pd


In [2]:
# ============================================================
# 0) Helper functions
# ============================================================

def compute_trend_slope(ts):
    """Return the slope of a least-squares straight line fitted to ``ts``.

    The series is regressed against its positional index ``0 .. len(ts) - 1``
    with a first-degree ``np.polyfit``.

    Parameters
    ----------
    ts : sequence or 1-D array of numbers
        Observations in chronological order.

    Returns
    -------
    float
        Slope of the fitted line. ``0.0`` is returned when the series has
        fewer than two points or is numerically constant (std < 1e-9).
    """
    n_points = len(ts)
    # Fewer than two points cannot define a line. An empty series would also
    # make np.std return NaN (so the flat-series guard below is skipped) and
    # np.polyfit raise, so treat these inputs as "no trend".
    if n_points < 2:
        return 0.0
    # A flat series has zero slope by definition; skip the fit entirely.
    if np.std(ts) < 1e-9:
        return 0.0
    return np.polyfit(np.arange(n_points), ts, 1)[0]

def recent_growth(ts, window=6):
    """Return the change in ``ts`` over its last ``window`` steps.

    Parameters
    ----------
    ts : sequence or 1-D array of numbers
        Observations in chronological order.
    window : int, default 6
        Number of steps to look back from the last observation.

    Returns
    -------
    number
        ``ts[-1]`` minus the value ``window`` steps earlier, or ``0.0`` when
        the series holds fewer than ``window + 1`` observations.
    """
    if len(ts) < window + 1:
        return 0.0
    # The observation `window` steps before the last one sits at index
    # -(window + 1). Indexing with -window covers only window - 1 steps
    # (and is always 0 for window=1), which contradicts the length guard above.
    return ts[-1] - ts[-(window + 1)]


# ============================================================
# 1) Build FE Base Table  (AutoN version)
# ============================================================

def build_fe_base(direction_df, monthly, n_periods=None):
    """Build the per-(pair, month) feature table used for modelling.

    For every leader/follower pair in ``direction_df`` and every time step
    ``t`` one row is emitted that combines

    * leader features taken from the leader's monthly ``total_value`` series
      up to and including position ``t - 1`` (current value, one-step
      difference, 3-month rolling mean and population std),
    * the pair-level static columns copied from ``direction_df``, and
    * ``target``: the follower's ``total_value`` at position ``t`` (i.e. the
      month right after the one the leader features describe).

    ``lag_val`` is stored as a feature only; it does not shift the series.

    Parameters
    ----------
    direction_df : pandas.DataFrame
        One row per pair. Must contain ``leader`` and ``follower``; the
        columns ``lag_val``, ``cos_val``, ``cos_wgt``, ``sign_agree_val``,
        ``cc_val``, ``dtw_sim``, ``hs_dist``, ``same_cluster`` and
        ``score_v1`` are used when present and default to 0 otherwise.
    monthly : pandas.DataFrame
        Monthly records with ``item_id``, ``year``, ``month``,
        ``total_value``, ``total_weight`` and ``total_quantity``. Duplicate
        (item, month) records are summed. The frame is not modified.
    n_periods : int or None, default None
        Number of leading month columns to use. ``None`` uses every month
        between the first and last observed one; larger values are clamped
        to what is available.

    Returns
    -------
    pandas.DataFrame
        One row per (pair, t) with ``t`` running from 1 to ``n_periods - 1``.

    Raises
    ------
    KeyError
        If a leader or follower id has no rows in ``monthly``.
    """
    print("üìå Fixing monthly duplicates...")
    monthly = monthly.copy()
    # Month counter relative to the first year in the data (January of that year -> 1).
    monthly["t"] = (monthly["year"] - monthly["year"].min()) * 12 + monthly["month"]

    monthly_agg = monthly.groupby(["item_id", "t"], as_index=False).agg(
        total_value=("total_value", "sum"),
        total_weight=("total_weight", "sum"),
        total_quantity=("total_quantity", "sum")
    )

    ts_val = monthly_agg.pivot(index="item_id", columns="t", values="total_value").fillna(0)
    ts_wgt = monthly_agg.pivot(index="item_id", columns="t", values="total_weight").fillna(0)

    # The loop below indexes the series by position, so the columns must be
    # contiguous months. A month absent for *every* item would otherwise be
    # dropped by pivot and silently shift all later positions.
    if not monthly_agg.empty:
        all_months = np.arange(int(monthly_agg["t"].min()), int(monthly_agg["t"].max()) + 1)
        ts_val = ts_val.reindex(columns=all_months, fill_value=0)
        ts_wgt = ts_wgt.reindex(columns=all_months, fill_value=0)

    print("üìä Pivot shapes:", ts_val.shape, ts_wgt.shape)

    # Derive the horizon from the data instead of assuming exactly 43 months.
    available = ts_val.shape[1]
    n_cols = available if n_periods is None else min(int(n_periods), available)

    rows = []

    for _, row in direction_df.iterrows():

        leader = row["leader"]
        follower = row["follower"]

        # lag_val is expected to be present; fall back to 0 if the column is missing.
        lag = int(row["lag_val"]) if "lag_val" in row else 0

        leader_ts = ts_val.loc[leader].values
        follower_ts = ts_val.loc[follower].values

        # ===== Static pair features (taken from the pair table) =====
        # Identical for every t of this pair, so build them once per pair.
        pair_static = {
            "cos_val": row.get("cos_val", 0.0),
            "cos_wgt": row.get("cos_wgt", 0.0),
            "sign_agree_val": row.get("sign_agree_val", 0.0),
            "cc_val": row.get("cc_val", 0.0),
            "lag_val": lag,
            "dtw_sim": row.get("dtw_sim", 0.0),
            "hs_dist": row.get("hs_dist", 0),
            "cluster_sim": row.get("same_cluster", 0),
            "score_v1": row.get("score_v1", 0.0),
        }

        # t stops one short of the last column so follower_ts[t] always exists.
        for t in range(1, n_cols):

            # Up to three most recent leader values ending at position t - 1.
            recent = leader_ts[max(0, t - 3):t]

            rows.append({
                "leader": leader,
                "follower": follower,
                "t": t,
                "L_t": leader_ts[t - 1],
                "L_diff": leader_ts[t - 1] - leader_ts[t - 2] if t >= 2 else 0,
                "L_roll3_mean": recent.mean(),
                "L_roll3_std": recent.std(),
                **pair_static,
                "target": follower_ts[t],
            })

    fe_df = pd.DataFrame(rows)
    print("FE base shape:", fe_df.shape)
    return fe_df


# ============================================================
# Run (uses the AutoN pair table!)
# ============================================================

# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
monthly = pd.read_csv("/data/ephemeral/home/data/processed/train_monthly_v2.csv")
direction_df = pd.read_csv("direction_df_autoN.csv")   # <- important: the AutoN file is the pair table loaded here

# Build the feature table from the pair table and the monthly data.
fe_df = build_fe_base(direction_df, monthly)

# Persist the result without the DataFrame index.
fe_df.to_csv("fe_v1.csv", index=False)

print("üéâ Saved fe_v1.csv")


üìå Fixing monthly duplicates...
üìä Pivot shapes: (100, 43) (100, 43)
FE base shape: (8568, 17)
üéâ Saved fe_v1.csv
