In [None]:
# =========================
# CONFIG
# =========================
CONFIG = {
    # --- Data -------------------------------------------------------------
    "train_csv": "TrainingSet.csv",   # CSV read for the training split
    "val_csv": "ValidationSet.csv",   # CSV read for the validation split

    # --- Normalization ----------------------------------------------------
    "NORM_MODE": "physics",           # "physics" or "zscore"

    # --- Model ------------------------------------------------------------
    "INPUT_DIM": 4,                   # per-step features: (x, y, vx, vy)
    "HIDDEN_SIZE": 80,                # LSTM hidden width
    "NUM_LAYERS": 4,                  # stacked LSTM layers
    "DROPOUT": 0.1,                   # inter-layer LSTM dropout

    # --- Optimization -----------------------------------------------------
    "BATCH_SIZE": 32,
    "NUM_EPOCHS": 17,
    "LR": 1e-3,
    "WEIGHT_DECAY": 1e-5,
    "GRAD_CLIP": 1.0,                 # max gradient norm passed to clip_grad_norm_

    # --- Early stopping ---------------------------------------------------
    "EARLY_STOP_PATIENCE": 5,         # epochs without improvement before stopping
    "MIN_DELTA": 1e-5,                # required drop in val loss to count as improvement

    # --- LR scheduler (ReduceLROnPlateau) ---------------------------------
    "SCHEDULER_FACTOR": 0.5,
    "SCHEDULER_PATIENCE": 2,

    # --- Reproducibility --------------------------------------------------
    "SEED": 42,

    # --- Device -----------------------------------------------------------
    "FORCE_CPU": False,               # True -> stay on CPU even if CUDA is available

    # --- Saving -----------------------------------------------------------
    "SAVE_PREFIX": "lstm_fullseq_1s", # checkpoint is written as "<prefix>_model.pt"
}

# =========================
# Imports & Setup
# =========================
import os, shutil, zipfile, random, math
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import matplotlib.pyplot as plt
from matplotlib.patches import Circle

def set_seed(seed):
    """Seed Python's, NumPy's and PyTorch's (CPU and all-CUDA) RNGs with `seed`."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    # Same order as listing them one by one; each call takes just the seed.
    for seed_fn in seeders:
        seed_fn(seed)
# Seed every RNG before any data or model object is created.
set_seed(CONFIG["SEED"])

# Use CUDA only when it is not disabled by config and is actually available.
device = torch.device(
    "cuda" if (not CONFIG["FORCE_CPU"] and torch.cuda.is_available()) else "cpu"
)
print("Using device:", device)

# Frame interval handed to compute_anchor_mask as its tolerance basis.
# NOTE(review): presumably the recordings are sampled at 30 Hz - confirm.
DT = 1.0 / 30.0

# =========================
# Load & Prepare Data
# =========================
def load_and_prepare(csv_path):
    """Read a trajectory CSV and return it cleaned and ordered.

    Steps:
      * rename the short ``fut1_x`` / ``fut1_y`` headers to ``future1_x`` /
        ``future1_y`` when the file uses them;
      * verify the required columns exist;
      * sort by ``(ped_id, time_s)``;
      * drop rows that have no ``future1_x`` / ``future1_y`` target.

    Raises:
        ValueError: if any required column is absent.
    """
    frame = pd.read_csv(csv_path)

    if "fut1_x" in frame.columns:
        aliases = {"fut1_x": "future1_x", "fut1_y": "future1_y"}
        frame = frame.rename(columns=aliases)

    required = {"time_s", "ped_id", "x", "y", "vx", "vy", "future1_x", "future1_y"}
    absent = required.difference(frame.columns)
    if absent:
        raise ValueError(f"{csv_path} missing columns: {absent}")

    ordered = frame.sort_values(["ped_id", "time_s"]).reset_index(drop=True)
    # Rows without a target cannot be used for supervision.
    with_targets = ordered.dropna(subset=["future1_x", "future1_y"])
    return with_targets.reset_index(drop=True)

# Both splits go through the same loader so they share the schema checks.
train_df, val_df = (
    load_and_prepare(CONFIG[split_key]) for split_key in ("train_csv", "val_csv")
)

# =========================
# Normalizer
# =========================
class Normalizer:
    """Map raw positions/velocities to model space and predictions back.

    Two modes are supported:

    * ``"physics"`` - divide by fixed constants (``x_scale``, ``y_range``,
      ``v_scale``) and shift ``y`` by ``y_min``.
    * anything else (used as ``"zscore"``) - standardize every column with
      the mean/std collected by :meth:`fit_zscore`.
    """

    def __init__(self, mode="physics"):
        self.mode = mode
        # Fixed constants for "physics" mode.
        # NOTE(review): presumably the extent of the observed area and a
        # typical speed bound, in metres and m/s - confirm.
        self.x_scale = 3.0
        self.y_min = 0.1
        self.y_range = 3.6 - 0.1
        self.v_scale = 2.0
        # column name -> {"mean": ..., "std": ...}; filled by fit_zscore.
        self.stats = {}

    def fit_zscore(self, df):
        """Record mean and std of each input/target column of `df`."""
        for c in ["x", "y", "vx", "vy", "future1_x", "future1_y"]:
            # Epsilon keeps the later division finite for constant columns.
            m, s = df[c].mean(), df[c].std() + 1e-8
            self.stats[c] = {"mean": m, "std": s}

    def transform_rows(self, df):
        """Return a copy of `df` with normalized columns added.

        Adds ``x_n, y_n, vx_n, vy_n`` (inputs) and ``f1x_n, f1y_n`` (targets).
        The caller's frame is left untouched; use the returned frame.

        Raises:
            KeyError: in z-score mode when :meth:`fit_zscore` was not called.
        """
        # Work on a copy so the caller's DataFrame is never mutated (this also
        # avoids chained-assignment warnings when `df` is a slice).
        out = df.copy()
        if self.mode == "physics":
            out["x_n"] = out["x"] / self.x_scale
            out["y_n"] = (out["y"] - self.y_min) / self.y_range
            out["vx_n"] = out["vx"] / self.v_scale
            out["vy_n"] = out["vy"] / self.v_scale
            out["f1x_n"] = out["future1_x"] / self.x_scale
            out["f1y_n"] = (out["future1_y"] - self.y_min) / self.y_range
        else:
            column_pairs = [
                ("x", "x_n"), ("y", "y_n"), ("vx", "vx_n"), ("vy", "vy_n"),
                ("future1_x", "f1x_n"), ("future1_y", "f1y_n"),
            ]
            for base, new in column_pairs:
                col_stats = self.stats[base]
                out[new] = (out[base] - col_stats["mean"]) / col_stats["std"]
        return out

    def inverse_pos(self, xn, yn):
        """Undo target normalization; returns ``(x, y)`` in raw coordinates.

        In z-score mode the ``future1_x`` / ``future1_y`` statistics are used,
        since this is applied to predicted/true target positions.
        """
        if self.mode == "physics":
            return xn * self.x_scale, yn * self.y_range + self.y_min
        s_x, s_y = self.stats["future1_x"]["std"], self.stats["future1_y"]["std"]
        m_x, m_y = self.stats["future1_x"]["mean"], self.stats["future1_y"]["mean"]
        return xn * s_x + m_x, yn * s_y + m_y

normalizer = Normalizer(mode=CONFIG["NORM_MODE"])

# Z-score statistics come from the training split only, so the validation
# split is scaled with the same numbers.
if normalizer.mode == "zscore":
    normalizer.fit_zscore(train_df)

train_df = normalizer.transform_rows(train_df)
val_df = normalizer.transform_rows(val_df)

# =========================
# Build Sequences
# =========================
def build_seqs(df):
    """Split `df` into one time-ordered sequence per pedestrian.

    Returns a list of ``(features, targets, ped_id)`` tuples where `features`
    is a float32 array of the columns ``x_n, y_n, vx_n, vy_n`` and `targets`
    is a float32 array of ``f1x_n, f1y_n``, both ordered by ``time_s``.
    """
    feature_cols = ["x_n", "y_n", "vx_n", "vy_n"]
    target_cols = ["f1x_n", "f1y_n"]

    def as_float32(track, cols):
        return track[cols].values.astype(np.float32)

    sequences = []
    for ped_id, track in df.groupby("ped_id"):
        ordered = track.sort_values("time_s").reset_index(drop=True)
        if len(ordered) >= 1:
            sequences.append(
                (as_float32(ordered, feature_cols), as_float32(ordered, target_cols), ped_id)
            )
    return sequences

# One (features, targets, ped_id) tuple per pedestrian, for each split.
train_seqs, val_seqs = build_seqs(train_df), build_seqs(val_df)

# =========================
# Dataset & Collate
# =========================
class PedDataset(Dataset):
    """Dataset over ``(features, targets, ped_id)`` tuples of NumPy arrays."""

    def __init__(self, seqs):
        self.seqs = seqs

    def __len__(self):
        return len(self.seqs)

    def __getitem__(self, i):
        """Return item `i` with both arrays wrapped as tensors (no copy)."""
        features, targets, ped_id = self.seqs[i]
        feature_tensor = torch.from_numpy(features)
        target_tensor = torch.from_numpy(targets)
        return feature_tensor, target_tensor, ped_id

def collate(batch):
    """Pad a list of variable-length sequences into one batch.

    Returns ``(features, targets, mask, lengths, ped_ids)``: features and
    targets are zero-padded along time (batch first), `mask` is 1.0 on real
    steps and 0.0 on padding, `lengths` holds the true length of each
    sequence, and `ped_ids` is the tuple of ids in batch order.
    """
    feats, targs, pids = zip(*batch)
    L = torch.tensor([seq.shape[0] for seq in feats], dtype=torch.long)

    padded_feats = pad_sequence(feats, batch_first=True)
    padded_targs = pad_sequence(targs, batch_first=True)

    # Step index < sequence length  <=>  real (unpadded) step.
    step_index = torch.arange(padded_feats.size(1)).unsqueeze(0)
    is_real = step_index < L.unsqueeze(1)
    mask = is_real.to(torch.get_default_dtype())

    return padded_feats, padded_targs, mask, L, pids

# Training batches are reshuffled each epoch; validation keeps a fixed order.
train_loader = DataLoader(
    dataset=PedDataset(train_seqs),
    batch_size=CONFIG["BATCH_SIZE"],
    shuffle=True,
    collate_fn=collate,
)
val_loader = DataLoader(
    dataset=PedDataset(val_seqs),
    batch_size=CONFIG["BATCH_SIZE"],
    shuffle=False,
    collate_fn=collate,
)

# =========================
# Model
# =========================
class LSTMNet(nn.Module):
    """Stacked LSTM that emits a 2-D output for every time step.

    Args:
        in_dim: number of features per time step.
        hidden: LSTM hidden size.
        layers: number of stacked LSTM layers.
        drop: dropout between LSTM layers (ignored when ``layers == 1``,
            because ``nn.LSTM`` only applies dropout between layers).
    """

    def __init__(self, in_dim, hidden, layers, drop):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, hidden, layers,
                            batch_first=True,
                            dropout=drop if layers > 1 else 0.0)
        self.head = nn.Linear(hidden, 2)

    def forward(self, x, lengths):
        """Run the network on a padded batch.

        Args:
            x: padded input, batch first: ``(batch, time, in_dim)``.
            lengths: true length of each sequence (any device; moved to CPU
                as ``pack_padded_sequence`` requires).

        Returns:
            Tensor of shape ``(batch, x.size(1), 2)``. Padded steps carry the
            head's output for a zero hidden state and should be masked out.
        """
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        out, _ = self.lstm(packed)
        # total_length keeps the time axis equal to the input's, even when the
        # input is padded beyond max(lengths); otherwise the output would be
        # shorter than the targets/mask it is compared against.
        out, _ = nn.utils.rnn.pad_packed_sequence(
            out, batch_first=True, total_length=x.size(1)
        )
        return self.head(out)

# Build the network from CONFIG and place it on the selected device.
model = LSTMNet(
    in_dim=CONFIG["INPUT_DIM"],
    hidden=CONFIG["HIDDEN_SIZE"],
    layers=CONFIG["NUM_LAYERS"],
    drop=CONFIG["DROPOUT"],
).to(device)

# =========================
# Loss & Metric
# =========================
def masked_mse(p,t,m):
    """Mean over unmasked steps of the squared error summed over the last axis.

    `m` weights each step (1 = keep, 0 = ignore). The divisor is clamped to
    at least 1 so an all-zero mask yields 0 rather than a division by zero.
    """
    per_step_sq_err = torch.sum((p - t) ** 2, dim=-1)
    masked_total = torch.sum(per_step_sq_err * m)
    n_steps = torch.clamp(m.sum(), min=1.0)
    return masked_total / n_steps

def rmse_1s(p,t,m):
    """RMSE of the Euclidean position error, in de-normalized coordinates.

    Only steps where `m > 0` contribute. Predictions and targets are mapped
    back through the module-level `normalizer` before the error is taken.
    Returns a Python float.
    """
    with torch.no_grad():
        keep = m > 0
        pred_sel, true_sel = p[keep], t[keep]

        def to_raw(xy):
            # Split into x / y columns and undo the target normalization.
            x_norm = xy[:, 0].cpu().numpy()
            y_norm = xy[:, 1].cpu().numpy()
            return normalizer.inverse_pos(x_norm, y_norm)

        x_p, y_p = to_raw(pred_sel)
        x_t, y_t = to_raw(true_sel)
        sq_dist = (x_p - x_t) ** 2 + (y_p - y_t) ** 2
        return float(np.sqrt(sq_dist.mean()))

# Adam with weight decay; hyperparameters come from CONFIG.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=CONFIG["LR"],
    weight_decay=CONFIG["WEIGHT_DECAY"],
)

# Shrink the learning rate when the monitored (validation) loss plateaus.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=CONFIG["SCHEDULER_FACTOR"],
    patience=CONFIG["SCHEDULER_PATIENCE"],
)

# Early-stopping bookkeeping used by the training loop.
best_loss = float("inf")   # lowest validation loss seen so far
no_imp = 0                 # consecutive epochs without improvement
best_state = None          # checkpoint dict of the best epoch

# =========================
# Training Loop
# =========================
import copy  # needed to snapshot the best weights (see below)

for epoch in range(1, CONFIG["NUM_EPOCHS"] + 1):
    # ---------------- training pass ----------------
    model.train()
    tloss, tn = 0.0, 0.0
    for f, t, m, L, _ in train_loader:
        f, t, m = f.to(device), t.to(device), m.to(device)
        optimizer.zero_grad()
        pred = model(f, L)
        loss = masked_mse(pred, t, m)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), CONFIG["GRAD_CLIP"])
        optimizer.step()
        # Weight each batch by its number of valid steps so the epoch loss is
        # a per-step mean regardless of how much padding a batch carries.
        n_valid = m.sum().item()
        tloss += loss.item() * n_valid
        tn += n_valid
    train_loss = tloss / max(tn, 1.0)

    # ---------------- validation pass ----------------
    model.eval()
    vloss, vn, v_sq_err = 0.0, 0.0, 0.0
    with torch.no_grad():
        for f, t, m, L, _ in val_loader:
            f, t, m = f.to(device), t.to(device), m.to(device)
            pred = model(f, L)
            n_valid = m.sum().item()
            if n_valid == 0:
                # No valid step: rmse_1s would return NaN and poison the sum.
                continue
            vloss += masked_mse(pred, t, m).item() * n_valid
            vn += n_valid
            # RMSEs cannot be averaged directly. Recover the batch's summed
            # squared error (rmse^2 * n) and take the root once at the end.
            v_sq_err += rmse_1s(pred, t, m) ** 2 * n_valid
    val_loss = vloss / max(vn, 1.0)
    val_rmse = math.sqrt(v_sq_err / max(vn, 1.0))
    scheduler.step(val_loss)

    print(f"Epoch {epoch:02d} | TrainLoss {train_loss:.6f} | "
          f"ValLoss {val_loss:.6f} | ValRMSE_1s {val_rmse:.3f}")

    # ---------------- early stopping ----------------
    if val_loss + CONFIG["MIN_DELTA"] < best_loss:
        best_loss = val_loss
        no_imp = 0
        # state_dict() returns references to the live tensors, which later
        # optimizer steps update in place. Deep-copy so the checkpoint really
        # holds this epoch's weights instead of whatever comes last.
        best_state = {"model": copy.deepcopy(model.state_dict()),
                      "opt": copy.deepcopy(optimizer.state_dict()),
                      "epoch": epoch, "loss": best_loss}
    else:
        no_imp += 1
        if no_imp >= CONFIG["EARLY_STOP_PATIENCE"]:
            print("Early stopping.")
            break

if best_state is None:
    # Happens when no epoch ran or the validation loss never became finite.
    raise RuntimeError(
        "No best checkpoint was recorded; check NUM_EPOCHS and the validation loss."
    )
model.load_state_dict(best_state["model"])
torch.save(best_state, f"{CONFIG['SAVE_PREFIX']}_model.pt")

# =========================
# Full‑Track Visualization (+1 s horizon only)
# =========================
# Where the figures go and whether to bundle them.
OUTPUT_DIR = "val_tracks_1s_horizons"
ZIP_NAME = "val_tracks_1s_horizons.zip"
MAKE_ZIP = True
CLEAR_OUTPUT_FIRST = True   # wipe OUTPUT_DIR before writing new figures
SHOW_FIGS = False           # True -> show figures instead of saving them

# Anchor selection: draw a prediction every ANCHOR_PERIOD_S seconds,
# starting ANCHOR_START_OFFSET seconds after the first sample of a track.
ANCHOR_PERIOD_S = 1.0
ANCHOR_START_OFFSET = 0.0

# Marker sizes and plot options.
MARKER_PATH = 2
MARKER_TRUE1 = 6
MARKER_PRED1 = 6
SHOW_SEG_LINES = True       # connect current position to true/predicted points
TOL_RADIUS = 0.3            # meters

# Start from a clean directory when requested, then make sure it exists.
if CLEAR_OUTPUT_FIRST and os.path.isdir(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR, exist_ok=True)

def build_feat_norm_track(df_slice):
    """Return the normalized (x, y, vx, vy) features of one track.

    Applies the module-level `normalizer` to the raw ``x, y, vx, vy`` columns
    of `df_slice` and returns a float32 array with one row per sample.
    """
    if normalizer.mode == "physics":
        columns = [
            df_slice["x"] / normalizer.x_scale,
            (df_slice["y"] - normalizer.y_min) / normalizer.y_range,
            df_slice["vx"] / normalizer.v_scale,
            df_slice["vy"] / normalizer.v_scale,
        ]
    else:
        # Z-score path: standardize each column with its fitted statistics.
        fitted = normalizer.stats
        columns = [
            (df_slice[name] - fitted[name]['mean']) / fitted[name]['std']
            for name in ("x", "y", "vx", "vy")
        ]
    return np.stack(columns, axis=-1).astype(np.float32)

def predict_track_1s(track_df):
    """Predict the +1 s position at every sample of one pedestrian track.

    Args:
        track_df: one pedestrian's rows, already ordered by time, with the
            columns ``time_s, x, y, vx, vy, future1_x, future1_y``.

    Returns:
        ``(times, x, y, true_x, true_y, pred_x, pred_y)`` as NumPy arrays;
        predictions are mapped back to raw coordinates.
    """
    feat_norm = build_feat_norm_track(track_df)
    feats_t = torch.from_numpy(feat_norm).unsqueeze(0).to(device)
    lengths = torch.tensor([feat_norm.shape[0]])

    # Force eval mode so dropout is off during inference, then restore the
    # previous mode; otherwise predictions are noisy whenever the model was
    # left in train mode (e.g. after an interrupted training run).
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            preds = model(feats_t, lengths)[0].cpu().numpy()
    finally:
        model.train(was_training)

    p1x_n, p1y_n = preds[:, 0], preds[:, 1]
    p1x, p1y = normalizer.inverse_pos(p1x_n, p1y_n)
    t1x = track_df.future1_x.values
    t1y = track_df.future1_y.values
    return track_df.time_s.values, track_df.x.values, track_df.y.values, t1x, t1y, p1x, p1y

def compute_anchor_mask(times, dt_frame, period, start):
    """Flag the samples that fall on the periodic anchor grid.

    The grid is ``times[0] + start + k * period`` for integer ``k``. A sample
    is an anchor when it lies within half a frame (``dt_frame / 2``) of its
    nearest grid point; samples earlier than the first grid point (beyond
    that tolerance) are never anchors. Returns a boolean array like `times`.
    """
    origin = times[0] + start
    half_frame = dt_frame / 2
    is_anchor = np.zeros_like(times, dtype=bool)

    for idx, stamp in enumerate(times):
        # Skip anything that lies before the grid starts.
        if not (stamp + half_frame < origin):
            nearest = origin + round((stamp - origin) / period) * period
            is_anchor[idx] = abs(stamp - nearest) <= half_frame
    return is_anchor

def plot_and_save_1s(pid, df, out_path):
    """Plot one pedestrian's path with sparse +1 s predictions.

    At every anchor sample (see `compute_anchor_mask`) the true +1 s position,
    a tolerance disc of radius `TOL_RADIUS` around it, and the predicted +1 s
    position are drawn. The figure is shown when `SHOW_FIGS` is set, otherwise
    saved to `out_path`; it is always closed afterwards.

    Args:
        pid: pedestrian id, used in the title only.
        df: that pedestrian's rows, ordered by time.
        out_path: destination image path (used when not showing).
    """
    times, cx, cy, tx, ty, px, py = predict_track_1s(df)
    anchor_mask = compute_anchor_mask(times, DT, ANCHOR_PERIOD_S, ANCHOR_START_OFFSET)
    anchors = np.where(anchor_mask)[0]

    fig, ax = plt.subplots(figsize=(7, 7))
    try:
        # Pedestrian path.
        ax.plot(cx, cy, '-o', ms=MARKER_PATH, color='black', alpha=0.8)

        for i in anchors:
            # Transparent tolerance disc around the true +1 s point.
            ax.add_patch(Circle((tx[i], ty[i]), TOL_RADIUS, color='green', alpha=0.2))

            # True and predicted +1 s points.
            ax.plot(tx[i], ty[i], 'o', ms=MARKER_TRUE1, color='green', alpha=0.9)
            ax.plot(px[i], py[i], 'x', ms=MARKER_PRED1, color='red')

            # Optional thin lines from the current position to both points.
            if SHOW_SEG_LINES:
                ax.plot([cx[i], px[i]], [cy[i], py[i]], color='red', alpha=0.3, lw=0.8)
                ax.plot([cx[i], tx[i]], [cy[i], ty[i]], color='green', alpha=0.3, lw=0.8)

        # Start and end markers.
        ax.plot(cx[0], cy[0], '*', ms=12, color='blue')
        ax.plot(cx[-1], cy[-1], '*', ms=12, color='orange')

        # Explicit legend handles: the per-anchor artists would otherwise
        # produce one entry each, and patches need a proxy marker.
        legend_elements = [
            plt.Line2D([0], [0], color='black', marker='o', linestyle='-', markersize=MARKER_PATH, label='Path'),
            plt.Line2D([0], [0], color='green', marker='o', linestyle='None', markersize=MARKER_TRUE1, label='True +1 s'),
            plt.Line2D([0], [0], color='red', marker='x', linestyle='None', markersize=MARKER_PRED1, label='Pred +1 s'),
            plt.Line2D([0], [0], color='blue', marker='*', linestyle='None', markersize=12, label='Start'),
            plt.Line2D([0], [0], color='orange', marker='*', linestyle='None', markersize=12, label='End'),
            # Label follows TOL_RADIUS so it cannot drift from the drawn disc.
            plt.Line2D([0], [0], marker='o', color='green', markersize=15, alpha=0.2, linestyle='None',
                       label=f'{TOL_RADIUS:g} m Zone'),
        ]
        ax.legend(handles=legend_elements, loc='best')

        # Formatting.
        # NOTE(review): the axes are not equal-aspect, so the tolerance disc
        # renders as an ellipse; consider ax.set_aspect('equal').
        ax.set_title(f"Ped {pid}: +1 s horizon every {ANCHOR_PERIOD_S:.1f}s")
        ax.set_xlabel("X (m)")
        ax.set_ylabel("Y (m)")
        ax.set_xlim(-3, 3)
        ax.set_ylim(0.1, 3.6)
        ax.grid(True)

        if SHOW_FIGS:
            plt.show()
        else:
            fig.savefig(out_path, dpi=150, bbox_inches='tight')
    finally:
        # Release the figure on every path (show, save, or error) so a long
        # run over many pedestrians does not accumulate open figures.
        plt.close(fig)

# Render one figure per pedestrian in the validation split.
groups = val_df.groupby('ped_id')
print(f"Generating +1 s sparse plots for {len(groups)} pedestrians…")
for ped_id, track in groups:
    ordered_track = track.sort_values('time_s').reset_index(drop=True)
    image_path = os.path.join(OUTPUT_DIR, f"ped_{ped_id:03d}.png")
    plot_and_save_1s(ped_id, ordered_track, image_path)

# Optionally bundle every PNG (flat, sorted by name) into a single archive.
if MAKE_ZIP:
    with zipfile.ZipFile(ZIP_NAME, 'w', zipfile.ZIP_DEFLATED) as archive:
        png_names = (name for name in sorted(os.listdir(OUTPUT_DIR)) if name.endswith('.png'))
        for png_name in png_names:
            archive.write(os.path.join(OUTPUT_DIR, png_name), arcname=png_name)
    print(f"Created ZIP → {ZIP_NAME}")


Using device: cuda
Epoch 01 | TrainLoss 0.517970 | ValLoss 0.389923 | ValRMSE_1s 1.936
Epoch 02 | TrainLoss 0.283740 | ValLoss 0.140564 | ValRMSE_1s 1.220
Epoch 03 | TrainLoss 0.117938 | ValLoss 0.085253 | ValRMSE_1s 0.990
Epoch 04 | TrainLoss 0.085434 | ValLoss 0.067121 | ValRMSE_1s 0.871
Epoch 05 | TrainLoss 0.042303 | ValLoss 0.024390 | ValRMSE_1s 0.510
Epoch 06 | TrainLoss 0.021193 | ValLoss 0.015934 | ValRMSE_1s 0.409
Epoch 07 | TrainLoss 0.017104 | ValLoss 0.013733 | ValRMSE_1s 0.378
Epoch 08 | TrainLoss 0.015116 | ValLoss 0.012696 | ValRMSE_1s 0.363
Epoch 09 | TrainLoss 0.014014 | ValLoss 0.012427 | ValRMSE_1s 0.358
Epoch 10 | TrainLoss 0.013308 | ValLoss 0.011268 | ValRMSE_1s 0.342
Epoch 11 | TrainLoss 0.012633 | ValLoss 0.010999 | ValRMSE_1s 0.338
Epoch 12 | TrainLoss 0.012119 | ValLoss 0.010422 | ValRMSE_1s 0.328
Epoch 13 | TrainLoss 0.011745 | ValLoss 0.010310 | ValRMSE_1s 0.326
Epoch 14 | TrainLoss 0.011528 | ValLoss 0.009872 | ValRMSE_1s 0.319
Epoch 15 | TrainLoss 0.011174