In [2]:
# Imports & Config
import os, re, math, json, time
from pathlib import Path
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# --- Filesystem layout (adjust to your machine) ---
# Input keypoint CSVs, grouped as PD_or_C / angle / type_of_turn.
GROUPED_DIR = Path(r"D:\Courses\thesis\data\turning_keypoints_grouped")
# Output folder for trained checkpoints (normalized pipeline).
MODEL_DIR = Path(r"D:\Courses\thesis\data\second\models_turning_norm_250")
# Output folder for generated (synthetic) CSVs.
SYN_DIR = Path(r"D:\Courses\thesis\data\second\synthetic_turning_norm_250")
for _out_dir in (MODEL_DIR, SYN_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# --- Data / model hyper-parameters ---
NUM_KPT = 17                 # keypoints per frame
INPUT_DIM = NUM_KPT * 2      # one (x, y) pair per keypoint
WINDOW = 10                  # frames per training window
BATCH_SIZE = 64
EPOCHS = 250
LR = 1e-3
SEED = 42

# Prefer the GPU when one is visible to PyTorch.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Seed NumPy and PyTorch (CPU + every CUDA device) for repeatable runs.
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
print("Device:", DEVICE)


Device: cuda


In [6]:
# Skeleton graph + root joint used for normalization
# Each chain lists joint indices in order; consecutive indices form one bone.
_LIMB_CHAINS = (
    (0, 1, 2, 3),
    (0, 4, 5, 6),
    (0, 7, 8, 9, 10),
    (8, 11, 12, 13),
    (8, 14, 15, 16),
)
# Flattened bone list as [start_joint, end_joint] pairs, in chain order.
connections = [
    [start, end]
    for chain in _LIMB_CHAINS
    for start, end in zip(chain, chain[1:])
]
# Joint used as the origin when normalizing; three chains pass through it.
ROOT = 8


In [7]:
# List groups and parse subject id from filename
def list_groups(grouped_dir: Path):
    """Walk a three-level folder tree and report every leaf that holds CSVs.

    The tree is expected to be ``grouped_dir / pd_or_c / angle / turn_type``.
    Directories are visited in sorted order at every level; non-directories
    are ignored. Leaves without any ``*.csv`` file are skipped.

    Returns:
        list of ``(pd_or_c, angle, turn_type, leaf_path, n_csv_files)`` tuples.
    """
    def _subdirs(parent: Path):
        return sorted(child for child in parent.iterdir() if child.is_dir())

    found = []
    for cohort_dir in _subdirs(grouped_dir):
        for angle_dir in _subdirs(cohort_dir):
            for turn_dir in _subdirs(angle_dir):
                csv_count = sum(1 for _ in turn_dir.glob("*.csv"))
                if csv_count > 0:
                    found.append(
                        (cohort_dir.name, angle_dir.name, turn_dir.name, turn_dir, csv_count)
                    )
    return found

def parse_subject_id_from_filename(name: str):
    """Extract the numeric subject id from a file name such as ``Pt204_C_n_350.csv``.

    The name must start with ``Pt`` followed by one or more digits and an
    underscore (the match is anchored at the start and is case-sensitive).

    Returns:
        The subject id as ``int`` (e.g. ``204``), or ``None`` when the name
        does not follow the pattern or the digits cannot be converted.
    """
    m = re.match(r"Pt(\d+)_", name)
    if m is None:
        return None
    try:
        return int(m.group(1))
    except ValueError:
        # Only conversion failures are expected here; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        return None


In [8]:
# PATCH: headerless-safe CSV loader + group loader (overrides previous versions)

def _has_numeric_header(df) -> bool:
    """True if most column labels look like numbers (means we accidentally used row 1 as header)."""
    if len(df.columns) == 0:
        return False
    num_like = 0
    for c in df.columns:
        try:
            float(str(c))
            num_like += 1
        except:
            pass
    return (num_like / len(df.columns)) >= 0.8

def read_keypoints_csv(path: Path, input_dim=34):
    """
    Robust loader for keypoint CSVs that may or may not have a header row.

    Steps:
    - Read once with pandas' default header handling; if the resulting column
      labels look numeric (see `_has_numeric_header`), re-read with
      header=None so the first row is kept as data.
    - Coerce every cell to numeric (unparseable cells become NaN).
    - Keep the first `input_dim` columns BY POSITION and drop every row that
      contains a NaN in those columns.
    - Require at least WINDOW + 2 remaining frames.

    Args:
        path: CSV file to read.
        input_dim: number of leading columns to keep; expected to be
            2 * n_keypoints (x, y per keypoint).

    Returns:
        (arr, cols) where `arr` is a float32 array of shape (T, input_dim) and
        `cols` are synthetic names x1,y1,...,xK,yK with K = input_dim // 2;
        or None when the file is empty, has fewer than `input_dim` columns,
        or has too few usable frames.
    """
    try:
        # first try (might wrongly treat first row as header)
        df = pd.read_csv(path, low_memory=False)
        if _has_numeric_header(df):
            # re-read with no header so the first row is data
            df = pd.read_csv(path, header=None, low_memory=False)
    except pd.errors.EmptyDataError:
        # A zero-byte / column-less file is just another unusable file.
        return None

    num_df = df.apply(pd.to_numeric, errors='coerce')
    if num_df.shape[1] < input_dim:
        return None

    use = num_df.iloc[:, :input_dim].dropna()
    arr = use.to_numpy(dtype=np.float32)
    if arr.shape[0] < WINDOW + 2:
        return None

    # Synthetic XY names for saving later. Derived from `input_dim` (not the
    # global keypoint count) so the names always match the returned columns.
    cols = [f"{axis}{k}" for k in range(1, input_dim // 2 + 1) for axis in ("x", "y")]

    return arr, cols

def load_group_sequences(group_path: Path, input_dim=34):
    """
    Read every ``*.csv`` directly inside `group_path` (sorted by path) with
    `read_keypoints_csv` and collect the ones that load successfully.

    Files for which the reader returns None are skipped. No shared header
    schema is enforced; the column names are whatever the reader returns.

    Returns four parallel lists:
        file names, keypoint arrays, column-name lists, subject ids
        (subject id is None when it cannot be parsed from the file name).
    """
    names, arrays, col_sets, subject_ids = [], [], [], []
    for csv_path in sorted(group_path.glob("*.csv")):
        loaded = read_keypoints_csv(csv_path, input_dim=input_dim)
        if loaded is not None:
            frames, col_names = loaded
            names.append(csv_path.name)
            arrays.append(frames)
            col_sets.append(col_names)
            subject_ids.append(parse_subject_id_from_filename(csv_path.name))
    return names, arrays, col_sets, subject_ids


In [9]:
# Normalization utilities (sequence-level) + bone-length helpers
def _median_bone_scale_2d(points_17x2):
    """Median Euclidean bone length of one pose.

    The pose is first expressed relative to the ROOT joint, then the length of
    every bone listed in `connections` is measured. Returns 1.0 when there are
    no bones to measure.
    """
    rel = points_17x2 - points_17x2[ROOT]
    bone_lens = []
    for start, end in connections:
        bone_lens.append(np.linalg.norm(rel[end] - rel[start]))
    if not bone_lens:
        return 1.0
    return np.median(bone_lens)

def normalize_sequence_xy(seq_Tx34):
    """
    Normalize a whole sequence using only its first frame as reference.

    - translation: the ROOT joint position of frame 0 is subtracted from
      every joint of every frame;
    - scale: the result is divided by the median bone length of frame 0
      (falls back to 1.0 when that length is non-finite or <= 1e-6).

    Returns: (seq_norm with shape (T, 34), root0 with shape (2,), scale0 as float)
    """
    n_frames = seq_Tx34.shape[0]

    # Reference pose = first frame, viewed as 17 (x, y) joints.
    first_pose = seq_Tx34[0].reshape(17, 2)
    root0 = first_pose[ROOT].copy()

    scale = _median_bone_scale_2d(first_pose)
    scale_usable = np.isfinite(scale) and scale > 1e-6
    if not scale_usable:
        scale = 1.0

    joints = seq_Tx34.reshape(n_frames, 17, 2)
    normalized = (joints - root0) / scale
    return normalized.reshape(n_frames, 34), root0, float(scale)

def denormalize_frame_34(frame_34, root0, scale0):
    """Undo the sequence normalization for a single flat 34-value frame.

    The frame is viewed as 17 (x, y) joints, multiplied by `scale0`, shifted
    by `root0`, and flattened back to shape (34,).
    """
    joints = frame_34.reshape(17, 2)
    restored = joints * scale0 + root0
    return restored.reshape(34,)

def bone_lengths_batch(x_B_T_34):
    """Per-bone Euclidean lengths for a batch of pose windows.

    x: (B, T, 34) tensor -> (B, T, num_bones) tensor, one length per entry of
    `connections`, in the same order.
    """
    n_batch, n_steps, _ = x_B_T_34.shape
    joints = x_B_T_34.view(n_batch, n_steps, 17, 2)
    per_bone = [
        torch.norm(joints[:, :, end, :] - joints[:, :, start, :], dim=-1)  # (B,T)
        for start, end in connections
    ]
    return torch.stack(per_bone, dim=-1)


In [10]:
# Split (subject-aware), window count, scaler utils
def split_subject_aware(file_list, seqs, subjs, test_size=0.15, seed=42):
    """Split sequences into train/validation without mixing known subjects.

    Strategy:
    - Files with a known subject id: if there are >= 2 distinct subjects, the
      SUBJECTS are split so that all files of a subject land on the same side.
      With a single subject but >= 2 files, the files themselves are split.
      A single lone file goes to train.
    - Files with an unknown subject id (None/NaN): split file-wise when there
      are >= 2 of them, otherwise sent to train.
    - If the validation side ends up empty, every file is used for training.

    Args:
        file_list: file names (only the length is used).
        seqs: sequences, parallel to `file_list`.
        subjs: subject ids parallel to `file_list`; None marks "unknown".
        test_size: validation fraction handed to `train_test_split`.
        seed: random_state handed to `train_test_split`.

    Returns:
        (train_seqs, val_seqs, train_idx, val_idx)
    """
    man = pd.DataFrame({"idx": range(len(file_list)), "subj": subjs})
    known = man[man["subj"].notna()]
    unknown = man[man["subj"].isna()]

    train_idx, val_idx = [], []

    subjects = sorted(known["subj"].unique())
    if len(subjects) >= 2:
        tr_subj, va_subj = train_test_split(subjects,
                                            test_size=test_size, random_state=seed, shuffle=True)
        train_idx += known[known["subj"].isin(tr_subj)]["idx"].tolist()
        val_idx   += known[known["subj"].isin(va_subj)]["idx"].tolist()
    elif len(known) >= 2:
        # One subject only: fall back to a file-level split; guarantee at
        # least one validation file.
        tr, va = train_test_split(known["idx"].tolist(),
                                  test_size=max(1/len(known), test_size),
                                  random_state=seed, shuffle=True)
        train_idx += tr; val_idx += va
    else:
        # Zero or one known file: keep it for training. Previously a lone
        # known file was added to neither side and silently dropped whenever
        # the unknown files produced a non-empty validation set.
        train_idx += known["idx"].tolist()

    if len(unknown) >= 2:
        tr, va = train_test_split(unknown["idx"].tolist(),
                                  test_size=max(1/len(unknown), test_size),
                                  random_state=seed, shuffle=True)
        train_idx += tr; val_idx += va
    else:
        train_idx += unknown["idx"].tolist()

    if len(val_idx) == 0:
        # No validation data possible: train on everything.
        train_idx = man["idx"].tolist()
        val_idx = []

    train_seqs = [seqs[i] for i in train_idx]
    val_seqs   = [seqs[i] for i in val_idx]
    return train_seqs, val_seqs, train_idx, val_idx

def make_windows_count(seqs, win):
    """Total number of sliding windows: ``len(s) - win`` per sequence, never negative."""
    total = 0
    for seq in seqs:
        surplus = len(seq) - win
        if surplus > 0:
            total += surplus
    return total

def scaler_from_params(mean, scale):
    """Rebuild a StandardScaler from stored per-feature mean and scale.

    The fitted attributes (`mean_`, `scale_`, `var_`, `n_features_in_`) are
    assigned directly instead of calling `fit`; `var_` is set to scale squared.
    Both inputs are converted to float64 arrays.
    """
    mean_arr = np.array(mean, dtype=np.float64)
    scale_arr = np.array(scale, dtype=np.float64)

    restored = StandardScaler()
    restored.mean_ = mean_arr
    restored.scale_ = scale_arr
    restored.var_ = scale_arr ** 2
    restored.n_features_in_ = len(mean_arr)
    return restored


In [11]:
# Transformer-VAE (batch_first=True)
class PositionalEncodingBF(nn.Module):
    """Fixed sinusoidal positional encoding for batch-first tensors.

    A (1, max_len, d_model) table is precomputed and stored as the
    non-trainable buffer `pe`: even feature indices hold sines, odd indices
    hold cosines.
    """
    def __init__(self, d_model, max_len=1000):
        super().__init__()
        steps = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)      # (L,1)
        even_dims = torch.arange(0, d_model, 2, dtype=torch.float32)
        inv_freq = torch.exp(even_dims * (-np.log(10000.0) / d_model))
        angles = steps * inv_freq                                               # (L, D/2)

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)
        self.register_buffer('pe', table.unsqueeze(0))  # (1,L,D)

    def forward(self, x):  # x: (B,T,D)
        """Add the first T rows of the table to `x` (broadcast over the batch)."""
        seq_len = x.size(1)
        return x + self.pe[:, :seq_len, :]

class TransformerVAE_BF(nn.Module):
    """Transformer-based variational auto-encoder over fixed-length windows.

    All tensors are batch-first. The window length `win` is baked into the
    layer sizes (the latent projections take `d_model * win` features), so the
    model only accepts inputs with exactly `win` time steps.
    """
    def __init__(self, input_dim, win=WINDOW, d_model=96, nhead=6, num_layers=3, latent_dim=48):
        """
        Args:
            input_dim: features per frame (size of the last input axis).
            win: number of frames per window.
            d_model: transformer embedding width.
            nhead: attention heads per layer.
            num_layers: number of encoder layers and of decoder layers.
            latent_dim: size of the latent vector z.
        """
        super().__init__()
        self.win = win
        # Per-frame projection into the transformer width.
        self.input_linear = nn.Linear(input_dim, d_model)
        # Positional table only needs to cover one window.
        self.pos_enc = PositionalEncodingBF(d_model, max_len=win)
        enc_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=num_layers)
        # The whole encoded window is flattened before the latent heads.
        self.fc_mu = nn.Linear(d_model * win, latent_dim)
        self.fc_logvar = nn.Linear(d_model * win, latent_dim)
        # Expands z back to one d_model vector per time step.
        self.fc_latent = nn.Linear(latent_dim, d_model * win)
        dec_layer = nn.TransformerDecoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.decoder = nn.TransformerDecoder(dec_layer, num_layers=num_layers)
        self.output_linear = nn.Linear(d_model, input_dim)
    def encode(self, x):
        """Map a window x (B, win, input_dim) to (mu, logvar), each (B, latent_dim)."""
        h = self.input_linear(x)
        h = self.pos_enc(h)
        out = self.encoder(h)                # (B,T,D)
        flat = out.reshape(out.size(0), -1)  # (B, T*D)
        return self.fc_mu(flat), self.fc_logvar(flat)
    def reparameterize(self, mu, logvar, temperature=1.0):
        """Sample z = mu + eps * std, with std = exp(0.5 * logvar) * temperature."""
        std = torch.exp(0.5 * logvar) * temperature
        eps = torch.randn_like(std)
        return mu + eps * std
    def decode(self, z):
        """Map latent z (B, latent_dim) to a reconstructed window (B, win, input_dim)."""
        x = self.fc_latent(z).view(z.size(0), self.win, -1)
        tgt = self.pos_enc(x)
        # The decoder's memory input is an all-zero tensor: all information
        # about the sample reaches the decoder through `tgt` (i.e. through z).
        memory = torch.zeros(z.size(0), self.win, tgt.size(2), device=z.device)  # (B,T,D)
        out = self.decoder(tgt, memory)
        return self.output_linear(out)
    def forward(self, x):
        """Encode, sample with the default temperature, decode.

        Returns (recon, mu, logvar).
        """
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        recon = self.decode(z)
        return recon, mu, logvar


In [12]:
# Loss: reconstruction + KL + velocity + bone-length consistency
def loss_with_bone(recon_x, x, mu, logvar, w_kld=0.1, w_vel=0.15, w_bone=0.15):
    """
    Weighted training loss for the window VAE.

    total = MSE(recon, x)
          + w_kld  * KL term (averaged over all elements of mu/logvar)
          + w_vel  * MSE between frame-to-frame differences along the time axis
          + w_bone * MSE between bone lengths of recon and x

    x, recon_x: (B,T,34); mu, logvar: latent statistics from the encoder.
    """
    recon_term = F.mse_loss(recon_x, x)
    kl_term = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())

    # Temporal smoothness: compare first differences along dim=1 (time).
    vel_term = F.mse_loss(torch.diff(recon_x, dim=1), torch.diff(x, dim=1))

    # Skeleton consistency: compare per-bone lengths.
    target_bones = bone_lengths_batch(x)
    recon_bones = bone_lengths_batch(recon_x)
    bone_term = F.mse_loss(recon_bones, target_bones)

    total = recon_term + w_kld * kl_term + w_vel * vel_term + w_bone * bone_term
    return total


In [13]:
# Train one group (normalized pipeline) and save checkpoint
class _WindowDataset(Dataset):
    def __init__(self, seqs_norm, scaler, win):
        xs = []
        for s in seqs_norm:
            s2 = scaler.transform(s)  # standardize after our normalization
            for i in range(win, len(s2)):
                xs.append(s2[i - win:i])
        self.samples = np.asarray(xs, np.float32)
    def __len__(self): return len(self.samples)
    def __getitem__(self, i): return torch.tensor(self.samples[i], dtype=torch.float32)

def train_one_group(triple, group_path: Path):
    """Train one Transformer-VAE on all sequences of a group and save a checkpoint.

    Pipeline:
      1. load every usable CSV of the group;
      2. normalize each sequence by its own first frame (root + bone scale);
      3. subject-aware train/validation split;
      4. fit a StandardScaler on the normalized training frames;
      5. train for EPOCHS epochs, validating on epoch 1 and every 5th epoch and
         remembering the weights with the lowest validation loss;
      6. restore those best weights (if any validation happened) and save
         model + scaler parameters + metadata to MODEL_DIR / "TVAE_<tag>.pt".

    Args:
        triple: (pd_or_c, angle, turn_type) strings; joined with "__" for the file name.
        group_path: folder containing the group's CSV files.

    Returns:
        Path of the written checkpoint, or None when the group has no usable
        file or fewer than 64 training windows.
    """
    file_list, seqs_raw, headers, subjs = load_group_sequences(group_path, INPUT_DIM)
    if len(seqs_raw) == 0:
        print(f"  [SKIP empty] {triple}")
        return None

    # 1) Per-sequence normalization
    seqs_norm = []
    for arr in seqs_raw:
        s_norm, _, _ = normalize_sequence_xy(arr)  # ignore root/scale during training
        seqs_norm.append(s_norm)

    # 2) Subject-aware split
    _, _, train_idx, val_idx = split_subject_aware(file_list, seqs_norm, subjs, test_size=0.15, seed=SEED)
    train_seqs = [seqs_norm[i] for i in train_idx] if len(train_idx) else seqs_norm
    val_seqs   = [seqs_norm[i] for i in val_idx]   if len(val_idx)   else []

    # enough windows?
    if make_windows_count(train_seqs, WINDOW) < 64:
        print(f"  [SKIP small] {triple} — not enough windows")
        return None

    # 3) Fit scaler on normalized train data
    scaler = StandardScaler().fit(np.concatenate(train_seqs, axis=0))

    # 4) Dataloaders
    train_loader = DataLoader(_WindowDataset(train_seqs, scaler, WINDOW),
                              batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
    val_loader = DataLoader(_WindowDataset(val_seqs, scaler, WINDOW),
                            batch_size=BATCH_SIZE, shuffle=False, drop_last=False) if len(val_seqs)>0 else None

    # 5) Model
    model = TransformerVAE_BF(INPUT_DIM, win=WINDOW).to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), lr=LR)

    best_val = float('inf'); best_state = None

    for epoch in range(1, EPOCHS+1):
        model.train(); total = 0.0
        for b in train_loader:
            b = b.to(DEVICE)
            opt.zero_grad()
            recon, mu, logvar = model(b)
            loss = loss_with_bone(recon, b, mu, logvar)
            loss.backward(); opt.step()
            total += loss.item()

        # Validate/log on the first epoch and then every 5th epoch.
        if (epoch % 5 == 0) or epoch == 1:
            if val_loader and len(val_loader) > 0:
                model.eval(); vtot = 0.0
                with torch.no_grad():
                    for vb in val_loader:
                        vb = vb.to(DEVICE)
                        r, m, l = model(vb)
                        vtot += loss_with_bone(r, vb, m, l).item()
                vloss = vtot / len(val_loader)
            else:
                vloss = float('nan')

            print(f"  Epoch {epoch:02d}/{EPOCHS} — train {total/len(train_loader):.5f} val {vloss:.5f}")
            if not math.isnan(vloss) and vloss < best_val:
                best_val = vloss
                # state_dict() hands out references to the live parameter
                # tensors, which the optimizer keeps updating in place. Clone
                # them so the snapshot really freezes the best weights.
                best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}

    if best_state is not None:
        model.load_state_dict(best_state)

    # save checkpoint
    # Use the most common header among the loaded files; fall back to x1,y1,...
    header_series = pd.Series(["|".join(h) for h in headers if h])
    header_cols = header_series.mode().iloc[0].split("|") if not header_series.empty \
                  else [f"{ax}{i}" for i in range(1, NUM_KPT+1) for ax in ("x","y")]
    tag = "__".join(triple)
    ckpt = MODEL_DIR / f"TVAE_{tag}.pt"
    torch.save({
        "model_state": model.state_dict(),
        "scaler_mean": scaler.mean_,
        "scaler_scale": scaler.scale_,
        "header_cols": header_cols,
        "meta": {
            "group": triple,
            "input_dim": INPUT_DIM,
            "window": WINDOW,
            "epochs": EPOCHS,
            "best_val": None if math.isinf(best_val) else float(best_val),
            "date": time.strftime("%Y-%m-%d %H:%M:%S"),
            "normalized": True,
            "root_index": ROOT,
            "connections": connections,
        }
    }, ckpt)
    return ckpt


In [23]:
# Train all groups once & write a manifest
groups = list_groups(GROUPED_DIR)
print(f"Discovered {len(groups)} groups.")

# One manifest row per discovered group, whether or not a model exists for it.
manifest_rows = []
for pd_or_c, angle, ttype, path, n_files in groups:
    triple = (pd_or_c, angle, ttype)
    tag = "__".join(triple)
    ckpt_path = MODEL_DIR / f"TVAE_{tag}.pt"

    if not ckpt_path.exists():
        print(f"[TRAIN] {triple}  ({n_files} files)")
        try:
            ckpt = train_one_group(triple, path)
        except Exception as e:
            # Keep going with the remaining groups; the manifest records the failure.
            print(f"  ERROR training {triple}: {e}")
            ckpt = None
        saved = ckpt is not None
    else:
        # A checkpoint from a previous run is reused as-is.
        print(f"[SKIP existing] {triple}")
        ckpt = ckpt_path
        saved = True

    row = {
        "PD_or_C": pd_or_c,
        "turning_angle": angle,
        "type_of_turn": ttype,
        "n_files": n_files,
        "model_saved": saved,
        "model_path": str(ckpt) if ckpt else None,
    }
    manifest_rows.append(row)

manifest = pd.DataFrame(manifest_rows)
manifest_csv = MODEL_DIR / "_models_manifest.csv"
manifest.to_csv(manifest_csv, index=False)
print("\nSaved model manifest:", manifest_csv)
display(manifest.head(20))


Discovered 21 groups.
[TRAIN] ('C', '135_degrees', '-')  (1 files)
  [SKIP small] ('C', '135_degrees', '-') — not enough windows
[TRAIN] ('C', '135_degrees', 'pivot_turn')  (74 files)
  Epoch 01/250 — train 0.90586 val 1.39194
  Epoch 05/250 — train 0.40826 val 0.98006
  Epoch 10/250 — train 0.38053 val 0.95755
  Epoch 15/250 — train 0.37228 val 0.89516
  Epoch 20/250 — train 0.35716 val 0.83862
  Epoch 25/250 — train 0.33993 val 0.88387
  Epoch 30/250 — train 0.38931 val 0.94608
  Epoch 35/250 — train 0.32523 val 0.87369
  Epoch 40/250 — train 0.32296 val 0.80235
  Epoch 45/250 — train 0.19530 val 0.43096
  Epoch 50/250 — train 0.16806 val 0.38778
  Epoch 55/250 — train 0.15317 val 0.38185
  Epoch 60/250 — train 0.15131 val 0.37865
  Epoch 65/250 — train 0.14225 val 0.38823
  Epoch 70/250 — train 0.14834 val 0.37127
  Epoch 75/250 — train 0.13752 val 0.38633
  Epoch 80/250 — train 0.12877 val 0.39096
  Epoch 85/250 — train 0.12523 val 0.37405
  Epoch 90/250 — train 0.12387 val 0.36287

Unnamed: 0,PD_or_C,turning_angle,type_of_turn,n_files,model_saved,model_path
0,C,135_degrees,-,1,False,
1,C,135_degrees,pivot_turn,74,True,D:\Courses\thesis\data\second\models_turning_n...
2,C,135_degrees,step_turn,3,True,D:\Courses\thesis\data\second\models_turning_n...
3,C,180_degrees,pivot_turn,234,True,D:\Courses\thesis\data\second\models_turning_n...
4,C,180_degrees,step_turn,12,True,D:\Courses\thesis\data\second\models_turning_n...
5,C,225_degrees,pivot_turn,3,True,D:\Courses\thesis\data\second\models_turning_n...
6,C,360_degrees,pivot_turn,1,True,D:\Courses\thesis\data\second\models_turning_n...
7,C,90_degrees,-,10,True,D:\Courses\thesis\data\second\models_turning_n...
8,C,90_degrees,pivot_turn,392,True,D:\Courses\thesis\data\second\models_turning_n...
9,C,90_degrees,step_turn,13,True,D:\Courses\thesis\data\second\models_turning_n...


In [14]:
# Load a saved model (handles PyTorch 2.6+ weights_only change)
import pickle

def load_model_for_group(triple):
    """Load the trained model, scaler and column names for one group.

    Args:
        triple: (pd_or_c, angle, turn_type); joined with "__" to locate
            MODEL_DIR / "TVAE_<tag>.pt".

    Returns:
        (model, scaler, header_cols) - the model is on DEVICE and in eval mode.

    Raises:
        FileNotFoundError: when no checkpoint exists for `triple`.
    """
    tag = "__".join(triple)
    ckpt = MODEL_DIR / f"TVAE_{tag}.pt"
    if not ckpt.exists():
        raise FileNotFoundError(f"No checkpoint for {triple}: {ckpt}")

    try:
        # Library-default load first (newer PyTorch defaults to weights_only=True).
        data = torch.load(ckpt, map_location=DEVICE)
    except pickle.UnpicklingError:
        # The checkpoint stores NumPy arrays (scaler statistics), which the
        # restricted unpickler rejects.
        # SECURITY: weights_only=False runs a full pickle load and can execute
        # arbitrary code - only acceptable because these files are written by
        # this notebook. Never point this at an untrusted checkpoint.
        data = torch.load(ckpt, map_location=DEVICE, weights_only=False)

    meta = data["meta"]
    # Rebuild the network with the dimensions it was trained with; older
    # checkpoints without "input_dim" fall back to the notebook constant.
    model = TransformerVAE_BF(meta.get("input_dim", INPUT_DIM), win=meta["window"]).to(DEVICE)
    model.load_state_dict(data["model_state"])
    model.eval()
    scaler = scaler_from_params(data["scaler_mean"], data["scaler_scale"])
    header_cols = data.get("header_cols", [f"{ax}{i}" for i in range(1, NUM_KPT+1) for ax in ("x","y")])
    return model, scaler, header_cols

def _rollout_frames(model, scaler, window_norm, n_frames, root0, scale0, temperature):
    """Autoregressively sample `n_frames` frames starting from `window_norm`.

    At each step the current window (normalized coords) is standardized and
    encoded, a latent is sampled and decoded, and the LAST decoded frame is
    taken as the new frame. That frame is appended to the window (oldest frame
    dropped) and also returned de-normalized with `root0` / `scale0`.

    Returns an array with one row per generated frame.
    """
    win = len(window_norm)
    frames = []
    for _ in range(n_frames):
        win_scaled = scaler.transform(window_norm)
        mu, logvar = model.encode(torch.tensor(win_scaled[None], dtype=torch.float32, device=DEVICE))
        z = model.reparameterize(mu, logvar, temperature=temperature)
        synth_scaled = model.decode(z)[0].detach().cpu().numpy()             # (T,F) standardized
        frame_norm = scaler.inverse_transform(synth_scaled[-1][None])[0]     # (F,) normalized
        frames.append(denormalize_frame_34(frame_norm, root0, scale0))
        # Slide the conditioning window forward by one frame.
        window_norm = np.vstack([window_norm, frame_norm])[-win:]
    return np.array(frames)


def generate_sequence(model, scaler, target_len, seed_seq=None, n_variants=1, temperature=0.95):
    """
    Generate `n_variants` synthetic sequences of `target_len` frames each.

    Generation happens in NORMALIZED space.
    - Seeded (`seed_seq` given and at least one window long): the seed is
      normalized by its frame-0 root & scale, its first window starts the
      rollout, and outputs are de-normalized with that same root & scale so
      the figure keeps the seed's size and position.
    - Seedless (no seed, or seed shorter than one window): the starting window
      is decoded from a standard-normal latent, and outputs stay at unit scale
      around the origin.

    The window length is taken from `model.win` (falling back to the global
    WINDOW) so checkpoints trained with a different window still work.

    Returns:
        list of `n_variants` arrays, each with `target_len` rows.
    """
    win = getattr(model, "win", WINDOW)
    variants = []
    with torch.no_grad():
        for _ in range(n_variants):
            if seed_seq is not None and len(seed_seq) >= win:
                seed_norm, root0, scale0 = normalize_sequence_xy(seed_seq)   # (T,34), root, scale
                start_window = seed_norm[:win].copy()
            else:
                # Seedless: unit scale at origin (you can swap in a real seed's root/scale if desired)
                latent_dim = model.fc_latent.in_features
                z0 = torch.randn(1, latent_dim, device=DEVICE)
                synth_scaled = model.decode(z0)[0].detach().cpu().numpy()
                start_window = scaler.inverse_transform(synth_scaled)[-win:]  # (T,F)
                root0 = np.zeros(2, dtype=np.float32); scale0 = 1.0

            variants.append(
                _rollout_frames(model, scaler, start_window, target_len, root0, scale0, temperature)
            )
    return variants


In [15]:
# Example usage: set to any trained triple from the manifest
CHOSEN_GROUP = ('PD', '180_degrees', 'pivot_turn')   # <-- edit to a trained triple

try:
    model, scaler, header_cols = load_model_for_group(CHOSEN_GROUP)
    group_path = GROUPED_DIR / CHOSEN_GROUP[0] / CHOSEN_GROUP[1] / CHOSEN_GROUP[2]
    some_file = next(group_path.glob("*.csv"))

    # The reader returns None for unusable files (too few columns or frames);
    # check before unpacking so the failure is explained instead of raising
    # "cannot unpack non-iterable NoneType object".
    loaded = read_keypoints_csv(some_file, INPUT_DIM)
    if loaded is None:
        print(f"Seed file {some_file.name} is unusable (too few columns or frames). "
              "Pick another file or CHOSEN_GROUP.")
    else:
        seed_arr, _ = loaded

        # Generate as many frames as the seed recording has.
        target_len = len(seed_arr)

        # Seeded (keeps person size constant from the seed)
        variants = generate_sequence(model, scaler, target_len=target_len, seed_seq=seed_arr,
                                     n_variants=1, temperature=0.95)
        synth = variants[0]
        out_csv = SYN_DIR / f"synthetic_{CHOSEN_GROUP[0]}__{CHOSEN_GROUP[1]}__{CHOSEN_GROUP[2]}_seeded_norm.csv"
        pd.DataFrame(synth, columns=header_cols).to_csv(out_csv, index=False, float_format="%.1f")
        print("Saved (seeded):", out_csv)

        # Seedless (unit-size stick figure at origin)
        variants2 = generate_sequence(model, scaler, target_len=target_len, seed_seq=None,
                                      n_variants=1, temperature=0.95)
        synth2 = variants2[0]
        out_csv2 = SYN_DIR / f"synthetic_{CHOSEN_GROUP[0]}__{CHOSEN_GROUP[1]}__{CHOSEN_GROUP[2]}_seedless_norm.csv"
        pd.DataFrame(synth2, columns=header_cols).to_csv(out_csv2, index=False, float_format="%.1f")
        print("Saved (seedless):", out_csv2)

except StopIteration:
    print("No CSV found in the chosen group's folder. Pick another CHOSEN_GROUP.")
except FileNotFoundError as e:
    print(e)


Saved (seeded): D:\Courses\thesis\data\second\synthetic_turning_norm_250\synthetic_PD__180_degrees__pivot_turn_seeded_norm.csv
Saved (seedless): D:\Courses\thesis\data\second\synthetic_turning_norm_250\synthetic_PD__180_degrees__pivot_turn_seedless_norm.csv


In [16]:
# Generate N seeded + seedless variants from one chosen group
import random
from pathlib import Path

CHOSEN_GROUP = ('PD', '180_degrees', 'pivot_turn')  # edit
N_SEEDED_VARIANTS   = 10
N_SEEDLESS_VARIANTS = 5
TEMP = 0.9
FIXED_SEED = 123               # for reproducibility; set to None to randomize

if FIXED_SEED is not None:
    random.seed(FIXED_SEED); np.random.seed(FIXED_SEED); torch.manual_seed(FIXED_SEED)

# load model + a seed file
model, scaler, header_cols = load_model_for_group(CHOSEN_GROUP)
group_path = GROUPED_DIR / CHOSEN_GROUP[0] / CHOSEN_GROUP[1] / CHOSEN_GROUP[2]
seed_candidates = sorted(group_path.glob("*.csv"))
if not seed_candidates:
    # Clearer than the IndexError raised by indexing an empty list.
    raise FileNotFoundError(f"No CSV found in {group_path}")
seed_file = seed_candidates[0]  # or pick a specific one

# The reader returns None for unusable files; fail with an explicit message
# instead of an opaque unpacking TypeError.
loaded = read_keypoints_csv(seed_file, INPUT_DIM)
if loaded is None:
    raise ValueError(f"Seed file {seed_file.name} is unusable (too few columns or frames)")
seed_arr, _ = loaded
target_len = len(seed_arr)

print("Seed file:", seed_file.name, "frames:", target_len)

# Seeded variants first, then seedless ones; both share the same save logic.
for mode, mode_seed, mode_count in (
    ("seeded", seed_arr, N_SEEDED_VARIANTS),
    ("seedless", None, N_SEEDLESS_VARIANTS),
):
    for v in range(mode_count):
        [synth] = generate_sequence(
            model, scaler, target_len=target_len,
            seed_seq=mode_seed, n_variants=1,
            temperature=TEMP
        )
        out_csv = SYN_DIR / f"synthetic_{CHOSEN_GROUP[0]}__{CHOSEN_GROUP[1]}__{CHOSEN_GROUP[2]}__{mode}_v{v:02d}.csv"
        pd.DataFrame(synth, columns=header_cols).to_csv(out_csv, index=False, float_format="%.1f")
        print("Saved:", out_csv.name)


Seed file: Pt253_PD_n_3482.csv frames: 72
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v00.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v01.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v02.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v03.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v04.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v05.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v06.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v07.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v08.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seeded_v09.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seedless_v00.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seedless_v01.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seedless_v02.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seedless_v03.csv
Saved: synthetic_PD__180_degrees__pivot_turn__seedless_v04.csv
