In [23]:
import os, re, glob, numpy as np

base = Path(r"C:\Users\Joseph\generative-health-models")


In [3]:
# === EDIT THIS if your sample folder is different ===
BASE = r"../results/samples_v14_mm_tune4_linearup_v4"

# Margins (used just to estimate boundary/clipping fraction)
ECG_MARGIN = 0.96
LOW_MARGIN = 0.92

def _epochs_from(pattern):
    files = [f for f in glob.glob(os.path.join(BASE, pattern)) if "_DENORM" not in f]
    out = {}
    for f in files:
        m = re.search(r'epoch_(\d+)', os.path.basename(f))
        if m: out[int(m.group(1))] = f
    return out

ecg_map = _epochs_from("fake_ecg_epoch_*.npy")
low_map = _epochs_from("fake_low_epoch_*.npy")

def ecg_stats(path):
    x = np.load(path)           # (N, 5250, 1)
    x = x.reshape(len(x), -1)
    std_mean = x.std(axis=1).mean()
    clip = (np.abs(x) > ECG_MARGIN).mean()
    return {"ecg_std": float(std_mean), "ecg_clip": float(clip), "n_ecg": int(len(x))}

def low_stats(path):
    x = np.load(path)           # (N, 120, 2) -> [EDA, RESP]
    eda = x[...,0].reshape(len(x), -1)
    resp = x[...,1].reshape(len(x), -1)
    eda_std = eda.std(axis=1).mean()
    resp_std = resp.std(axis=1).mean()
    low_clip = (np.abs(x) > LOW_MARGIN).mean()
    return {
        "eda_std": float(eda_std),
        "resp_std": float(resp_std),
        "low_clip": float(low_clip),
        "n_low": int(len(x))
    }

rows = []
for e in sorted(set(ecg_map.keys()) | set(low_map.keys())):
    row = {"epoch": e}
    if e in ecg_map: row.update(ecg_stats(ecg_map[e]))
    if e in low_map: row.update(low_stats(low_map[e]))
    # crude score: aim for std≈1 and little clipping
    score = 0.0
    if "ecg_std" in row:  score += abs(row["ecg_std"] - 1.0)
    if "eda_std" in row:  score += 0.5*abs(row["eda_std"] - 1.0)
    if "resp_std" in row: score += 0.5*abs(row["resp_std"] - 1.0)
    score += 5.0 * (row.get("ecg_clip",0.0) + row.get("low_clip",0.0))
    row["score"] = score
    rows.append(row)

# Pretty print
if not rows:
    print("No matching files found under:", BASE)
else:
    rows = sorted(rows, key=lambda r: r["score"])
    print("Top candidates (lower score is better):")
    print("epoch | ecg_std  eda_std  resp_std | ecg_clip low_clip | n_ecg n_low | score")
    for r in rows[:8]:
        print(f'{r["epoch"]:>5} | {r.get("ecg_std",np.nan):>7.3f} {r.get("eda_std",np.nan):>7.3f} {r.get("resp_std",np.nan):>8.3f} |'
              f' {r.get("ecg_clip",0):>7.3f} {r.get("low_clip",0):>7.3f} | {r.get("n_ecg",0):>5} {r.get("n_low",0):>5} | {r["score"]:>5.3f}')

    best = rows[0]["epoch"]
    ecg_f = ecg_map.get(best)
    low_f = low_map.get(best)
    out_npz = os.path.join(BASE, f"gen_e{best:03d}_lowecg.npz")

    print("\nBest epoch suggestion:", best)
    print("Files:")
    print("  ECG :", ecg_f)
    print("  LOW :", low_f)
    print("NPZ will be written to:")
    print(" ", out_npz)

    # Also print the exact packing command you can run:
    print("\nPacking command:")
    print(f'python pack_fake_npz.py --fake_low "{low_f}" --fake_ecg "{ecg_f}" --out_npz "{out_npz}"')

    # Optional: pack right here (uncomment to create the npz now)
    # ecg = np.load(ecg_f)
    # low = np.load(low_f)
    # np.savez(out_npz, low=low, ecg=ecg)
    # print("Wrote:", out_npz)

Top candidates (lower score is better):
epoch | ecg_std  eda_std  resp_std | ecg_clip low_clip | n_ecg n_low | score
    9 |   0.482   0.053    0.540 |   0.000   0.021 |     8     8 | 1.328
    8 |   0.437   0.059    0.531 |   0.000   0.013 |     8     8 | 1.331
    7 |   0.400   0.068    0.511 |   0.000   0.006 |     8     8 | 1.339
   10 |   0.487   0.050    0.546 |   0.000   0.028 |     8     8 | 1.356
    6 |   0.371   0.080    0.477 |   0.000   0.003 |     8     8 | 1.363
   11 |   0.476   0.047    0.552 |   0.000   0.030 |     8     8 | 1.376
   12 |   0.474   0.045    0.556 |   0.000   0.030 |     8     8 | 1.376
   13 |   0.466   0.043    0.556 |   0.000   0.032 |     8     8 | 1.393

Best epoch suggestion: 9
Files:
  ECG : ../results/samples_v14_mm_tune4_linearup_v4\fake_ecg_epoch_009.npy
  LOW : ../results/samples_v14_mm_tune4_linearup_v4\fake_low_epoch_009.npy
NPZ will be written to:
  ../results/samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg.npz

Packing command:
python p

In [None]:
low_path = r"..\results\samples_v14_mm_tune4_linearup_v4\fake_low_epoch_009.npy"
ecg_path = r"..\results\samples_v14_mm_tune4_linearup_v4\fake_ecg_epoch_009.npy"
out_npz  = r"..\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg.npz"

low = np.load(low_path, mmap_mode="r")   # (N, 120, 2) -> [:,:,0]=EDA, [:,:,1]=RESP
ecg = np.load(ecg_path, mmap_mode="r")   # (N, 5250, 1)

np.savez(out_npz, low=low, ecg=ecg)
print("Wrote:", out_npz, "| shapes:", low.shape, ecg.shape)

In [7]:
fold_dir = r"data/processed/tc_multigan_fold_S10"
out_npz  = r"data/processed/tc_multigan_fold_S10/test_real.npz"

low_path = ecg_path = None
for f in glob.glob(os.path.join(fold_dir, "test*.npy")):
    x = np.load(f, mmap_mode="r")
    if x.ndim == 3 and x.shape[1] == 120 and x.shape[2] in (2, 3):
        low_path = f
    elif x.ndim == 3 and x.shape[1] == 5250 and x.shape[2] == 1:
        ecg_path = f

print("Selected low:", low_path)
print("Selected ecg:", ecg_path)
if not (low_path and ecg_path):
    raise SystemExit("Couldn't find test low/ecg.")

low = np.load(low_path)   # (N,120,2) -> [:,:,0]=EDA, [:,:,1]=RESP
ecg = np.load(ecg_path)   # (N,5250,1)

eda  = low[..., 0]
resp = low[..., 1]
ecg  = ecg[..., 0]

os.makedirs(os.path.dirname(out_npz), exist_ok=True)
np.savez(out_npz, eda=eda, resp=resp, ecg=ecg)
print("Wrote:", out_npz, "| shapes:", eda.shape, resp.shape, ecg.shape)

Selected low: None
Selected ecg: None


SystemExit: Couldn't find test low/ecg.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [10]:
fold_dir = r"../data/processed/tc_multigan_fold_S10"
out_npz  = os.path.join(fold_dir, "test_real.npz")

low = np.load(os.path.join(fold_dir, "test_X_low.npy"))   # (N,120,2) or (N,120,3)
ecg = np.load(os.path.join(fold_dir, "test_X_ecg.npy"))   # (N,5250,1) or (N,5250)

print("low shape:", low.shape, " | ecg shape:", ecg.shape)

# Take EDA and RESP from LOW
if low.ndim == 3 and low.shape[1] == 120 and low.shape[2] >= 2:
    eda  = low[..., 0]
    resp = low[..., 1]
elif low.ndim == 2 and low.shape[1] == 240:         # fallback if flattened
    low = low.reshape(-1, 120, 2)
    eda, resp = low[..., 0], low[..., 1]
else:
    raise ValueError(f"Unexpected LOW shape {low.shape}")

# Take ECG, squeeze last dim if present
if ecg.ndim == 3 and ecg.shape[1] == 5250 and ecg.shape[2] == 1:
    ecg = ecg[..., 0]
elif ecg.ndim == 2 and ecg.shape[1] == 5250:
    pass
else:
    raise ValueError(f"Unexpected ECG shape {ecg.shape}")

np.savez(out_npz, eda=eda, resp=resp, ecg=ecg)
print("Wrote:", out_npz, "| shapes -> eda:", eda.shape, "resp:", resp.shape, "ecg:", ecg.shape)


low shape: (194, 120, 2)  | ecg shape: (194, 5250, 1)
Wrote: ../data/processed/tc_multigan_fold_S10\test_real.npz | shapes -> eda: (194, 120) resp: (194, 120) ecg: (194, 5250)


In [12]:
import numpy as np

real_p = r"../data/processed/tc_multigan_fold_S10/test_real.npz"
fake_p = r"../results/samples_v14_mm_tune4_linearup_v4/gen_e009_lowecg.npz"

real = np.load(real_p)
fake = np.load(fake_p)

print("REAL keys:", real.files, {k: real[k].shape for k in real.files})
print("FAKE keys:", fake.files, {k: fake[k].shape for k in fake.files})

REAL keys: ['eda', 'resp', 'ecg'] {'eda': (194, 120), 'resp': (194, 120), 'ecg': (194, 5250)}
FAKE keys: ['low', 'ecg'] {'low': (8, 120, 2), 'ecg': (8, 5250, 1)}


In [13]:
# --- paths (absolute to avoid CWD confusion) ---
fake_src   = r"C:\Users\Joseph\generative-health-models\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg.npz"
fake_fixed = r"C:\Users\Joseph\generative-health-models\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed.npz"

d = np.load(fake_src)
print("FAKE keys/shapes:", d.files, {k: d[k].shape for k in d.files})

low = d["low"]               # (8, 120, 2)
ecg = d["ecg"]               # (8, 5250, 1)

eda  = low[..., 0]           # (8, 120)
resp = low[..., 1]           # (8, 120)
ecg  = ecg[..., 0]           # (8, 5250), squeeze last dim

np.savez(fake_fixed, eda=eda, resp=resp, ecg=ecg)
print("Wrote:", fake_fixed, "| shapes:", eda.shape, resp.shape, ecg.shape)

FAKE keys/shapes: ['low', 'ecg'] {'low': (8, 120, 2), 'ecg': (8, 5250, 1)}
Wrote: C:\Users\Joseph\generative-health-models\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed.npz | shapes: (8, 120) (8, 120) (8, 5250)


In [15]:
import numpy as np, os

fake_p = r"..\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed.npz"
stats_p = r"..\data\processed\tc_multigan_fold_S10\norm_low.npz"

fake = np.load(fake_p)
eda = fake["eda"].astype(float)
resp = fake["resp"].astype(float)

stats = np.load(stats_p)
print("norm_low keys:", stats.files)

# try common key names:
mu = stats.get("mean", stats.get("mu", None))
sd = stats.get("std",  stats.get("sigma", None))

if mu is None or sd is None:
    raise RuntimeError("Could not find mean/std in norm_low.npz")

# assume [EDA, RESP] ordering; swap indices if your file says otherwise
eda = (eda - mu[0]) / (sd[0] + 1e-12)
resp = (resp - mu[1]) / (sd[1] + 1e-12)

out_p = fake_p.replace(".npz", "_renorm.npz")
np.savez(out_p, eda=eda, resp=resp, ecg=fake["ecg"])
print("Wrote:", out_p, {k: v.shape for k, v in np.load(out_p).items()})

norm_low keys: ['mean', 'std']
Wrote: ..\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed_renorm.npz {'eda': (8, 120), 'resp': (8, 120), 'ecg': (8, 5250)}


In [16]:
import numpy as np

real_p = r"..\data\processed\tc_multigan_fold_S10\test_real.npz"
fake_p = r"..\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed_renorm.npz"

real = np.load(real_p)
fake = np.load(fake_p)

print("REAL:", real.files, {k: real[k].shape for k in real.files})
print("FAKE:", fake.files, {k: fake[k].shape for k in fake.files})

for k in ("eda","resp","ecg"):
    r = real[k].ravel()
    f = fake[k].ravel()
    print(f"{k}: real mean/std = {r.mean():.3f}/{r.std():.3f} | fake mean/std = {f.mean():.3f}/{f.std():.3f}")

REAL: ['eda', 'resp', 'ecg'] {'eda': (194, 120), 'resp': (194, 120), 'ecg': (194, 5250)}
FAKE: ['eda', 'resp', 'ecg'] {'eda': (8, 120), 'resp': (8, 120), 'ecg': (8, 5250)}
eda: real mean/std = -1.095/0.140 | fake mean/std = -1.403/0.159
resp: real mean/std = 0.000/0.911 | fake mean/std = 0.021/0.182
ecg: real mean/std = 0.000/0.538 | fake mean/std = 0.175/0.507


In [19]:
stats = np.load(r"..\data\processed\tc_multigan_fold_S10\norm_low.npz")
print("means:", stats["mean"], "stds:", stats["std"])

f0 = np.load(r"..\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed.npz")
print("orig fake shapes:", {k:v.shape for k,v in f0.items()})
print("orig EDA/RESP ranges:", f0["eda"].min(), f0["eda"].max(), f0["resp"].min(), f0["resp"].max())

f1 = np.load(r"..\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed_renorm.npz")
print("renorm ranges:", f1["eda"].min(), f1["eda"].max(), f1["resp"].min(), f1["resp"].max())

means: [ 4.8523049e+00 -2.6733306e-04] stds: [3.3454838 3.093252 ]
orig fake shapes: {'eda': (8, 120), 'resp': (8, 120), 'ecg': (8, 5250)}
orig EDA/RESP ranges: -0.8139023 0.6417173 -0.96190315 0.95164174
renorm ranges: -1.6936884548543196 -1.2585885622947701 -0.31088182900295347 0.3077373226735141


In [20]:
import numpy as np

real = np.load(r"../data/processed/tc_multigan_fold_S10/test_real.npz")
fake0 = np.load(r"../results/samples_v14_mm_tune4_linearup_v4/gen_e009_lowecg_fixed.npz")          # before renorm
fake1 = np.load(r"../results/samples_v14_mm_tune4_linearup_v4/gen_e009_lowecg_fixed_renorm.npz")  # after renorm

def stats(x): 
    x = x.reshape(-1)
    return dict(mean=float(np.mean(x)), std=float(np.std(x)), 
                min=float(np.min(x)), max=float(np.max(x)))

print("REAL  EDA:", stats(real["eda"]))
print("FAKE0 EDA:", stats(fake0["eda"]))  # should be ~0 mean, ~0.05–0.15 std if generator collapsed
print("FAKE1 EDA:", stats(fake1["eda"]))  # if you z-scored again, std will be much larger

REAL  EDA: {'mean': -1.0953425168991089, 'std': 0.1400976926088333, 'min': -1.3401070833206177, 'max': -0.8161068558692932}
FAKE0 EDA: {'mean': 0.1588442623615265, 'std': 0.5316680073738098, 'min': -0.8139023184776306, 'max': 0.6417173147201538}
FAKE1 EDA: {'mean': -1.4029243533564986, 'std': 0.15892112060340477, 'min': -1.6936884548543196, 'max': -1.2585885622947701}


In [21]:
from pathlib import Path
import numpy as np

fold = Path(r"C:\Users\Joseph\generative-health-models\data\processed\tc_multigan_fold_S10")

def split_eda_resp(X, eda_first=True):
    """
    Accepts X with shape (N, 2, T) or (N, T, 2).
    Returns (eda, resp) each (N, T).
    """
    if X.ndim != 3 or 2 not in X.shape:
        raise ValueError(f"Expected 3D array with a 2-channel axis; got shape {X.shape}")
    if X.shape[1] == 2:       # (N, 2, T)
        ch0, ch1 = X[:,0,:], X[:,1,:]
    elif X.shape[2] == 2:     # (N, T, 2)
        ch0, ch1 = X[:,:,0], X[:,:,1]
    else:
        raise ValueError(f"Couldn't find channel axis of size 2 in shape {X.shape}")
    return (ch0, ch1) if eda_first else (ch1, ch0)

def build_npz(split, eda_first=True, label_col=0):
    Xp = fold / f"{split}_X_low.npy"
    yp = fold / f"{split}_cond.npy"   # adjust if your labels live elsewhere
    if not Xp.exists():
        raise FileNotFoundError(f"Missing {Xp}")
    if not yp.exists():
        raise FileNotFoundError(f"Missing {yp}")

    X = np.load(Xp)
    y_raw = np.load(yp)

    # choose label column
    if y_raw.ndim == 1:
        y = y_raw.astype(int).ravel()
    else:
        # print uniques per column to help you pick
        print(f"{split} cond columns uniques:", [np.unique(y_raw[:,i]) for i in range(y_raw.shape[1])])
        y = y_raw[:, label_col].astype(int).ravel()

    eda, resp = split_eda_resp(X, eda_first=eda_first)
    outp = fold / f"{split}_real.npz"
    np.savez(outp, eda=eda, resp=resp, y=y)
    print(f"Wrote {outp}  | eda:{eda.shape} resp:{resp.shape} y:{y.shape}")

# Run these:
build_npz("train", eda_first=True, label_col=0)
# Only if you need to rebuild test_real.npz from arrays:
# build_npz("test", eda_first=True, label_col=0)

Wrote C:\Users\Joseph\generative-health-models\data\processed\tc_multigan_fold_S10\train_real.npz  | eda:(2645, 120) resp:(2645, 120) y:(2645,)


In [24]:
fake_src  = base / r"results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed.npz"
cond_path = base / r"data\processed\tc_multigan_fold_S10\test_cond.npy"   # labels you used when generating (if any)

def guess_eda_resp_keys(npz):
    keys = set(npz.files)
    if {"eda","resp"} <= keys:
        return "eda_resp"
    for k in ("low", "lowecg", "x", "data"):
        if k in keys:
            return k
    return None

def summarize_labels(y):
    vals, cnts = np.unique(y, return_counts=True)
    return dict(zip(map(int, vals), map(int, cnts)))

print("File:", fake_src)
assert fake_src.exists(), "Fake file not found."

f = np.load(fake_src)
print("Keys:", f.files)

# 1) Check for labels inside the fake file
label_key = next((k for k in ("y","labels","label","Y","Labels","Label") if k in f.files), None)
if label_key:
    y = f[label_key].ravel()
    print("✅ Labels found in fake file under key:", label_key)
    print("   y shape:", y.shape, "| uniques:", summarize_labels(y))
else:
    print("❌ No labels found inside fake file.")

# 2) Check signal channels and sample count
sig_key = guess_eda_resp_keys(f) if isinstance(f, np.lib.npyio.NpzFile) else None
N = None
if sig_key == "eda_resp":
    eda, resp = f["eda"], f["resp"]
    N, T = eda.shape[0], eda.shape[1]
    print(f"Signals: eda/resp present | eda shape {eda.shape} | resp shape {resp.shape}")
elif sig_key is not None:
    X = f[sig_key]
    print(f"Signals: packed under '{sig_key}' with shape {X.shape}")
    if X.ndim == 3 and 2 in X.shape:
        N = X.shape[0]
        T = X.shape[2] if X.shape[1] == 2 else X.shape[1]
        print(f"Interpreting as (N,2,T) or (N,T,2). N={N}, T={T}")
    else:
        print("⚠️ Unexpected shape; expected (N,2,T) or (N,T,2).")
else:
    print("⚠️ Could not find 'eda'/'resp' or a packed 2-channel array.")

# 3) If no labels inside, see if test_cond.npy can be used
if not label_key:
    if cond_path.exists():
        y_raw = np.load(cond_path)
        print("Found cond file:", cond_path, "| shape:", y_raw.shape)
        # If multi-column, try to auto-pick a binary column (0/1)
        if y_raw.ndim == 1:
            y_cond = y_raw.ravel()
            print("Cond uniques:", summarize_labels(y_cond))
        else:
            cand_cols = []
            for c in range(y_raw.shape[1]):
                u = np.unique(y_raw[:,c])
                if len(u) == 2 and set(u).issubset({0,1}):
                    cand_cols.append(c)
            if cand_cols:
                col = cand_cols[0]
                y_cond = y_raw[:, col].astype(int).ravel()
                print(f"Auto-picked binary cond column {col} with uniques:", summarize_labels(y_cond))
            else:
                print("⚠️ No obvious binary (0/1) column found in cond; you may need to specify label_col manually.")
                y_cond = y_raw[:, 0].astype(int).ravel()

        if N is not None and len(y_cond) != N:
            print(f"⚠️ Label count ({len(y_cond)}) does not match samples ({N}). Make sure you used the matching cond for this fake set.")
        else:
            print("If you want, you can pair these labels with the fake signals to build test_fake.npz for the classifier.")
    else:
        print("No cond file found at:", cond_path)

File: C:\Users\Joseph\generative-health-models\results\samples_v14_mm_tune4_linearup_v4\gen_e009_lowecg_fixed.npz
Keys: ['eda', 'resp', 'ecg']
❌ No labels found inside fake file.
Signals: eda/resp present | eda shape (8, 120) | resp shape (8, 120)
Found cond file: C:\Users\Joseph\generative-health-models\data\processed\tc_multigan_fold_S10\test_cond.npy | shape: (194,)
Cond uniques: {1: 77, 2: 46, 3: 23, 4: 48}
⚠️ Label count (194) does not match samples (8). Make sure you used the matching cond for this fake set.


In [30]:
from models.tc_multigan import create_tc_multigan
import torch

cfg = dict(z_dim=128, condition_dim=16, seq_length_low=240, seq_length_ecg=6000, hidden_dim=256)
G, D = create_tc_multigan(cfg)

B = 4
z = torch.randn(B, cfg['z_dim'])
cond = torch.randn(B, cfg['seq_length_low'], cfg['condition_dim'])

low_fake, ecg_fake = G(z, cond)
print(low_fake.shape, ecg_fake.shape)  # [B, 240, 2], [B, 6000, 1]

d_out = D(low_fake, ecg_fake, cond)
print(d_out[0].shape, d_out[1], d_out[2].shape)  

torch.Size([4, 240, 2]) torch.Size([4, 6000, 1])
torch.Size([4, 1]) None torch.Size([4, 257])
