In [1]:
!pip install pandas



In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Construye tensores (4 × ROI × ROI) recortando todas las series al
mínimo número de TRs presente en los datos.
Matrices:
  0 = Pearson corr.
  1 = Mutual Information
  2 = Granger Causality (-log p-value, lag = 1)
  3 = Variabilidad dFC (media |ΔCorr|, ventana 20 TR, solape 50 %)
Autor: ChatGPT · 2025-04-27
"""

import os, logging, numpy as np, pandas as pd, torch
from scipy.stats import zscore
from sklearn.metrics import mutual_info_score
from statsmodels.tsa.stattools import grangercausalitytests
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm

# ---------- 1 · Paths ----------
# Everything lives under one base directory: the subject table, the cleaned
# signal files that are read, and the folder where tensors are written.
base_dir = "/home/santiago/Desktop/GrandMeanNorm/"
csv_path, signals_dir, output_dir = (
    os.path.join(base_dir, leaf)
    for leaf in ("SubjectsDataAndTests.csv", "CleanSignals", "TensorData")
)
os.makedirs(output_dir, exist_ok=True)

# ---------- 2 · Metadata ----------
# Subject IDs are handled as strings because they are embedded in file names.
df = pd.read_csv(csv_path)
subject_ids = df["SubjectID"].astype(str).tolist()
print(f"Total subjects in CSV: {len(subject_ids)}")

# ---------- 3 · Logging ----------
# Only errors are recorded, in a log file in the current working directory.
logging.basicConfig(level=logging.ERROR, filename='tensor_processing_clean.log')

# ---------- 4 · Find the minimum TR count ----------
# Collect the first-axis length of every readable signal file. Files are
# opened memory-mapped, so only the header is needed to obtain the shape.
tr_counts = []
for sid in subject_ids:
    f = os.path.join(signals_dir, f"signals_clean_{sid}.npy")
    if os.path.exists(f):
        try:
            tr_counts.append(np.load(f, mmap_mode="r").shape[0])
        except Exception as e:
            # Unreadable files are logged and left out of the minimum.
            logging.error(f"{sid} · shape read error: {e}")

min_tr = min(tr_counts) if tr_counts else None
if min_tr is None:
    raise RuntimeError("No se encontró ningún archivo de señales.")

# Every series is later truncated to this common length.
TARGET_TR = min_tr
print(f"👉 Longitud mínima encontrada (TARGET_TR) = {TARGET_TR} TR")

# ---------- 5 · Funciones de conectividad ----------
def discretize(arr, bins=10):
    """Bin every column of a 2-D array into integer labels.

    Each column is discretized independently with equal-width bins spanning
    that column's own min-max range.

    Args:
        arr: 2-D NumPy array; columns are the series to discretize.
        bins: Bin specification forwarded to ``np.histogram_bin_edges``
            (an int gives that many equal-width bins).

    Returns:
        ``np.int16`` array with the same shape as ``arr`` whose values are
        bin labels in ``0 .. n_bins - 1``.
    """
    disc = np.zeros_like(arr, dtype=np.int16)
    for k in range(arr.shape[1]):
        col = arr[:, k]
        edges = np.histogram_bin_edges(col, bins=bins)
        # Digitize against the interior edges only. Using all ``n_bins + 1``
        # edges would put the column maximum (equal to the last edge) in an
        # extra bin of its own, yielding n_bins + 1 labels instead of n_bins.
        disc[:, k] = np.digitize(col, edges[1:-1])
    return disc

def mutual_information_matrix(arr, bins=10):
    """Return the pairwise mutual-information matrix of the columns of ``arr``.

    The columns are first turned into integer labels with
    ``discretize(arr, bins)``. ``mutual_info_score`` is then evaluated once
    per unordered column pair (diagonal included) and mirrored, so the result
    is symmetric.

    Args:
        arr: 2-D array whose columns are the series to compare.
        bins: Passed through to ``discretize``.

    Returns:
        ``(n_cols, n_cols)`` ``np.float32`` array.
    """
    labels = discretize(arr, bins)
    n_cols = labels.shape[1]
    mi = np.zeros((n_cols, n_cols), dtype=np.float32)
    # Walk the upper triangle (row <= col) and fill both halves at once.
    for row, col in zip(*np.triu_indices(n_cols)):
        score = mutual_info_score(labels[:, row], labels[:, col])
        mi[row, col] = score
        mi[col, row] = score
    return mi

def _granger_worker(args):
    """Compute one entry of the Granger-causality matrix.

    Args:
        args: Tuple ``(sigs, i, j, lag)``. ``sigs`` is a 2-D array indexed as
            ``sigs[:, [i, j]]``; ``i`` and ``j`` are column indices and
            ``lag`` is the lag passed to ``grangercausalitytests``.
            NOTE(review): per the statsmodels convention the test asks
            whether the second column (``j``) Granger-causes the first
            (``i``) - confirm this is the intended direction.

    Returns:
        Tuple ``(i, j, score)`` where ``score`` is ``-log(p)`` of the
        ``ssr_ftest`` p-value at ``lag``. The score is 0.0 on the diagonal,
        when the test raises, or when the p-value is NaN or negative.
    """
    sigs, i, j, lag = args
    if i == j:
        return i, j, 0.0
    try:
        res = grangercausalitytests(sigs[:, [i, j]], lag, verbose=False)
        p = res[lag][0]['ssr_ftest'][1]
    except Exception as e:
        # Best effort: a failed pair scores 0.0, but the failure is recorded
        # instead of disappearing silently.
        logging.error(f"Granger ({i}, {j}) · test error: {e}")
        return i, j, 0.0
    if not p >= 0:
        # NaN or otherwise invalid p-value: treat as "no evidence".
        return i, j, 0.0
    # A p-value that underflows to exactly 0 is the strongest evidence, so it
    # must not collapse to a score of 0.0. Clamp to the smallest positive
    # float to keep -log(p) finite and large.
    return i, j, float(-np.log(max(p, np.finfo(np.float64).tiny)))

def granger_matrix(arr, lag=1, workers=6):
    """Build the pairwise Granger-causality score matrix in parallel.

    Args:
        arr: 2-D array whose columns are the series to test.
        lag: Lag forwarded to ``_granger_worker`` for every pair.
        workers: ``max_workers`` for the ``ProcessPoolExecutor``.

    Returns:
        ``(n_cols, n_cols)`` ``np.float32`` array where entry ``[i, j]`` is
        the score ``_granger_worker`` returns for the pair ``(i, j)``. The
        diagonal is 0.
    """
    n = arr.shape[1]
    G = np.zeros((n, n), dtype=np.float32)
    # The worker returns 0.0 for i == j and G is already zero there, so only
    # off-diagonal pairs are dispatched.
    tasks = [(arr, i, j, lag) for i in range(n) for j in range(n) if i != j]
    if not tasks:
        return G
    # Send pairs to the workers in batches instead of one future per pair;
    # this cuts the per-task inter-process overhead of n*(n-1) submissions.
    chunk = max(1, len(tasks) // ((workers or 1) * 4))
    with ProcessPoolExecutor(max_workers=workers) as ex:
        for i, j, v in ex.map(_granger_worker, tasks, chunksize=chunk):
            G[i, j] = v
    return G

def correlation_matrix(arr):
    """Return the Pearson correlation matrix between the columns of ``arr``.

    Columns are z-scored along axis 0 before ``np.corrcoef`` is applied with
    columns as variables. The result is cast to ``np.float32``.
    """
    standardized = zscore(arr, axis=0)
    pearson = np.corrcoef(standardized, rowvar=False)
    return pearson.astype(np.float32)

def variability_matrix(arr, win=20, step=10):
    """Return the mean window-to-window change of absolute correlation.

    Windows of ``win`` rows are taken every ``step`` rows. For each window
    the columns are z-scored and the absolute correlation matrix is computed.
    The result is the average of the absolute differences between
    consecutive windows.

    Args:
        arr: 2-D array (rows are time points, columns are series).
        win: Window length in rows.
        step: Offset in rows between consecutive window starts.

    Returns:
        ``np.float32`` matrix of mean absolute changes between windows.

    Raises:
        ValueError: If fewer than two windows fit in the series.
    """
    n_tp, _ = arr.shape
    n_win = (n_tp - win) // step + 1
    if n_win < 2:
        raise ValueError("Series muy corta para variabilidad.")

    def abs_corr(start):
        # Absolute correlation of the z-scored window starting at `start`.
        window = zscore(arr[start:start + win], axis=0)
        return np.abs(np.corrcoef(window, rowvar=False).astype(np.float32))

    previous = abs_corr(0)
    total = np.zeros_like(previous)
    # Window starts after the first one: step, 2*step, ..., (n_win-1)*step.
    for start in range(step, n_win * step, step):
        current = abs_corr(start)
        total += np.abs(current - previous)
        previous = current
    return total / (n_win - 1)

def stack_mats(corr, mi, gc, var):
    """Stack the four matrices along a new leading axis, in argument order."""
    channels = (corr, mi, gc, var)
    return np.stack(channels)

# ---------- 6 · Procesamiento de un sujeto ----------
def process_subject(sid, group):
    """Build and save the 4-channel connectivity tensor for one subject.

    Loads ``signals_clean_{sid}.npy`` from ``signals_dir``, truncates it to
    ``TARGET_TR`` rows, computes the correlation, mutual-information, Granger
    and variability matrices, stacks them, and saves the tensor as
    ``{group}_tensor_{sid}.pt`` in ``output_dir``.

    Args:
        sid: Subject ID as a string (used in the input and output file names).
        group: Group label used as the output file-name prefix.

    Returns:
        True if the tensor was written. False if the file is missing or
        unreadable, the series is shorter than ``TARGET_TR``, it contains
        non-finite values, or computing/saving the tensor fails.
    """
    f = os.path.join(signals_dir, f"signals_clean_{sid}.npy")
    if not os.path.exists(f):
        print(f"⚠️ Falta archivo: {f}")
        return False
    try:
        sig = np.load(f).astype(np.float32)
    except Exception as e:
        logging.error(f"{sid} · load error: {e}")
        return False

    if sig.shape[0] < TARGET_TR:
        print(f"⚠️ {sid} tiene menos de {TARGET_TR} TR.  Se descarta.")
        return False

    sig = sig[:TARGET_TR]                      # truncate to the common minimum

    # One pass covers both NaN and +/-Inf.
    if not np.isfinite(sig).all():
        print(f"⚠️ {sid} contiene NaNs/Inf.  Saltado.")
        return False

    # A failure for one subject (e.g. series too short for the variability
    # windows, a worker-pool error, a write error) must not abort the whole
    # batch: log it with its traceback and skip, as is done for load errors.
    try:
        corr = correlation_matrix(sig)
        mi   = mutual_information_matrix(sig)
        gc   = granger_matrix(sig, workers=4)
        var  = variability_matrix(sig)

        tensor = torch.from_numpy(stack_mats(corr, mi, gc, var))
        torch.save(tensor, os.path.join(output_dir, f"{group}_tensor_{sid}.pt"))
    except Exception:
        logging.exception(f"{sid} · processing error")
        return False

    return True

# ---------- 7 · Main loop ----------
# Resolve every subject's research group once, instead of re-casting the ID
# column and scanning the whole DataFrame for each subject.
group_of = {}
if 'ResearchGroup' in df.columns:
    for key, label in zip(df['SubjectID'].astype(str), df['ResearchGroup']):
        # The first row wins when an ID is duplicated.
        group_of.setdefault(key, label)

ok = 0
for sid in tqdm(subject_ids, desc="Building tensors"):
    label = group_of.get(sid)
    # Any label other than AD / CN (including a missing one) goes to "Other".
    group = label if label in ("AD", "CN") else "Other"
    if process_subject(sid, group):
        ok += 1

print(f"\n✅ Tensors creados: {ok}/{len(subject_ids)}")
print(f"✅ Directorio de salida: {output_dir}")






Total subjects in CSV: 432
👉 Longitud mínima encontrada (TARGET_TR) = 140 TR


