## Récupération Event et vérification segment

In [1]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset

## création dataset anoté

In [2]:
import os, glob
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

# Dossiers racines
ROOTS = {
    "events": "Events",
    "emg": "/home/fisa/stockage1/mindscan/EMG",
    "imu": "/home/fisa/stockage1/mindscan/IMU",
    "plantar": "/home/fisa/stockage1/mindscan/Plantar_activity",
    "skeleton": "/home/fisa/stockage1/mindscan/Skeleton",
}

# Noms de fichiers dans chaque dossier
FILENAMES = {
    "emg": "emg.csv",
    "imu": "imu.csv",
    "plantar": "insoles.csv",
    "skeleton": "skeleton.csv",
}

# Longueur fixe après découpage (tu peux changer)
L = 256

# Colonnes à ignorer si présentes
DROP_COLS = {"Frame", "Time", "TIME", "EMG TIME", "ACC_GYRO TIME"}


In [3]:
def smart_read_csv(path: str):
    # essaie ; puis tab puis virgule
    for sep in [";", "\t", ","]:
        try:
            df = pd.read_csv(path, sep=sep)
            if df.shape[1] > 2:
                return df
        except Exception:
            pass
    raise ValueError(f"Impossible de lire {path} (séparateur inconnu)")

def find_time_col(df):
    for c in df.columns:
        if "time" in c.lower():
            return c
    raise ValueError("Aucune colonne temps trouvée")

def read_and_slice_by_time(path, t0, t1):
    df = smart_read_csv(path)
    time_col = find_time_col(df)

    t = pd.to_numeric(df[time_col], errors="coerce").to_numpy(np.float32)

    cols = [c for c in df.columns if c != time_col and c not in DROP_COLS]
    X = df[cols].apply(pd.to_numeric, errors="coerce").to_numpy(np.float32)

    mask = (t >= t0) & (t <= t1)
    Xseg = X[mask]

    # si segment vide : on met une ligne de zéros
    if Xseg.shape[0] == 0:
        Xseg = np.zeros((1, X.shape[1]), dtype=np.float32)

    return torch.from_numpy(Xseg)  # [Tseg, C]


In [4]:
def smart_read_csv(path: str):
    # essaie ; puis tab puis virgule
    for sep in [";", "\t", ","]:
        try:
            df = pd.read_csv(path, sep=sep)
            if df.shape[1] > 2:
                return df
        except Exception:
            pass
    raise ValueError(f"Impossible de lire {path} (séparateur inconnu)")

def find_time_col(df):
    for c in df.columns:
        if "time" in c.lower():
            return c
    raise ValueError("Aucune colonne temps trouvée")

def read_and_slice_by_time(path, t0, t1):
    df = smart_read_csv(path)
    time_col = find_time_col(df)

    t = pd.to_numeric(df[time_col], errors="coerce").to_numpy(np.float32)

    cols = [c for c in df.columns if c != time_col and c not in DROP_COLS]
    X = df[cols].apply(pd.to_numeric, errors="coerce").to_numpy(np.float32)

    mask = (t >= t0) & (t <= t1)
    Xseg = X[mask]

    # si segment vide : on met une ligne de zéros
    if Xseg.shape[0] == 0:
        Xseg = np.zeros((1, X.shape[1]), dtype=np.float32)

    return torch.from_numpy(Xseg)  # [Tseg, C]


In [5]:
def resample_to_L(x: torch.Tensor, L: int) -> torch.Tensor:
    # x: [T, C] -> [L, C]
    T, C = x.shape
    if T <= 1:
        return torch.zeros((L, C), dtype=x.dtype)

    idx = torch.linspace(0, T - 1, steps=L)
    idx0 = idx.floor().long()
    idx1 = torch.clamp(idx0 + 1, max=T - 1)
    w = (idx - idx0.float()).unsqueeze(1)

    x0 = x[idx0]
    x1 = x[idx1]
    return (1 - w) * x0 + w * x1


In [6]:
def build_segments_index(events_root="/home/fisa/stockage1/mindscan/Events/"):
    rows = []
    subjects = sorted([
        d for d in os.listdir(events_root)
        if os.path.isdir(os.path.join(events_root, d))
    ])

    for subject in subjects:
        subject_dir = os.path.join(events_root, subject)
        seqs = sorted([
            d for d in os.listdir(subject_dir)
            if os.path.isdir(os.path.join(subject_dir, d))
        ])

        for seq in seqs:
            classif_path = os.path.join(subject_dir, seq, "classif.csv")
            if not os.path.exists(classif_path):
                continue

            ev = pd.read_csv(classif_path, sep=";")
            for _, r in ev.iterrows():
                rows.append({
                    "subject": subject,
                    "seq": seq,
                    "label": int(float(r["Class"])),      # 1..31
                    "t0": float(r["Timestamp Start"]),
                    "t1": float(r["Timestamp End"]),
                    "name": r["Name"],
                })

    return pd.DataFrame(rows)

segments = build_segments_index()
print("Segments:", len(segments))
print("Classes présentes:", segments["label"].nunique())
segments.head()


Segments: 10204
Classes présentes: 31


Unnamed: 0,subject,seq,label,t0,t1,name
0,S01,Sequence_01,1,8.19167,13.09167,Standing in T-pose
1,S01,Sequence_01,7,13.09167,16.84167,Standing on heels
2,S01,Sequence_01,10,16.84167,21.61667,Walking backwards
3,S01,Sequence_01,21,21.61667,25.59167,Climbing the stairs
4,S01,Sequence_01,6,25.59167,30.54167,Standing on tiptoes


In [7]:
class MultimodalEventDataset(Dataset):
    def __init__(self, segments_df, roots, filenames, L=256):
        self.df = segments_df.reset_index(drop=True)
        self.roots = roots
        self.filenames = filenames
        self.L = L

    def _load_mod(self, mod, subject, seq, t0, t1):
        path = os.path.join(self.roots[mod], subject, seq, self.filenames[mod])
        x = read_and_slice_by_time(path, t0, t1)  # [Tseg, C]
        x = resample_to_L(x, self.L)              # [L, C]
        return x.transpose(0, 1).contiguous()     # [C, L]

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        r = self.df.iloc[i]
        subject, seq = r["subject"], r["seq"]
        t0, t1 = float(r["t0"]), float(r["t1"])
        y = int(r["label"]) - 1  # 1..31 -> 0..30

        return {
            "imu": self._load_mod("imu", subject, seq, t0, t1),
            "emg": self._load_mod("emg", subject, seq, t0, t1),
            "plantar": self._load_mod("plantar", subject, seq, t0, t1),
            "skeleton": self._load_mod("skeleton", subject, seq, t0, t1),
            "y": torch.tensor(y, dtype=torch.long),
        }

ds = MultimodalEventDataset(segments, ROOTS, FILENAMES, L=L)
sample = ds[0]
for k in ["imu", "emg", "plantar", "skeleton"]:
    print(k, sample[k].shape)
print("label:", sample["y"].item())


imu torch.Size([96, 256])
emg torch.Size([16, 256])
plantar torch.Size([50, 256])
skeleton torch.Size([170, 256])
label: 0


  df = pd.read_csv(path, sep=sep)


---------------
#### IMU : C = 96

imu torch.Size([96, 256]) = Parce que 16 capteurs/segments corporels × 6 mesures (ACC x,y,z + GYRO x,y,z) → 16 × 6 = 96 canaux.
Chaque canal = une courbe dans le temps (ex: “Arm_Left ACC X”, etc.)

#### EMG : C = 16

Tu as ~16 colonnes EMG (un muscle/capteur par colonne).
Donc 16 canaux.

#### Plantar : C = 50

Pour chaque pied tu as environ 25 valeurs :

16 pressions
* 3 accel
* 3 gyro
* 1 force
* 2 centre de pression (COP)

→ 25 × 2 pieds = 50 canaux.

#### Skeleton : C = 170

Là, ce ne sont pas des “capteurs”, c’est des features du squelette :
chaque articulation a plusieurs valeurs (position XYZ + quaternion QX QY QZ QW) et tu as plusieurs articulations
Tu as au final 170 colonnes numériques (hors “Time” et “Frame”), donc 170 canaux.

TEST


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# --- 1) Si segments n'existe pas, on le reconstruit ---
try:
    segments
except NameError:
    def build_segments_index(events_root="Events"):
        rows = []
        subjects = sorted([
            d for d in os.listdir(events_root)
            if os.path.isdir(os.path.join(events_root, d))
        ])
        for subject in subjects:
            subject_dir = os.path.join(events_root, subject)
            seqs = sorted([
                d for d in os.listdir(subject_dir)
                if os.path.isdir(os.path.join(subject_dir, d))
            ])
            for seq in seqs:
                classif_path = os.path.join(subject_dir, seq, "classif.csv")
                if not os.path.exists(classif_path):
                    continue
                ev = pd.read_csv(classif_path, sep=";")
                for _, r in ev.iterrows():
                    rows.append({
                        "subject": subject,
                        "seq": seq,
                        "label": int(float(r["Class"])),
                        "t0": float(r["Timestamp Start"]),
                        "t1": float(r["Timestamp End"]),
                        "name": r["Name"],
                    })
        return pd.DataFrame(rows)

    segments = build_segments_index(ROOTS["events"])
    print("segments rebuilt:", len(segments))

# --- 2) Filtrer uniquement la première séquence (S01 / Sequence_01) ---
SUBJECT = "S01"
SEQ = "Sequence_01"
seg_seq1 = segments[(segments["subject"] == SUBJECT) & (segments["seq"] == SEQ)].reset_index(drop=True)

print("Nb d'actions annotées dans", SUBJECT, SEQ, ":", len(seg_seq1))
display(seg_seq1[["name","label","t0","t1"]])

# --- 3) Choisir quelle action visualiser (par index) ---
IDX = 1   # change à 5, 10, etc. pour voir une autre action
r = seg_seq1.iloc[IDX]
t0, t1, lab, name = float(r["t0"]), float(r["t1"]), int(r["label"]), r["name"]

print(f"\nAction choisie IDX={IDX}: '{name}' (class {lab})")
print(f"Intervalle temps: t0={t0:.3f}s -> t1={t1:.3f}s (durée {(t1-t0):.3f}s)")

# --- 4) Charger un segment pour une modalité ---
L = 256  # longueur fixe resamplée

def load_segment(mod):
    path = os.path.join(ROOTS[mod], SUBJECT, SEQ, FILENAMES[mod])
    x_raw = read_and_slice_by_time(path, t0, t1)   # [Tseg, C]
    x_res = resample_to_L(x_raw, L)                # [L, C]
    return x_raw, x_res

raw = {}
res = {}

for mod in ["imu","emg","plantar","skeleton"]:
    x_raw, x_res = load_segment(mod)
    raw[mod] = x_raw
    res[mod] = x_res
    print(f"{mod}: raw={tuple(x_raw.shape)}  resampled={tuple(x_res.shape)}")

# --- 5) Fonctions de plot ---
def plot(mod, channels=(0,1,2), use_resampled=True):
    X = res[mod].numpy() if use_resampled else raw[mod].numpy()  # [L,C] ou [Tseg,C]
    plt.figure(figsize=(10, 3.5))
    for ch in channels:
        plt.plot(X[:, ch], label=f"ch{ch}")
    plt.title(f"{mod.upper()} — {'resampled' if use_resampled else 'raw'} — {name} (class {lab})")
    plt.xlabel("index temporel")
    plt.ylabel("valeur")
    plt.legend()
    plt.tight_layout()
    plt.show()

# --- 6) Visualiser (quelques canaux) ---
plot("imu", channels=(0,1,2), use_resampled=False)
plot("imu", channels=(0,1,2), use_resampled=True)

plot("emg", channels=(0,1,2), use_resampled=False)
plot("emg", channels=(0,1,2), use_resampled=True)

plot("plantar", channels=(0,1,2), use_resampled=False)
plot("plantar", channels=(0,1,2), use_resampled=True)

plot("skeleton", channels=(0,1,2), use_resampled=False)
plot("skeleton", channels=(0,1,2), use_resampled=True)


# Modele IMU


In [9]:
import torch
import torch.nn as nn

class IMU_CNN(nn.Module):
    def __init__(self, num_classes=31):
        super().__init__()

        self.features = nn.Sequential(
            # Entrée : [B, 96, 256]
            nn.Conv1d(96, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),

            nn.Conv1d(64, 128, kernel_size=5, stride=2, padding=2),
            nn.ReLU(),

            nn.Conv1d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
        )

        # Classification
        self.classifier = nn.Linear(256, num_classes)

    def forward(self, x):
        """
        x : [B, 96, 256]
        """
        x = self.features(x)       # [B, 256, T']
        x = x.mean(dim=-1)         # Global Average Pooling -> [B, 256]
        logits = self.classifier(x)  # [B, 31]
        return logits


In [10]:
# garder uniquement S01 à S24
allowed_subjects = {f"S{str(i).zfill(2)}" for i in range(1, 25)}

segments_1_24 = segments[segments["subject"].isin(allowed_subjects)].reset_index(drop=True)

print("Segments totaux (S01–S24):", len(segments_1_24))
print("Sujets présents:", sorted(segments_1_24["subject"].unique()))


Segments totaux (S01–S24): 7653
Sujets présents: ['S01', 'S02', 'S03', 'S04', 'S05', 'S06', 'S07', 'S08', 'S09', 'S10', 'S11', 'S12', 'S13', 'S14', 'S15', 'S16', 'S17', 'S18', 'S19', 'S20', 'S21', 'S22', 'S23', 'S24']


In [11]:
import numpy as np

subjects = sorted(segments_1_24["subject"].unique())

np.random.seed(0)
np.random.shuffle(subjects)

n_train = int(0.8 * len(subjects))
train_subjects = set(subjects[:n_train])
val_subjects   = set(subjects[n_train:])

train_segments = segments_1_24[segments_1_24["subject"].isin(train_subjects)].reset_index(drop=True)
val_segments   = segments_1_24[segments_1_24["subject"].isin(val_subjects)].reset_index(drop=True)

print("Train subjects:", sorted(train_subjects))
print("Val subjects:", sorted(val_subjects))
print("Train segments:", len(train_segments))
print("Val segments:", len(val_segments))


Train subjects: ['S02', 'S03', 'S05', 'S06', 'S07', 'S08', 'S09', 'S10', 'S11', 'S12', 'S14', 'S15', 'S17', 'S18', 'S19', 'S20', 'S21', 'S23', 'S24']
Val subjects: ['S01', 'S04', 'S13', 'S16', 'S22']
Train segments: 6084
Val segments: 1569


In [12]:
# vérifier les labels dans train et val
print("Train labels uniques:", sorted(train_segments["label"].unique()))
print("Val labels uniques:", sorted(val_segments["label"].unique()))

print("Min label:", segments["label"].min())
print("Max label:", segments["label"].max())


Train labels uniques: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
Val labels uniques: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
Min label: 1
Max label: 31


In [17]:
from torch.utils.data import DataLoader

import torch

# Check available devices
print("CUDA available:", torch.cuda.is_available())
print("Device count:", torch.cuda.device_count())
print("Current device:", torch.cuda.current_device())

device = torch.device(0)
print("Device name:", torch.cuda.get_device_name(0))
print("PyTorch version:", torch.__version__)

train_ds = MultimodalEventDataset(train_segments, ROOTS, FILENAMES, L=L)
print(train_ds.df)
val_ds   = MultimodalEventDataset(val_segments, ROOTS, FILENAMES, L=L)

train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
val_dl   = DataLoader(val_ds, batch_size=64, shuffle=False)

model = IMU_CNN(num_classes=31).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()


CUDA available: True
Device count: 2
Current device: 0
Device name: NVIDIA RTX 6000 Ada Generation
PyTorch version: 2.7.1+cu128
     subject          seq  label         t0         t1  \
0        S02  Sequence_01      1    3.70833    8.57500   
1        S02  Sequence_01     26    8.57500   12.17500   
2        S02  Sequence_01     10   12.17500   19.00833   
3        S02  Sequence_01     24   19.00833   22.96667   
4        S02  Sequence_01     16   22.96667   26.14167   
...      ...          ...    ...        ...        ...   
6079     S24  Sequence_10     12  125.94167  132.85833   
6080     S24  Sequence_10     23  132.85833  137.17500   
6081     S24  Sequence_10      3  137.17500  142.54167   
6082     S24  Sequence_10     26  142.54167  144.66667   
6083     S24  Sequence_10      1  144.66667  147.46458   

                                name  
0                 Standing in T-pose  
1                    Leaning forward  
2                  Walking backwards  
3                  

In [None]:
import time
import torch
import torch.nn as nn
from tqdm import tqdm
def train_imu_verbose(model, train_dl, val_dl, epochs=5):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    print("test")

    for epoch in tqdm(range(epochs)):
        print("test2")
        start_time = time.time()

        # ===== TRAIN =====
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for i, batch in enumerate(train_dl):
            x = batch["imu"].to(device)   # [B, 96, 256]
            y = batch["y"].to(device)     # [B]
            print(x, y, "bonjour")
            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * x.size(0)
            correct += (logits.argmax(1) == y).sum().item()
            total += x.size(0)

            # --- affichage intermédiaire toutes les N batches ---
            if (i + 1) % 20 == 0:
                avg_loss = running_loss / total
                avg_acc = correct / total
                print(
                    f"Epoch [{epoch+1}/{epochs}] "
                    f"Batch [{i+1}/{len(train_dl)}] "
                    f"Train loss: {avg_loss:.4f} "
                    f"Train acc: {avg_acc:.3f}"
                )

        train_loss = running_loss / total
        train_acc = correct / total

        # ===== VALIDATION =====
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for batch in val_dl:
                x = batch["imu"].to(device)
                y = batch["y"].to(device)

                logits = model(x)
                loss = criterion(logits, y)

                val_loss += loss.item() * x.size(0)
                val_correct += (logits.argmax(1) == y).sum().item()
                val_total += x.size(0)

        val_loss /= val_total
        val_acc = val_correct / val_total

        elapsed = time.time() - start_time

        # ===== RÉSUMÉ ÉPOQUE =====
        print("=" * 60)
        print(
            f"Epoch {epoch+1}/{epochs} terminé en {elapsed:.1f}s\n"
            f"Train | loss: {train_loss:.4f} | acc: {train_acc:.3f}\n"
            f"Val   | loss: {val_loss:.4f} | acc: {val_acc:.3f}"
        )
        print("=" * 60)

model = IMU_CNN(num_classes=31).to(device)
train_imu_verbose(model, train_dl, val_dl, epochs=5)


test


  0%|          | 0/5 [00:00<?, ?it/s]

test2
tensor([[[ 9.7473e-02,  9.4780e-02,  8.3177e-02,  ..., -3.7377e-01,
          -4.8456e-01, -2.8754e-01],
         [ 9.7028e-01,  9.4892e-01,  9.6223e-01,  ...,  6.4705e-01,
           7.8256e-01,  8.4711e-01],
         [-7.1167e-02, -8.2627e-02, -8.4044e-02,  ..., -1.6596e-01,
          -2.6441e-01, -3.4070e-01],
         ...,
         [-1.4405e+01, -1.3203e+01, -1.4589e+01,  ..., -2.8744e+01,
          -1.7084e+01,  4.0282e+01],
         [ 5.9160e+00,  5.7370e+00,  5.3716e+00,  ...,  1.9009e+01,
          -5.6177e+01,  2.5013e+02],
         [-9.3359e+00, -8.4913e+00, -7.2885e+00,  ..., -5.8022e+01,
          -7.7010e+01, -1.1161e+02]],

        [[ 2.2375e-01,  2.2538e-01,  2.2370e-01,  ...,  1.4969e-01,
           1.5811e-01,  1.6113e-01],
         [ 8.5529e-01,  8.7734e-01,  9.0030e-01,  ...,  9.5851e-01,
           9.4156e-01,  9.2902e-01],
         [-3.2581e-01, -3.4087e-01, -3.4738e-01,  ..., -2.5117e-01,
          -2.6517e-01, -2.7960e-01],
         ...,
         [-2.1634e+