In [8]:
from __future__ import annotations
from pathlib import Path
from typing import List, Tuple
import ast
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
#from torch.cuda.amp import autocast, GradScaler
from torch.cuda.amp import autocast, GradScaler
import argparse, contextlib
from tqdm import tqdm

In [9]:
WINDOW = 50
STRIDE = 25
LOCATIONS = ["right_arm", "left_arm", "right_leg", "left_leg"]
AXES = ["x", "y", "z"]
data_dir = Path('data')
train_dir = data_dir / 'train'
meta_file = data_dir / 'meta_data.txt'
test_file = data_dir/'test.csv'
workdir = Path('work')
label_map = {
    'null': 0,'jogging': 1,'jogging (rotating arms)': 2,'jogging (skipping)': 3,'jogging (sidesteps)': 4,'jogging (butt-kicks)': 5,
    'stretching (triceps)': 6,'stretching (lunging)': 7,'stretching (shoulders)': 8,'stretching (hamstrings)': 9,'stretching (lumbar rotation)': 10,
    'push-ups': 11,'push-ups (complex)': 12,'sit-ups': 13,'sit-ups (complex)': 14,'burpees': 15,'lunges': 16,'lunges (complex)': 17,'bench-dips': 18
}
num_classes = len(label_map)
C = 3
crit = nn.CrossEntropyLoss()

In [14]:
def detect_locations() -> list[str]:
    files = sorted(train_dir.glob('sbj_*.csv'))
    if not files:
        raise FileNotFoundError(f"No 'sbj_*.csv' files found in {data_dir}")
    sample = pd.read_csv(files[0], nrows=0)
    locs = sorted({col.split('_acc_')[0] for col in sample.columns if '_acc_' in col})
    if not locs:
        raise ValueError(f"No sensor columns found in {files[0]}")
    return locs
    sample = pd.read_csv(next(data_dir.glob('sbj_*.csv')), nrows=0)
    return sorted({col.split('_acc_')[0] for col in sample.columns if '_acc_' in col})

def cols_for(loc: str) -> List[str]:
    return [f"{loc}_acc_{ax}" for ax in AXES]

def make_clean_split(df: pd.DataFrame, loc: str, want_label: bool) -> pd.DataFrame:
    cols = cols_for(loc)
    cleaned = (
        df[cols + (['label'] if want_label else [])]
          .rename(columns=dict(zip(cols, AXES)))
          .dropna()
    )
    if want_label:
        return cleaned[AXES + ['label']].reset_index(drop=True)
    return cleaned[AXES].reset_index(drop=True)

def preprocess() -> List[str]:
    locs = detect_locations()
    print(locs)
    ds_dir = workdir / 'datasets'
    ds_dir.mkdir(parents=True, exist_ok=True)

    for loc in locs:
        parts = []
        need = cols_for(loc) + ['label']
        for f in sorted(train_dir.glob('sbj_*.csv')):
            df = pd.read_csv(f)
            if not all(c in df.columns for c in need):
                continue
            parts.append(make_clean_split(df, loc, True))
        if parts:
            pd.concat(parts).to_csv(ds_dir / f'train_{loc}.csv', index=False)


    test_raw = pd.read_csv(test_file)
    for loc in LOCATIONS:
        if all(c in test_raw.columns for c in cols_for(loc)):
            make_clean_split(test_raw, loc, False).to_csv(
                ds_dir / f'test_{loc}.csv', index=False)

    return locs


In [15]:
preprocess()

['left_arm', 'left_leg', 'right_arm', 'right_leg']


  df = pd.read_csv(f)
  df = pd.read_csv(f)
  df = pd.read_csv(f)
  df = pd.read_csv(f)


['left_arm', 'left_leg', 'right_arm', 'right_leg']

In [13]:
def segment_windows(data: np.ndarray, labels: np.ndarray|None, window:int=WINDOW, stride:int=STRIDE) -> Tuple[np.ndarray, np.ndarray|None]:
    X_list, y_list = [], []
    for s in range(0, len(data)-window+1, stride):
        seg = data[s:s+window]
        if np.isnan(seg).any(): continue
        X_list.append(seg)
        if labels is not None:
            lbl = labels[s:s+window]
            vals,cnts = np.unique(lbl, return_counts=True)
            y_list.append(vals[cnts.argmax()])
    X = np.stack(X_list) if X_list else np.empty((0,window,3),dtype=np.float32)
    y = np.array(y_list) if labels is not None else None
    return X, y

class WearDS(Dataset):
    def __init__(self, X: np.ndarray, y: np.ndarray, scaler: StandardScaler):
        self.X = scaler.transform(X.reshape(-1,3)).reshape(X.shape)
        self.y = y
    def __len__(self): return len(self.X)
    def __getitem__(self, i):
        return torch.tensor(self.X[i]).permute(1,0), self.y[i]

class DeepConvLSTM(nn.Module):
    def __init__(self, n_classes:int):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(3,64,5,padding=2), nn.ReLU(),
            nn.Conv1d(64,64,5,padding=2), nn.ReLU(), nn.Dropout(0.2))
        self.lstm = nn.LSTM(64,128,batch_first=True)
        self.fc   = nn.Sequential(nn.Dropout(0.5), nn.Linear(128,n_classes))
    def forward(self,x):
        x = self.conv(x)
        x = x.permute(0,2,1)
        _,(h,_) = self.lstm(x)
        return self.fc(h[-1])

In [16]:
def train_epoch(model, loader, crit, opt, scaler_amp, device):
    model.train(); tot=correct=ls=0.0
    amp_ctx = autocast() if device=='cuda' else contextlib.nullcontext()
    for X,y in loader:
        X = X.to(device); y = y.to(device)
        opt.zero_grad()
        with amp_ctx:
            out=model(X); loss=crit(out,y)
        if scaler_amp:
            scaler_amp.scale(loss).backward(); scaler_amp.step(opt); scaler_amp.update()
        else:
            loss.backward(); opt.step()
        preds=out.argmax(1)
        tot+=y.size(0); correct+=(preds==y).sum().item(); ls+=loss.item()*y.size(0)
    return ls/tot, correct/tot

def train_locations(work_dir:Path, locs:List[str], epochs:int, batch:int, device:str):
    mdl_dir = work_dir/'models'; mdl_dir.mkdir(exist_ok=True)
    for loc in locs:
        csv = work_dir/'datasets'/f'train_{loc}.csv'
        df = pd.read_csv(csv)
        X,y = segment_windows(df[AXES].values.astype(np.float32), df['label'].values)
        if X.size==0: continue
        # encode
        classes = sorted(set(y)); lbl2idx={c:i for i,c in enumerate(classes)}
        y_enc = np.array([lbl2idx[v] for v in y],dtype=np.int64)
        scaler = StandardScaler().fit(X.reshape(-1,3))
        ds = WearDS(X,y_enc,scaler)
        dl = DataLoader(ds,batch_size=batch,shuffle=True,
                        pin_memory=(device=='cuda'),num_workers=0)
        model=DeepConvLSTM(len(classes)).to(device)
        w=compute_class_weight('balanced',classes=np.unique(y_enc),y=y_enc)
        crit=nn.CrossEntropyLoss(weight=torch.tensor(w,dtype=torch.float).to(device))
        opt=torch.optim.AdamW(model.parameters(),1e-3,weight_decay=1e-4)
        amp=GradScaler() if device=='cuda' else None
        best=0.0
        for ep in range(1,epochs+1):
            loss,acc=train_epoch(model,dl,crit,opt,amp,device)
            print(f"[{loc}] {ep}/{epochs} loss={loss:.4f} acc={acc:.4f}")
            if acc>best:
                best=acc
                ckpt={'model':model.state_dict(),'scaler':scaler,'label_map':lbl2idx}
                torch.save(ckpt, mdl_dir/f'{loc}_best.pt')
        del model,dl,ds,opt,crit,amp


In [20]:
all_locs = detect_locations()
train_locations(workdir, all_locs, 20, 256, 'cpu')

[left_arm] 1/20 loss=1.7030 acc=0.4388
[left_arm] 2/20 loss=1.2990 acc=0.5672
[left_arm] 3/20 loss=1.1476 acc=0.6190
[left_arm] 4/20 loss=1.0661 acc=0.6459
[left_arm] 5/20 loss=1.0026 acc=0.6685
[left_arm] 6/20 loss=0.9658 acc=0.6797
[left_arm] 7/20 loss=0.9220 acc=0.6952
[left_arm] 8/20 loss=0.8849 acc=0.7072
[left_arm] 9/20 loss=0.8530 acc=0.7176
[left_arm] 10/20 loss=0.8368 acc=0.7206
[left_arm] 11/20 loss=0.8078 acc=0.7314
[left_arm] 12/20 loss=0.7790 acc=0.7419
[left_arm] 13/20 loss=0.7652 acc=0.7449
[left_arm] 14/20 loss=0.7424 acc=0.7521
[left_arm] 15/20 loss=0.7306 acc=0.7544
[left_arm] 16/20 loss=0.7055 acc=0.7623
[left_arm] 17/20 loss=0.6933 acc=0.7672
[left_arm] 18/20 loss=0.6833 acc=0.7705
[left_arm] 19/20 loss=0.6794 acc=0.7719
[left_arm] 20/20 loss=0.6579 acc=0.7796
[left_leg] 1/20 loss=1.5423 acc=0.4431
[left_leg] 2/20 loss=1.1718 acc=0.5457
[left_leg] 3/20 loss=1.0915 acc=0.5723
[left_leg] 4/20 loss=1.0153 acc=0.6021
[left_leg] 5/20 loss=0.9530 acc=0.6289
[left_leg] 6/2

In [76]:

def predict_and_save(
    test_csv: Path,
    models_dir: Path,
    output_csv: Path,
    axes: List[str] = ['x_axis', 'y_axis', 'z_axis'],
    device: str = 'cpu'
):
    df = pd.read_csv(test_csv)
    results = []
    for loc, grp in df.groupby('sensor_location', sort=False):
        print(f"→ Predicting location '{loc}' ({len(grp)} rows)")
        ckpt_path = models_dir / f"{loc}_best.pt"
        if not ckpt_path.exists():
            raise FileNotFoundError(f"Model for '{loc}' not found at {ckpt_path}")

        with torch.serialization.safe_globals([StandardScaler]):
            ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)

        num_classes = len(ckpt['label_map'])
        model = DeepConvLSTM(num_classes).to(device)
        model.load_state_dict(ckpt['model'])
        model.eval()
        scaler: StandardScaler = ckpt['scaler']
        inv_map = {v: k for k, v in ckpt['label_map'].items()}

        windows = []
        for _, row in grp.iterrows():
            lists = [ast.literal_eval(row[c]) for c in axes]
            arr = np.stack(lists, axis=0).T.astype('float32')  # (window_len, 3)
            windows.append(arr)
        X = np.stack(windows, axis=0)  # (n_loc, window_len, 3)

        n, L, C = X.shape
        Xs = scaler.transform(X.reshape(-1, C)).reshape(n, L, C)

        ds = WearDS(X, Xs, scaler)
        dl = DataLoader(ds, batch_size=256, shuffle=False, num_workers=0)

        preds = []
        with torch.no_grad():
            for xb, _ in dl:
                if isinstance(xb, list):
                    xb = np.stack(xb, axis=0)
                xb = torch.tensor(xb, dtype=torch.float32, device=device)
                out = model(xb)
                for pi in out.argmax(dim=1).cpu().numpy():
                    lbl = inv_map[int(pi)]
                    preds.append(LABEL_MAP[lbl])

        for (_, row), p in zip(grp.iterrows(), preds):
            results.append((row['id'], p))

    res_df = pd.DataFrame(results, columns=['id', 'label'])
    res_df.to_csv(output_csv, index=False)
    print(f"✅ Saved predictions to {output_csv}")

predict_and_save(test_file,workdir/'models', workdir/'result.csv')

→ Predicting location 'right_arm' (12234 rows)


  xb = torch.tensor(xb, dtype=torch.float32, device=device)


→ Predicting location 'left_arm' (12234 rows)


  xb = torch.tensor(xb, dtype=torch.float32, device=device)


→ Predicting location 'right_leg' (12234 rows)


  xb = torch.tensor(xb, dtype=torch.float32, device=device)


→ Predicting location 'left_leg' (12234 rows)


  xb = torch.tensor(xb, dtype=torch.float32, device=device)


✅ Saved predictions to work/result.csv
