In [15]:
import torch
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
import torch.nn as nn
from collections import Counter
import os, random, gc, warnings
from pathlib import Path
import ast, numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader,WeightedRandomSampler

In [22]:
# --- Label vocabulary -------------------------------------------------------
WINDOW_LEN = 50

# Activity names listed in label-id order: the position in this tuple is the
# integer id stored in `label_map`.
_ACTIVITY_NAMES = (
    'null',
    'jogging',
    'jogging (rotating arms)',
    'jogging (skipping)',
    'jogging (sidesteps)',
    'jogging (butt-kicks)',
    'stretching (triceps)',
    'stretching (lunging)',
    'stretching (shoulders)',
    'stretching (hamstrings)',
    'stretching (lumbar rotation)',
    'push-ups',
    'push-ups (complex)',
    'sit-ups',
    'sit-ups (complex)',
    'burpees',
    'lunges',
    'lunges (complex)',
    'bench-dips',
)
label_map = {name: idx for idx, name in enumerate(_ACTIVITY_NAMES)}

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# --- Data layout ------------------------------------------------------------
RAW_DIR = Path('data')
TRAIN_DIR = RAW_DIR / 'dataset_without_null'
TEST_CSV = RAW_DIR / 'test.csv'                   # public test
WORK_DIR = Path('resnet')
LOCATION_IDS = ["right_arm", "left_arm", "right_leg", "left_leg"]
loc = "right_arm"
test_subjects = [21]

# --- Model dimensions -------------------------------------------------------
# NOTE(review): num_classes counts the 'null' entry as well (19), while
# TRAIN_DIR points at 'dataset_without_null' and WindowDS returns labels
# shifted by -1. Confirm that 19 output units are intended.
num_classes = len(label_map)
sequence_length = 50
input_channels = 3
embedding_dim = 64
num_layers = 2
hidden_size = 128

# --- Optimisation -----------------------------------------------------------
# LR and WEIGHT_DECAY are the values handed to AdamW in
# loso_train_one_location; `learning_rate` is kept for any other cell that may
# read it (NOTE(review): no use of it is visible in this part of the notebook).
learning_rate = 0.001
LR = 3e-3
WEIGHT_DECAY = 1e-4
num_epochs = 30
batch_size = 256

# --- Prediction post-processing constants -----------------------------------
THRESHOLD: float = 0.25
DEFAULT_CLASS: int = 0

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [25]:
class WindowDS(Dataset):
    """Dataset of 3-axis sensor windows parsed from a DataFrame and standardised.

    Each row of ``df`` must provide ``x_axis``, ``y_axis`` and ``z_axis`` cells
    holding the text form of a Python list of numbers (parsed with
    ``ast.literal_eval``). All rows must have the same length so they can be
    stacked into one ``(N, 3, L)`` float32 array. A ``label`` column is optional.

    Args:
        df: source DataFrame (see above).
        fit_scaler: when True, compute the per-axis mean/std from this data and
            expose them as ``self.mean`` / ``self.std``.
        scaler: ``(mean, std)`` pair to apply when ``fit_scaler`` is False,
            typically the statistics of the training dataset.
        augment: when True, each fetched window has a 50% chance of receiving
            additive Gaussian noise (sigma 0.02).

    Raises:
        ValueError: if ``fit_scaler`` is False and no ``scaler`` is supplied.
    """

    _AXIS_COLUMNS = ('x_axis', 'y_axis', 'z_axis')

    def __init__(self, df, fit_scaler=False, scaler=None, augment=False):
        # Labels are optional so the same class can wrap unlabeled data.
        self.y = df['label'].values.astype('int64') if 'label' in df else None

        self.x = np.stack([self._parse_axis(df[name]) for name in self._AXIS_COLUMNS],
                          axis=1)                      # (N, 3, L)

        if fit_scaler:
            # One statistic per axis, pooled over all windows and time steps.
            self.mean = self.x.mean((0, 2), keepdims=True)
            self.std = self.x.std((0, 2), keepdims=True) + 1e-7   # avoid /0
        elif scaler is None:
            raise ValueError(
                "WindowDS needs either fit_scaler=True or scaler=(mean, std)")
        else:
            self.mean, self.std = scaler
        self.x = (self.x - self.mean) / self.std
        self.augment = augment

    @staticmethod
    def _parse_axis(column):
        """Parse one column of stringified lists into an (N, L) float32 array."""
        return np.stack([np.array(ast.literal_eval(cell), dtype='float32')
                         for cell in column])

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``window`` or ``(window, label - 1)`` for the given index."""
        xb = self.x[idx]
        if self.augment and random.random() < .5:
            # `self.x[idx]` is a view into the stored array, so the noise must be
            # added out of place; an in-place `+=` would write it back into the
            # dataset and let it accumulate every time the sample is fetched.
            xb = (xb + np.random.normal(0, .02, xb.shape)).astype('float32')
        xb = torch.from_numpy(xb).float()
        if self.y is None:
            return xb
        return xb, torch.tensor(self.y[idx] - 1)  # stored label minus one

def conv3(in_c, out_c, s=1):
    """Build a bias-free 1-D convolution with kernel size 3, stride ``s``, padding 1."""
    return nn.Conv1d(
        in_channels=in_c,
        out_channels=out_c,
        kernel_size=3,
        stride=s,
        padding=1,
        bias=False,
    )

class Block(nn.Module):
    """Residual unit: two 3-tap conv + batch-norm stages added to a skip path.

    When the stride is not 1 or the channel count changes, the skip path is a
    1x1 strided convolution followed by batch norm so that both branches have
    matching shapes; otherwise the input is added unchanged.
    """

    def __init__(self, in_c, out_c, s=1):
        super().__init__()
        # Main branch (registration order is kept so state_dict keys line up).
        self.conv1 = conv3(in_c, out_c, s)
        self.bn1 = nn.BatchNorm1d(out_c)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3(out_c, out_c)
        self.bn2 = nn.BatchNorm1d(out_c)

        # Skip branch: projection only when shapes would otherwise differ.
        needs_projection = (s != 1) or (in_c != out_c)
        if needs_projection:
            self.down = nn.Sequential(
                nn.Conv1d(in_c, out_c, 1, s, bias=False),
                nn.BatchNorm1d(out_c),
            )
        else:
            self.down = None

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        shortcut = x if self.down is None else self.down(x)
        return self.relu(out + shortcut)

class ResNetTiny(nn.Module):
    """Small 1-D residual network for 3-channel windows.

    Layout: stem (stride-2 conv with kernel 7, batch norm, ReLU, stride-2
    max-pool) -> three residual ``Block`` stages (16->16, 16->32 stride 2,
    32->64 stride 2) -> global average pool -> linear layer with ``n_cls``
    outputs.
    """

    def __init__(self, n_cls):
        super().__init__()
        stem_layers = [
            nn.Conv1d(3, 16, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm1d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2, padding=1),
        ]
        self.stem = nn.Sequential(*stem_layers)
        self.layer1 = Block(16, 16)
        self.layer2 = Block(16, 32, 2)
        self.layer3 = Block(32, 64, 2)
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(64, n_cls)

    def forward(self, x):
        feats = self.stem(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            feats = stage(feats)
        # Collapse the time axis to one value per channel, then drop that axis.
        pooled = self.pool(feats).squeeze(-1)
        return self.fc(pooled)

In [26]:


def train_one_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: network to train; switched to training mode.
        loader: iterable yielding ``(inputs, targets)`` batches.
        optimizer: optimiser stepped once per batch.
        criterion: loss called as ``criterion(logits, targets)``.

    Returns:
        Loss averaged over the samples actually processed, or 0.0 when the
        loader yields no batches.
    """
    model.train()
    total_loss = 0.0
    seen = 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        n = xb.size(0)
        total_loss += loss.item() * n
        seen += n
    # Divide by the samples seen rather than len(loader.dataset): the two
    # differ when a sampler draws another number of samples or when the loader
    # drops the last incomplete batch.
    return total_loss / max(seen, 1)

@torch.no_grad()
def evaluate(model, loader):
    """Return the macro-averaged F1 score of ``model`` over ``loader``.

    The model is put in eval mode, inputs are moved to the module-level
    ``device``, and the predicted class is the arg-max of the logits.
    Gradient tracking is disabled by the decorator.
    """
    model.eval()
    targets, predictions = [], []
    for batch_x, batch_y in loader:
        scores = model(batch_x.to(device))
        predictions.append(scores.argmax(dim=1).cpu())
        targets.append(batch_y)
    return f1_score(torch.cat(targets), torch.cat(predictions), average='macro')


def loso_train_one_location(loc_id: str):
    """Leave-one-subject-out training for one sensor location.

    Reads ``TRAIN_DIR / f'{loc_id}_windows.csv'``, and for every distinct
    ``sbj_id`` trains a fresh ``ResNetTiny`` on all other subjects for
    ``num_epochs`` epochs, then scores it (macro F1) on the held-out subject.
    The fold with the highest validation F1 is written to
    ``WORK_DIR / f'location_{loc_id}.pth'`` as a dict with keys ``'state'``
    (model weights), ``'mean'`` and ``'std'`` (the training-set normalisation
    statistics of that same fold).

    Args:
        loc_id: sensor location name used to build the CSV and checkpoint paths.

    Raises:
        RuntimeError: if no fold produced a model (e.g. the CSV has no subjects).
    """
    csv_path = TRAIN_DIR / f'{loc_id}_windows.csv'
    df       = pd.read_csv(csv_path)
    subjects = sorted(df['sbj_id'].unique())

    print(f'\n=== LOCATION {loc_id}  ({csv_path.name})  '
          f'{len(df)} windows, {len(subjects)} subjects ===')

    best_overall_f1, best_state, best_scaler = -1, None, None
    for val_subj in subjects:                            # LOSO loop
        tr_df = df[df.sbj_id != val_subj]
        va_df = df[df.sbj_id == val_subj]

        # Normalisation statistics come from the training subjects only and
        # are reused for the held-out subject.
        tr_ds = WindowDS(tr_df, fit_scaler=True, augment=True)
        va_ds = WindowDS(va_df, scaler=(tr_ds.mean, tr_ds.std))

        # Inverse-frequency sample weights so each class is drawn equally often.
        class_counts = Counter(tr_ds.y)
        weights = [1 / class_counts[y] for y in tr_ds.y]
        sampler = WeightedRandomSampler(weights, len(tr_ds), replacement=True)

        tr_dl = DataLoader(tr_ds, batch_size=batch_size,
                           sampler=sampler, num_workers=2, pin_memory=True)
        va_dl = DataLoader(va_ds, batch_size=batch_size,
                           shuffle=False, num_workers=2)

        model = ResNetTiny(num_classes).to(device)
        opt   = torch.optim.AdamW(model.parameters(), lr=LR,
                                  weight_decay=WEIGHT_DECAY)
        crit  = nn.CrossEntropyLoss()

        for _ in range(num_epochs):
            train_one_epoch(model, tr_dl, opt, crit)
        f1 = evaluate(model, va_dl)
        print(f'  val subj {val_subj:2d}:  F1={f1:.4f}')
        if f1 > best_overall_f1:
            best_overall_f1 = f1
            # state_dict() hands back references to the live parameters; take a
            # detached CPU copy so the snapshot is independent of the model.
            best_state = {name: tensor.detach().cpu().clone()
                          for name, tensor in model.state_dict().items()}
            # Keep the scaler of THIS fold: the weights are only meaningful
            # together with the statistics they were trained with.
            best_scaler = (tr_ds.mean, tr_ds.std)

    if best_state is None:
        raise RuntimeError(
            f'no LOSO fold produced a model for location {loc_id!r} '
            f'({csv_path} has {len(subjects)} subjects)')

    print(f'>>> BEST LOSO F1 for location {loc_id}: {best_overall_f1:.4f}')
    best_mean, best_std = best_scaler
    ckpt_path = WORK_DIR / f'location_{loc_id}.pth'
    # Do not rely on another cell having created the output directory.
    WORK_DIR.mkdir(parents=True, exist_ok=True)
    torch.save({'state': best_state,
                'mean':  best_mean,
                'std':   best_std},
               ckpt_path)
    print(f'checkpoint saved →  {ckpt_path}')

In [None]:
# Make sure the checkpoint directory exists before any location is trained.
os.makedirs(WORK_DIR, exist_ok=True)
# Use a dedicated loop variable: iterating with `loc` would overwrite the
# `loc = "right_arm"` setting from the configuration cell and leave it as the
# last location once the loop finishes.
for loc_id in LOCATION_IDS:
    loso_train_one_location(loc_id)
    # Return cached GPU memory to the driver between locations.
    torch.cuda.empty_cache()


=== LOCATION right_arm  (right_arm_windows.csv)  83525 windows, 22 subjects ===
  val subj  0:  F1=0.5814
