In [9]:
import os, glob, math, random, json, warnings, itertools, time, gc
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import LeaveOneGroupOut
import json
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from typing import Tuple, List
import ast

# Silence library warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Seed every random number generator used in this notebook.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# ------------------------------------------------------------------
# Paths
RAW_DIR = Path('data')
TRAIN_DIR = RAW_DIR / 'dataset_by_location'
TEST_CSV = RAW_DIR / 'test.csv'  # public test
WORK_DIR = Path('work3')

# Sensor placements; one `<location>_windows.csv` is read per entry.
LOCATION_IDS = ["right_arm", "left_arm", "right_leg", "left_leg"]

# Hyper-parameters
WIN_SIZE = 50
STRIDE = 25
BATCH_SZ = 256
EPOCHS = 20
EMB_DIM = 64
LR = 1e-3
NUM_WORKERS = 4

# CUDA auto-detection is switched off; the previous expression was
# `'cuda' if torch.cuda.is_available() else 'cpu'`.
DEVICE = 'cpu'

# Activity name -> integer class id (ids follow the listing order, 0..18).
label_map = {
    activity: class_id
    for class_id, activity in enumerate((
        'null',
        'jogging',
        'jogging (rotating arms)',
        'jogging (skipping)',
        'jogging (sidesteps)',
        'jogging (butt-kicks)',
        'stretching (triceps)',
        'stretching (lunging)',
        'stretching (shoulders)',
        'stretching (hamstrings)',
        'stretching (lumbar rotation)',
        'push-ups',
        'push-ups (complex)',
        'sit-ups',
        'sit-ups (complex)',
        'burpees',
        'lunges',
        'lunges (complex)',
        'bench-dips',
    ))
}

# Sensor location used by the PyTorch training cell.
loc = "right_arm"

In [5]:
class DeepConvLSTMEmbedder(nn.Module):
    """Conv1d stack -> LSTM -> embedding head -> linear classifier.

    ``forward`` takes a ``(B, in_channels, T)`` tensor and returns the pair
    ``(embedding, logits)`` with shapes ``(B, embedding_dim)`` and
    ``(B, n_classes)``.

    Args:
        n_classes: Number of output classes of the linear classifier.
        embedding_dim: Size of the embedding produced before the classifier.
        conv_channels: Output channels of the three convolution layers. Each
            value must be divisible by the matching GroupNorm group count
            (4, 8, 8).
        lstm_hidden: Hidden size of the LSTM.
        lstm_layers: Number of stacked LSTM layers.
        dropout_p: Dropout probability used between convolution layers.
        in_channels: Number of input signal channels (default 3).
    """
    def __init__(
        self,
        n_classes: int,
        embedding_dim: int = 128,
        conv_channels: Tuple[int, int, int] = (32, 64, 96),
        lstm_hidden: int = 128,
        lstm_layers: int = 1,
        dropout_p: float = 0.2,
        in_channels: int = 3,
    ):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.feature_extractor = self._build_conv_stack(conv_channels, dropout_p, in_channels)
        self.temporal_model  = nn.LSTM(conv_channels[-1], lstm_hidden, lstm_layers,
                                       batch_first=True)
        self.embedding_head  = nn.Sequential(
            nn.Linear(lstm_hidden, embedding_dim, bias=False),
            nn.BatchNorm1d(embedding_dim),
            nn.ReLU(inplace=True),
        )
        self.classifier      = nn.Linear(embedding_dim, n_classes)

    @staticmethod
    def _build_conv_stack(channels: Tuple[int, int, int], dropout_p: float,
                          in_channels: int = 3) -> nn.Sequential:
        """Build the Conv1d -> GroupNorm -> SELU stack.

        Dropout follows every layer except the last one. The last layer is
        identified by position, not by channel count, so stacks such as
        ``(64, 64, 64)`` still get dropout after their first two layers.
        """
        groups = [4, 8, 8]
        kernels = [5, 7, 9]
        layers: List[nn.Module] = []
        in_c = in_channels
        last_idx = len(channels) - 1
        for idx, (out_c, g, k) in enumerate(zip(channels, groups, kernels)):
            layers += [
                # padding=k//2 with an odd kernel keeps the time length unchanged
                nn.Conv1d(in_c, out_c, kernel_size=k, padding=k//2),
                nn.GroupNorm(g, out_c),
                nn.SELU(inplace=True),
            ]
            if idx != last_idx:
                layers.append(nn.Dropout(dropout_p))
            in_c = out_c
        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor):  # x: (B, in_channels, T)
        """Return ``(embedding, logits)`` for a batch of windows."""
        feats = self.feature_extractor(x)          # (B, C, T)
        feats = feats.permute(0, 2, 1)             # (B, T, C)
        lstm_out, _ = self.temporal_model(feats)   # (B, T, H)
        pooled = lstm_out.mean(dim=1)              # (B, H) average over time
        emb    = self.embedding_head(pooled)       # (B, D)
        logits = self.classifier(emb)              # (B, n_cls)
        return emb, logits

In [6]:
class DeepConvLSTMClassifier(nn.Module):
    """Classifier that reuses an existing encoder and learns a fresh head.

    The encoder must expose ``embedding_dim`` and return ``(embedding, logits)``
    from ``forward``; only the embedding is used here. ``forward`` returns the
    bare logits tensor of the new head (not a tuple).

    Args:
        encoder: Pre-built embedder whose embedding feeds the head.
        n_classes: Number of output classes of the new head.
        train_encoder: If False (default) every encoder parameter is frozen;
            if True the encoder is fine-tuned together with the head.
    """
    def __init__(self, encoder: "DeepConvLSTMEmbedder", n_classes: int,
                 train_encoder: bool = False):
        super().__init__()
        self.encoder = encoder
        # Freeze (or unfreeze) every encoder parameter in one go.
        for p in self.encoder.parameters():
            p.requires_grad = train_encoder
        hidden = encoder.embedding_dim // 2
        self.head = nn.Sequential(
            nn.Linear(encoder.embedding_dim, hidden),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(hidden),
            nn.Dropout(0.2),
            nn.Linear(hidden, n_classes),
        )

    def _encoder_is_trainable(self) -> bool:
        """True when at least one encoder parameter currently requires grad."""
        return any(p.requires_grad for p in self.encoder.parameters())

    def train(self, mode: bool = True):
        """Switch train/eval mode, keeping a frozen encoder in eval mode.

        Without this, ``model.train()`` would let the frozen encoder's
        BatchNorm update its running statistics and re-enable its dropout.
        """
        super().train(mode)
        if mode and not self._encoder_is_trainable():
            self.encoder.eval()
        return self

    def forward(self, x: torch.Tensor):
        """Return head logits for a batch of windows."""
        if self._encoder_is_trainable():
            # Run under the caller's grad mode so an outer no_grad() is respected.
            emb, _ = self.encoder(x)
        else:
            # Frozen encoder: skip building an autograd graph for it.
            with torch.no_grad():
                emb, _ = self.encoder(x)
        return self.head(emb)


class WearDataset(Dataset):
    """Map-style dataset over an in-memory list of ``(signal, label)`` pairs.

    Each item is returned as ``(signal cast to float32, label as a long tensor)``.
    """

    def __init__(self, samples: list[tuple[torch.Tensor, int]]):
        self.samples = samples

    def __len__(self):
        n_items = len(self.samples)
        return n_items

    def __getitem__(self, idx):
        window, target = self.samples[idx]
        target_tensor = torch.tensor(target, dtype=torch.long)
        return window.float(), target_tensor


def build_dataloaders(data_dir: Path, loc: str, batch: int = 128,
                      n_test_subjects: int = 2, seed: int = 42) -> tuple[DataLoader, DataLoader]:
    """Read ``<loc>_windows.csv`` and build subject-disjoint train/test loaders.

    The CSV must provide the columns ``sbj_id``, ``x_axis``, ``y_axis``,
    ``z_axis`` (Python-literal lists of numbers) and ``label``.

    Args:
        data_dir: Directory containing ``<loc>_windows.csv``.
        loc: Sensor location name used to build the file name.
        batch: Batch size for both loaders.
        n_test_subjects: How many subjects are held out for the test loader.
        seed: Seed of the private RNG that picks the held-out subjects.

    Returns:
        ``(train_loader, test_loader)``. The train loader draws samples with
        replacement, weighted by inverse class frequency, and drops the last
        incomplete batch; the test loader iterates in file order.

    Raises:
        ValueError: If there are not more subjects than ``n_test_subjects``
            (the training split would be empty).
    """
    import random
    from collections import Counter

    csv_path = data_dir / f"{loc}_windows.csv"
    df = pd.read_csv(csv_path)
    subj_ids = sorted(df['sbj_id'].unique().tolist())
    if len(subj_ids) <= n_test_subjects:
        raise ValueError(
            f"{csv_path} has {len(subj_ids)} subject(s); need more than "
            f"{n_test_subjects} to hold out a test split"
        )
    # A private Random instance gives the same draw as seeding the global RNG,
    # without resetting the global random state for the rest of the notebook.
    test_subjs = random.Random(seed).sample(subj_ids, k=n_test_subjects)
    held_out = df['sbj_id'].isin(test_subjs)
    train_df = df[~held_out]
    test_df  = df[held_out]

    def df_to_samples(dataframe):
        """Turn each row into a ``((3, T) float tensor, int label)`` pair."""
        samples: list[tuple[torch.Tensor, int]] = []
        columns = zip(dataframe['x_axis'], dataframe['y_axis'],
                      dataframe['z_axis'], dataframe['label'])
        for x_txt, y_txt, z_txt, raw_label in columns:
            axes = [torch.tensor(ast.literal_eval(txt), dtype=torch.float)
                    for txt in (x_txt, y_txt, z_txt)]
            samples.append((torch.stack(axes, dim=0), int(raw_label)))
        return samples

    train_samples = df_to_samples(train_df)
    test_samples  = df_to_samples(test_df)

    # Inverse-frequency weights keyed by the label VALUE, so classes missing
    # from the training split (non-contiguous ids) cannot misalign the lookup.
    train_labels = [lbl for _, lbl in train_samples]
    class_counts = Counter(train_labels)
    samples_weight = torch.tensor([1.0 / class_counts[lbl] for lbl in train_labels],
                                  dtype=torch.float)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    train_loader = DataLoader(WearDataset(train_samples), batch_size=batch,
                              sampler=sampler, drop_last=True)
    test_loader  = DataLoader(WearDataset(test_samples),  batch_size=batch,
                              shuffle=False)
    return train_loader, test_loader

# ────────────────────────────────────────────────────────────────────────────
#  Training utilities
# ────────────────────────────────────────────────────────────────────────────

def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Works with models that return either ``(embedding, logits)`` (the
    embedder) or a bare logits tensor (the classifier).

    Args:
        model: Network to train; put into train mode here.
        loader: Iterable of ``(x, y)`` batches.
        criterion: Loss called as ``criterion(logits, y)``.
        optimizer: Optimizer stepping the model parameters.
        device: Device the batches are moved to.

    Returns:
        ``(mean loss per sample, accuracy)`` over all batches seen, or
        ``(nan, nan)`` when the loader yields no batch.
    """
    model.train()
    total_loss, total, correct = 0.0, 0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        output = model(x)
        # (embedding, logits) pair -> take the logits; bare tensor -> use as is.
        logits = output[1] if isinstance(output, (tuple, list)) else output
        loss = criterion(logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight the batch loss by batch size to get a per-sample mean.
        total_loss += loss.item() * y.size(0)
        total      += y.size(0)
        correct    += (logits.argmax(1) == y).sum().item()
    if total == 0:
        # e.g. drop_last=True with fewer samples than one batch
        return float('nan'), float('nan')
    return total_loss/total, correct/total

@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Compute loss and accuracy over ``loader`` without updating the model.

    Works with models that return either ``(embedding, logits)`` (the
    embedder) or a bare logits tensor (the classifier).

    Args:
        model: Network to score; put into eval mode here.
        loader: Iterable of ``(x, y)`` batches.
        criterion: Loss called as ``criterion(logits, y)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean loss per sample, accuracy)`` over all batches seen, or
        ``(nan, nan)`` when the loader yields no batch.
    """
    model.eval()
    total_loss, total, correct = 0.0, 0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        output = model(x)
        # (embedding, logits) pair -> take the logits; bare tensor -> use as is.
        logits = output[1] if isinstance(output, (tuple, list)) else output
        loss = criterion(logits, y)
        # Weight the batch loss by batch size to get a per-sample mean.
        total_loss += loss.item() * y.size(0)
        total      += y.size(0)
        correct    += (logits.argmax(1) == y).sum().item()
    if total == 0:
        return float('nan'), float('nan')
    return total_loss/total, correct/total


19

In [None]:
# Training run for one sensor location (`loc`), using the notebook-level
# constants instead of the undefined `TRUE` / `args` / `optim` names that
# made this cell fail on a fresh kernel.
#
# MODE == 'pretrain': train the DeepConvLSTMEmbedder from scratch and save it
#                     as embedder_<loc>.pt.
# MODE == 'finetune': load embedder_<loc>.pt, freeze it and train a new head,
#                     saved as clf_<loc>.pt.
# NOTE: in 'finetune' mode the model returns bare logits rather than an
# (embedding, logits) pair, so train_epoch/evaluate must accept that output.
MODE = 'pretrain'
n_classes = len(label_map)

train_loader, val_loader = build_dataloaders(TRAIN_DIR, loc, 128)
device = torch.device(DEVICE)
if MODE == 'pretrain':
    model = DeepConvLSTMEmbedder(n_classes=n_classes).to(device)
else:  # finetune
    enc = DeepConvLSTMEmbedder(n_classes=n_classes)
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    enc.load_state_dict(torch.load(f"embedder_{loc}.pt", map_location=device))
    model = DeepConvLSTMClassifier(enc, n_classes, train_encoder=False).to(device)

criterion = nn.CrossEntropyLoss()
# Only hand trainable parameters to the optimizer (the encoder may be frozen).
optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=LR)

best_acc = 0.0
for epoch in range(1, EPOCHS + 1):
    tr_loss, tr_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    va_loss, va_acc = evaluate(model, val_loader, criterion, device)
    print(f"Epoch {epoch:02d}: train {tr_loss:.4f}/{tr_acc:.3f} | val {va_loss:.4f}/{va_acc:.3f}")
    # basic checkpointing: keep the weights with the best validation accuracy
    if va_acc > best_acc:
        best_acc = va_acc
        filename = (
            f"{'embedder' if MODE == 'pretrain' else 'clf'}_{loc}.pt"
        )
        torch.save(model.state_dict(), filename)

In [83]:
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import f1_score
from ast import literal_eval
from numpy import nan

class WearModelBuilder:
    """Load windowed sensor data, split it by subject and train a Keras Conv1D+LSTM model.

    Attributes:
        num_classes: Number of output classes of the network.
        label_map: Activity name -> integer class id (19 activities, ids 0..18).
        scaler: StandardScaler fitted by the most recent ``train_model`` call,
            or None before training. Kept so the same scaling can be reused
            at prediction time.
    """

    # Columns holding one window per row, in channel order.
    AXIS_COLUMNS = ('x_axis', 'y_axis', 'z_axis')

    def __init__(self, num_classes=19):
        self.num_classes = num_classes
        # 'jogging (sidesteps)' was missing here, which left 18 entries for
        # 19 classes and shifted every later id by one relative to the
        # notebook-level label_map.
        self.label_map = {
            'null': 0, 'jogging': 1, 'jogging (rotating arms)': 2,
            'jogging (skipping)': 3, 'jogging (sidesteps)': 4,
            'jogging (butt-kicks)': 5,
            'stretching (triceps)': 6, 'stretching (lunging)': 7,
            'stretching (shoulders)': 8, 'stretching (hamstrings)': 9,
            'stretching (lumbar rotation)': 10, 'push-ups': 11,
            'push-ups (complex)': 12, 'sit-ups': 13,
            'sit-ups (complex)': 14, 'burpees': 15,
            'lunges': 16, 'lunges (complex)': 17,
            'bench-dips': 18
        }
        self.scaler = None

    def load_data(self, file_path):
        """Read a windows CSV and decode its list-valued columns.

        Args:
            file_path: Path of a CSV with columns ``x_axis``, ``y_axis``,
                ``z_axis``, ``sbj_id`` and ``label``.

        Returns:
            DataFrame whose axis columns hold Python lists and whose
            ``sbj_id`` / ``label`` columns are integers.
        """
        df = pd.read_csv(file_path)
        for axis in self.AXIS_COLUMNS:
            # NOTE(review): eval() executes whatever text is in the CSV, so only
            # load trusted files. It is kept (instead of ast.literal_eval)
            # because bare `nan` tokens presumably occur in the data — confirm.
            # The lambda keeps name lookup in this module's globals.
            df[axis] = df[axis].apply(lambda row: eval(row))
        df['sbj_id'] = df['sbj_id'].astype(int)
        df['label'] = df['label'].astype(int)
        return df

    def prepare_datasets(self, df, test_size=0.2, random_state=42):
        """Split ``df`` into train/test frames with disjoint subjects.

        Args:
            df: Frame with an ``sbj_id`` column.
            test_size: Share (or count) of subjects held out, as understood by
                ``train_test_split``.
            random_state: Seed for the subject shuffle.

        Returns:
            ``(train_df, test_df)``.
        """
        subjects = df['sbj_id'].unique()
        train_subjects, test_subjects = train_test_split(
            subjects, test_size=test_size, random_state=random_state
        )

        train_df = df[df['sbj_id'].isin(train_subjects)]
        test_df = df[df['sbj_id'].isin(test_subjects)]

        return train_df, test_df

    def build_conv_lstm_model(self, input_shape):
        """Build and compile the Conv1D -> Conv1D -> LSTM -> softmax network.

        Args:
            input_shape: Shape of one sample, without the batch dimension.

        Returns:
            Compiled Keras ``Model`` (Adam, categorical cross-entropy, accuracy).
        """
        inputs = Input(shape=input_shape)

        # Conv block 1
        x = Conv1D(64, 5, activation='relu', padding='same')(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Conv block 2
        x = Conv1D(64, 5, activation='relu', padding='same')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # LSTM layer
        x = LSTM(128, return_sequences=False)(x)
        x = Dropout(0.5)(x)

        # Output
        outputs = Dense(self.num_classes, activation='softmax')(x)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        return model

    @classmethod
    def _stack_windows(cls, frame):
        """Stack the axis columns of ``frame`` into a float array of shape (N, 3, W)."""
        if len(frame) == 0:
            raise ValueError("cannot build a window array from an empty DataFrame")
        per_axis = [
            np.stack([np.asarray(window, dtype=float) for window in frame[axis]])
            for axis in cls.AXIS_COLUMNS
        ]
        return np.stack(per_axis, axis=1)

    def train_model(self, train_df, test_df, epochs=50, batch_size=32):
        """Train a model on ``train_df`` and score it on ``test_df``.

        The input frames are not modified. The window length is taken from
        the data instead of being fixed at 50.

        Args:
            train_df: Training rows (axis columns hold equal-length sequences).
            test_df: Held-out rows, used for validation and the final score.
            epochs: Maximum number of epochs (early stopping may end sooner).
            batch_size: Mini-batch size.

        Returns:
            ``(model, history, macro_f1)``.

        Raises:
            ValueError: If either frame is empty.
        """
        train_array = self._stack_windows(train_df)   # (N, 3, W)
        test_array = self._stack_windows(test_df)
        y_true = test_df['label'].values

        # Standardise every (axis, time-step) position using training
        # statistics only, then restore the (N, 3, W) layout.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(
            train_array.reshape(len(train_array), -1)).reshape(train_array.shape)
        X_test = scaler.transform(
            test_array.reshape(len(test_array), -1)).reshape(test_array.shape)
        self.scaler = scaler

        y_train = to_categorical(train_df['label'].values, num_classes=self.num_classes)
        y_test = to_categorical(y_true, num_classes=self.num_classes)

        # NOTE(review): samples are laid out as (3, W). Keras Conv1D convolves
        # over axis 1, i.e. over the 3 sensor axes rather than over time.
        # The layout is left unchanged so previously saved models stay
        # loadable — confirm whether (W, 3) was intended.
        model = self.build_conv_lstm_model(input_shape=train_array.shape[1:])

        callbacks = [
            EarlyStopping(patience=10, restore_best_weights=True),
            ModelCheckpoint('best_model.h5', save_best_only=True)
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_test, y_test),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )

        y_pred = model.predict(X_test).argmax(axis=1)
        f1 = f1_score(y_true, y_pred, average='macro')
        print(f"Macro F1-Score: {f1:.4f}")
        return model, history, f1

In [None]:
# Train one Keras model per sensor location and save it under WORK_DIR.
# LOCATION_IDS[0] is skipped here.
# NOTE(review): presumably because that location is handled by the PyTorch
# cells above — confirm.
WORK_DIR.mkdir(parents=True, exist_ok=True)  # model.save fails if the folder is missing

for entry in LOCATION_IDS[1:]:
    wear_model_builder = WearModelBuilder()
    windows_df = wear_model_builder.load_data(TRAIN_DIR / f"{entry}_windows.csv")
    train_split, test_split = wear_model_builder.prepare_datasets(windows_df)
    # train_model builds its own network, so no separate build call is needed.
    trained_model, history, macro_f1 = wear_model_builder.train_model(train_split, test_split)
    trained_model.save(WORK_DIR / f"model-{entry}.h5")

In [81]:
# Build submission.csv from the public test set.
#
# NOTE(review): the original cell was not runnable (broken indentation, `tf`
# never imported, `model[0]` relying on a tuple left over from the training
# loop), so its intent is ambiguous. This version averages the class
# probabilities of every per-location model found under WORK_DIR; with a
# single model on disk it reduces to that model's predictions. Confirm this is
# the intended way to combine locations.
import tensorflow as tf  # `tf` is used below but is not imported elsewhere in the notebook


def _parse_axis(cell):
    """Decode one stringified window into a 1-D float array."""
    # NOTE(review): eval() executes the cell text; only use trusted files.
    # Kept because bare `nan` tokens (imported from numpy above) presumably
    # occur in the data — confirm before switching to ast.literal_eval.
    return np.array([float(v) for v in eval(cell)])


test_raw = pd.read_csv(TEST_CSV)

# (N, 3, W): one row per window, channels in x/y/z order; W taken from the data.
test_array = np.stack(
    [np.stack(test_raw[axis].map(_parse_axis).to_list())
     for axis in ('x_axis', 'y_axis', 'z_axis')],
    axis=1,
)

# NOTE(review): as in the original, the scaler is fitted on the test set itself
# because the training scaler was not persisted. Reusing the scaler fitted
# during training would match the statistics the models were trained with.
scaler = StandardScaler()
X_test = scaler.fit_transform(
    test_array.reshape(len(test_array), -1)).reshape(test_array.shape)

location_probs = []
for entry in LOCATION_IDS:
    model_path = WORK_DIR / f"model-{entry}.h5"
    if not model_path.exists():
        print(f"Skipping {entry}: {model_path} not found")
        continue
    location_model = tf.keras.models.load_model(str(model_path))
    location_probs.append(location_model.predict(X_test))

if not location_probs:
    raise FileNotFoundError(f"No model-<location>.h5 files found in {WORK_DIR}")

mean_probs = np.mean(location_probs, axis=0)   # (N, n_classes)
result_df = pd.DataFrame({
    "id": test_raw["id"].values,
    "label": mean_probs.argmax(axis=1),
})
result_df.to_csv("submission.csv", index=False)

[1m1530/1530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
