In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [4]:
data_dir = Path('data')
train_dir = data_dir / 'train'
meta_file = data_dir / 'meta_data.txt'
test_file = data_dir/'test.csv'
label_map = {
    'null': 0,'jogging': 1,'jogging (rotating arms)': 2,'jogging (skipping)': 3,'jogging (sidesteps)': 4,'jogging (butt-kicks)': 5,
    'stretching (triceps)': 6,'stretching (lunging)': 7,'stretching (shoulders)': 8,'stretching (hamstrings)': 9,'stretching (lumbar rotation)': 10,
    'push-ups': 11,'push-ups (complex)': 12,'sit-ups': 13,'sit-ups (complex)': 14,'burpees': 15,'lunges': 16,'lunges (complex)': 17,'bench-dips': 18
}


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [5]:
frames = []
for f in sorted(train_dir.glob('sbj_*.csv')):
    df = pd.read_csv(f, low_memory=False)
    df['subject'] = df['sbj_id'].astype(str)
    frames.append(df)
raw = pd.concat(frames, ignore_index=True)
raw['label_code'] = raw['label'].map(label_map)
raw = raw.dropna(subset=['label_code']).reset_index(drop=True)
raw['label_code'] = raw['label_code'].astype(int)

sensor_cols = [c for c in raw.columns if c.endswith(('_x','_y','_z'))]
loc2idxs = {}
for i, col in enumerate(sensor_cols):
    loc = col[:-2]
    loc2idxs.setdefault(loc, []).append(i)
for loc, idxs in loc2idxs.items():
    axes_order = ['_x','_y','_z']
    loc2idxs[loc] = sorted(idxs, key=lambda j: axes_order.index(sensor_cols[j][-2:]))
locations = list(loc2idxs.keys())
print("Locations:", locations)

scaler = StandardScaler()
raw[sensor_cols] = scaler.fit_transform(raw[sensor_cols])
raw

Locations: ['right_arm_acc', 'right_leg_acc', 'left_leg_acc', 'left_arm_acc']


In [7]:
raw

Unnamed: 0,sbj_id,right_arm_acc_x,right_arm_acc_y,right_arm_acc_z,right_leg_acc_x,right_leg_acc_y,right_leg_acc_z,left_leg_acc_x,left_leg_acc_y,left_leg_acc_z,left_arm_acc_x,left_arm_acc_y,left_arm_acc_z,label,subject,label_code
0,0,0.344072,0.121011,0.711729,0.300999,0.119169,0.134565,0.493112,-0.234144,-0.026379,-0.515422,0.331682,0.323862,jogging,0,1
1,0,0.273151,0.133691,0.742412,0.304795,0.084007,0.115554,0.548486,-0.234504,0.107539,-0.478973,0.336796,0.330691,jogging,0,1
2,0,0.276873,0.158848,0.770868,0.302363,0.076795,0.094021,0.708791,-0.144032,0.356836,-0.411302,0.327018,0.366185,jogging,0,1
3,0,0.352245,0.196907,0.788840,0.302988,0.079565,0.102091,1.088730,-0.064258,0.789935,-0.349172,0.311192,0.397661,jogging,0,1
4,0,0.429321,0.232534,0.808637,0.310620,0.070747,0.163935,0.937406,-0.214691,0.631228,-0.295446,0.247657,0.381219,jogging,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2089033,9,0.667072,-0.677464,0.032796,-0.876339,1.194333,0.811866,-0.761543,0.807607,1.217054,-0.660437,-0.716009,0.319617,bench-dips,9,18
2089034,9,0.677821,-0.675362,0.070558,-0.847397,1.161031,0.735734,-0.755701,0.793455,1.297743,-0.663010,-0.723955,0.356210,bench-dips,9,18
2089035,9,0.680623,-0.700228,0.115179,-0.805340,1.141661,0.595746,-0.762383,0.780207,1.322601,-0.662831,-0.700415,0.299180,bench-dips,9,18
2089036,9,0.666913,-0.737024,0.110382,-0.764552,1.135209,0.493043,-0.780855,0.781609,1.297705,-0.663853,-0.702628,0.306702,bench-dips,9,18


In [8]:
WINDOW=128; STRIDE=64

class WearWindowDataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.starts = []  # (subject,start)
        for subj in df['subject'].unique():
            sub_df = df[df['subject']==subj]
            for s in range(0, len(sub_df)-WINDOW+1, STRIDE):
                self.starts.append((subj, s))
    def __len__(self): return len(self.starts)
    def __getitem__(self, idx):
        subj, start = self.starts[idx]
        sub_df = self.df[self.df['subject']==subj].iloc[start:start+WINDOW]
        sample = {}
        for loc, idxs in loc2idxs.items():
            sample[loc] = torch.tensor(sub_df.iloc[:, [sensor_cols.index(sensor_cols[j]) for j in idxs]].values, dtype=torch.float32)
        y = torch.tensor(np.bincount(sub_df['label_code']).argmax(), dtype=torch.long)
        return sample, y

dataset = WearWindowDataset(raw)
train_idx, val_idx = train_test_split(range(len(dataset)), test_size=0.2, stratify=[y.item() for _,y in dataset])

def collate_fn(batch):
    all_labels = torch.stack([b[1] for b in batch])
    merged = {}
    for loc in locations:
        merged[loc] = torch.stack([b[0][loc] for b in batch])  # (batch, seq, 3)
    return merged, all_labels

train_loader = DataLoader(dataset, batch_size=64, sampler=torch.utils.data.SubsetRandomSampler(train_idx), collate_fn=collate_fn)
val_loader   = DataLoader(dataset, batch_size=64, sampler=torch.utils.data.SubsetRandomSampler(val_idx),   collate_fn=collate_fn)


In [10]:
class LocationEmbedder(nn.Module):
    def __init__(self, emb=32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(3, 16, 3, padding=1), nn.ReLU(),
            nn.Conv1d(16, emb, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1)
        )
    def forward(self, x):
        x = x.permute(0,2,1)
        return self.net(x).squeeze(-1)

class DeepConvLSTMflex(nn.Module):
    def __init__(self, locs, emb=32, hidden=128, classes=19):
        super().__init__()
        self.locs = locs
        self.embed = nn.ModuleDict({loc: LocationEmbedder(emb) for loc in locs})
        self.lstm = nn.LSTM(len(locs)*emb, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, classes)
    def forward(self, batch_dict):
        embs = [self.embed[loc](batch_dict[loc]) for loc in self.locs]
        x = torch.cat(embs, dim=1).unsqueeze(1)
        out, _ = self.lstm(x)
        return self.fc(out[:, -1])

In [11]:
model = DeepConvLSTMflex(locations, emb=32, hidden=128, classes=19).to(device)

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [13]:
EPOCHS=10
for ep in range(1,EPOCHS+1):
    model.train(); loss_sum=0
    for batch_x, batch_y in train_loader:
        batch_y=batch_y.to(device)
        batch_x={loc:t.to(device) for loc,t in batch_x.items()}
        optimizer.zero_grad(); pred=model(batch_x); loss=criterion(pred,batch_y)
        loss.backward(); optimizer.step(); loss_sum+=loss.item()*batch_y.size(0)
    train_loss=loss_sum/len(train_idx)
    # Validation
    model.eval(); val_loss=0; correct=0
    with torch.no_grad():
        for bx,by in val_loader:
            by=by.to(device)
            bx={loc:t.to(device) for loc,t in bx.items()}
            logits=model(bx)
            val_loss+=criterion(logits,by).item()*by.size(0)
            correct+=(logits.argmax(1)==by).sum().item()
    val_loss/=len(val_idx); acc=correct/len(val_idx)
    print(f"Ep {ep}/{EPOCHS} - train {train_loss:.3f} | val {val_loss:.3f} acc {acc:.3f}")

Ep 1/10 - train nan | val nan acc 0.000
Ep 2/10 - train nan | val nan acc 0.000
Ep 3/10 - train nan | val nan acc 0.000


KeyboardInterrupt: 

In [None]:
test_raw=pd.read_csv(test_file)
piv=test_raw.pivot(index='id', columns='sensor_location', values=['x_axis','y_axis','z_axis'])
piv.columns=[f"{loc}_{ax[0]}" for ax,loc in piv.columns]
seqs,ids=[],[]
for start in range(0,len(piv)-WINDOW+1,STRIDE):
    window=piv.iloc[start:start+WINDOW]
    sample={} ;
    for loc in locations:
        cols=[f"{loc}_{a}" for a in ['x','y','z']]
        if all(c in window.columns for c in cols):
            vals=window[cols].values
            avail_cols=[sensor_cols.index(c) for c in cols if c in sensor_cols]
            if avail_cols:
                vals=scaler.transform(pd.DataFrame(vals,columns=cols))[cols].values
            sample[loc]=torch.tensor(vals,dtype=torch.float32)
        else:
            sample[loc]=torch.zeros(WINDOW,3)
    seqs.append(sample); ids.append(window.index[0])



In [None]:
model.eval(); preds=[]
with torch.no_grad():
    for sample in seqs:
        bx={loc:t.unsqueeze(0).to(device) for loc,t in sample.items()}
        preds.append(model(bx).argmax(1).item())
preds