In [2]:
import os, gc, json, warnings
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupShuffleSplit
from tqdm import tqdm
warnings.filterwarnings('ignore')

import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
try:
    from sktime.classification.kernel_based import RocketClassifier
except Exception:
    try:
        from sktime.classification.kernel_based.rocket import RocketClassifier
    except Exception:
        raise ImportError(
            "RocketClassifier konnte nicht importiert werden. "
            "Bitte installiere scikit-learn==1.2.2 und sktime==0.18.1"
        )

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Device:', device)

Device: cpu


In [3]:
# --- Windowing ---------------------------------------------------------------
WIN = 128               # samples per window (~2.56 s at 50 Hz)
STEP = 64               # hop between window starts (50 % overlap)

# --- ROCKET --------------------------------------------------------------------
ROCKET_KERNELS = 10000  # number of random convolution kernels

# --- Paths -----------------------------------------------------------------------
data_dir = Path('data')
train_dir = data_dir / 'train'
meta_file = data_dir / 'meta_data.txt'
test_file = data_dir / 'test.csv'

# --- Labels ----------------------------------------------------------------------
# Activity names in class-code order: the position in this tuple IS the code.
_ACTIVITY_NAMES = (
    'null',
    'jogging',
    'jogging (rotating arms)',
    'jogging (skipping)',
    'jogging (sidesteps)',
    'jogging (butt-kicks)',
    'stretching (triceps)',
    'stretching (lunging)',
    'stretching (shoulders)',
    'stretching (hamstrings)',
    'stretching (lumbar rotation)',
    'push-ups',
    'push-ups (complex)',
    'sit-ups',
    'sit-ups (complex)',
    'burpees',
    'lunges',
    'lunges (complex)',
    'bench-dips',
)
label_map = {name: code for code, name in enumerate(_ACTIVITY_NAMES)}
num_classes = len(label_map)

# NOTE(review): presumably the number of axes per sensor (x, y, z); it is not
# referenced by any of the cells visible here.
C = 3

# Shared classification loss for the ConvLSTM training loop.
crit = nn.CrossEntropyLoss()
print(len(label_map))

19


In [5]:
# Read every per-subject recording and tag each row with a string subject id.
frames = [
    pd.read_csv(csv_path, low_memory=False).assign(
        subject=lambda d: d['sbj_id'].astype(str)
    )
    for csv_path in sorted(train_dir.glob('sbj_*.csv'))
]
raw = pd.concat(frames, ignore_index=True)
print('Shape raw:', raw.shape)

# Encode the activity names as integer class codes; rows whose label is not a
# key of `label_map` (including missing labels) map to NaN and are discarded.
raw['label_code'] = raw['label'].map(label_map)
raw = raw[raw['label_code'].notna()].reset_index(drop=True)
raw['label_code'] = raw['label_code'].astype(int)

# Sensor columns are recognised by their axis suffix; stripping that suffix
# yields the sensor location name.
ax_order = ['_x', '_y', '_z']
sensor_cols = [col for col in raw.columns if col.endswith(tuple(ax_order))]
locs = sorted(set(col[:-2] for col in sensor_cols))   # alphabetical list of all locations
loc_axes_cols = {loc: [loc + axis for axis in ax_order] for loc in locs}
print('Locations:', locs)

# Discard rows with a missing reading in any sensor column.
before = len(raw)
raw = raw.dropna(subset=sensor_cols).reset_index(drop=True)
n_dropped = before - len(raw)
print(f'Remove NaN‑rows in sensors: {n_dropped} rows dropped')

# Standardise every sensor column (statistics are computed over all loaded rows).
scaler = StandardScaler().fit(raw[sensor_cols])
raw[sensor_cols] = scaler.transform(raw[sensor_cols])

# Running sample counter within each subject.
raw['time_idx'] = raw.groupby('subject').cumcount()

# Filled by the long-format cell further down.
dfs_long = []

Shape raw: (3466400, 15)
Locations: ['left_arm_acc', 'left_leg_acc', 'right_arm_acc', 'right_leg_acc']
Remove NaN‑rows in sensors: 34274 rows dropped


In [7]:
class DeepConvLSTM_Single(nn.Module):
    """Convolution + LSTM classifier for windows from a single sensor location.

    Four same-padded 1-D convolutions (kernel size 5) with ReLU activations,
    with dropout after all but the last one, feed a two-layer LSTM. The LSTM
    output at the final time step is projected onto the class logits.

    Args:
        in_ch: number of input channels of the first convolution.
        classes: number of output logits.
        hidden: LSTM hidden size.
        conv_ch: number of feature maps of every convolution.
        dropout: dropout probability used between convolutions and between
            the two LSTM layers.
    """

    def __init__(self, in_ch=3, classes=num_classes, hidden=128, conv_ch=64, dropout=0.2):
        super().__init__()
        n_conv = 4
        layers = []
        for depth in range(n_conv):
            src_ch = in_ch if depth == 0 else conv_ch
            layers.append(nn.Conv1d(src_ch, conv_ch, 5, padding=2))
            layers.append(nn.ReLU())
            # No dropout after the last convolution block.
            if depth < n_conv - 1:
                layers.append(nn.Dropout(dropout))
        self.conv = nn.Sequential(*layers)
        self.lstm = nn.LSTM(
            input_size=conv_ch,
            hidden_size=hidden,
            num_layers=2,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(hidden, classes)

    def forward(self, x):
        """Return class logits for a channels-first batch ``(batch, in_ch, time)``."""
        feats = self.conv(x)
        # The LSTM is batch_first, so move the time axis in front of the channels.
        seq = feats.permute(0, 2, 1)
        lstm_out, _ = self.lstm(seq)
        last_step = lstm_out[:, -1]
        return self.fc(last_step)



In [8]:
# Build the long-format training table: one row per (subject, time step,
# sensor location) with generic x/y/z axis columns.
#
# The list is (re)created here instead of relying on a list left behind by an
# earlier cell, so re-running this cell does not append the same frames twice.
dfs_long = []
for loc in locs:
    cols = loc_axes_cols[loc]
    df_loc = (
        raw[['subject', 'time_idx', 'label_code'] + cols]
        .rename(columns={
            'subject': 'sbj_id',
            'time_idx': 'id',
            cols[0]: 'x_axis',
            cols[1]: 'y_axis',
            cols[2]: 'z_axis',
        })
        .assign(sensor_location=loc)
    )
    dfs_long.append(
        df_loc[['id', 'sbj_id', 'sensor_location', 'x_axis', 'y_axis', 'z_axis', 'label_code']]
    )
long_df = pd.concat(dfs_long, ignore_index=True)
print('Langformat Trainingsdaten:', long_df.shape)
print('Beispiel:', long_df.head())

# `raw` was already standardised in the loading cell and `crit` is defined in
# the configuration cell, so neither is repeated here. In particular the scaler
# is NOT re-fitted: fitting it a second time on the already scaled columns
# would overwrite `scaler` with statistics of the scaled data and discard the
# raw-unit mean/scale needed to transform any new data the same way.

Langformat Trainingsdaten: (8219056, 7)
Beispiel:    id sbj_id sensor_location    x_axis    y_axis    z_axis  label_code
0   0      0    left_arm_acc -0.515422  0.331682  0.323862           1
1   1      0    left_arm_acc -0.478973  0.336796  0.330691           1
2   2      0    left_arm_acc -0.411302  0.327018  0.366185           1
3   3      0    left_arm_acc -0.349172  0.311192  0.397661           1
4   4      0    left_arm_acc -0.295446  0.247657  0.381219           1


In [9]:
# Cut each subject's recording into overlapping windows, once per sensor
# location, collecting the window samples, the majority label, the subject and
# the location index in parallel lists.
X_all, y_all, subj_all, loc_all = [], [], [], []
for subj_id, sub_df in tqdm(raw.groupby('subject')):
    for loc_idx, loc in enumerate(locs):
        cols = loc_axes_cols[loc]
        if any(c not in sub_df.columns for c in cols):
            continue
        sub_loc = sub_df.dropna(subset=cols).reset_index(drop=True)
        n_rows = len(sub_loc)
        if n_rows < WIN:
            continue
        data = sub_loc[cols].values  # (T_loc, 3)
        labels = sub_loc['label_code']
        # Sliding windows of WIN samples, advancing STEP samples at a time.
        for start in range(0, n_rows - WIN + 1, STEP):
            stop = start + WIN
            X_all.append(data[start:stop])
            # Window label = most frequent sample label (smallest code on ties).
            y_all.append(labels.iloc[start:stop].mode()[0])
            subj_all.append(subj_id)
            loc_all.append(loc_idx)
# Convert to arrays (subj_all stays a plain list).
X_all = np.stack(X_all)
y_all = np.array(y_all)
loc_all = np.array(loc_all)
print('Gesamt‑Windows:', X_all.shape)

100%|██████████| 22/22 [00:20<00:00,  1.05it/s]


Gesamt‑Windows: (128292, 128, 3)


In [10]:
from torch.utils.data import Dataset, DataLoader, Subset

class LocationWindowDataset(Dataset):
    """Sliding-window dataset over the long-format rows of one sensor location.

    Windows are generated separately inside every contiguous run of rows that
    share the same ``sbj_id``, so a window never mixes samples of two subjects
    and its group id is valid for the whole window (important for group-wise
    train/validation splits).

    Args:
        df_loc: frame with the columns ``x_axis``, ``y_axis``, ``z_axis``,
            ``label_code`` and ``sbj_id``; rows are expected in time order.
        win: window length in rows.
        step: offset in rows between consecutive window starts.

    Each item is a tuple ``(x, y, grp)``:
        x:   float32 tensor of shape ``(3, win)`` (channels first),
        y:   most frequent ``label_code`` in the window as ``int``
             (the smallest code wins a tie),
        grp: the ``sbj_id`` of the window.
    """

    def __init__(self, df_loc, win, step):
        self.df = df_loc.reset_index(drop=True)
        self.win = win
        self.step = step
        # Cache plain NumPy arrays once; slicing them in __getitem__ is far
        # cheaper than slicing the DataFrame for every single window.
        self._xyz = self.df[['x_axis', 'y_axis', 'z_axis']].to_numpy(dtype=np.float32)
        self._labels = self.df['label_code'].to_numpy()
        self._groups = self.df['sbj_id'].to_numpy()
        # Start positions (row offsets into self.df) of all windows.
        self.starts = self._window_starts(self._groups, win, step)

    @staticmethod
    def _window_starts(groups, win, step):
        """Return window start rows that keep each window inside one subject run."""
        n_rows = len(groups)
        if n_rows == 0:
            return []
        # Rows at which the subject id differs from the previous row.
        changes = np.flatnonzero(groups[1:] != groups[:-1]) + 1
        bounds = [0, *changes.tolist(), n_rows]
        starts = []
        for lo, hi in zip(bounds[:-1], bounds[1:]):
            starts.extend(range(lo, hi - win + 1, step))
        return starts

    def __len__(self):
        return len(self.starts)

    def __getitem__(self, idx):
        s = self.starts[idx]
        e = s + self.win
        # Channels-first copy, so the returned tensor owns its memory.
        x = self._xyz[s:e].T.copy()
        # Majority label; np.unique sorts its values, so argmax picks the
        # smallest code among equally frequent ones.
        values, counts = np.unique(self._labels[s:e], return_counts=True)
        y = values[counts.argmax()]
        # Group for splitting.
        grp = self._groups[s]
        return torch.from_numpy(x), int(y), grp

In [13]:
from sktime.transformations.panel.rocket import (
    MiniRocket,
    MiniRocketMultivariate,
    MiniRocketMultivariateVariable,
)

In [None]:
# Train one DeepConvLSTM and one ROCKET classifier per sensor location and
# record their validation accuracy on a subject-wise hold-out split.

MAX_ROCKET_WINDOWS = 20000       # cap on training windows handed to ROCKET
MAX_ROCKET_VAL_WINDOWS = 5000    # cap on validation windows scored by ROCKET
N_EPOCHS = 5                     # ConvLSTM training epochs per location


def windows_to_panel(idxs, dataset=None):
    """Convert dataset windows into the nested-DataFrame panel passed to ROCKET.

    Args:
        idxs: iterable of window indices into ``dataset``.
        dataset: window dataset to read from; defaults to the module-level
            ``ds`` of the location currently being processed.

    Returns:
        ``(panel, labels)``: a DataFrame with one row per window and the
        columns ``x``/``y``/``z`` (each cell a ``pd.Series`` holding one
        axis), and the window labels as a NumPy array.
    """
    source = ds if dataset is None else dataset
    plist, labels = [], []
    for i in idxs:
        x, y, _ = source[i]
        axes = x.cpu().numpy()
        plist.append({
            'x': pd.Series(axes[0]),
            'y': pd.Series(axes[1]),
            'z': pd.Series(axes[2]),
        })
        labels.append(y)
    return pd.DataFrame(plist), np.array(labels)


conv_models = {}
rocket_models = {}
conv_scores = {}
rocket_scores = {}

for loc in locs:
    df_loc = long_df[long_df['sensor_location'] == loc]
    if df_loc.empty:
        print(f"Keine Daten für {loc}, überspringe.")
        continue

    ds = LocationWindowDataset(df_loc, WIN, STEP)
    n = len(ds)
    if n == 0:
        # Not even one full window for this location.
        print(f"Keine Daten für {loc}, überspringe.")
        continue

    # Collect label and group of every window in a single pass over the
    # dataset. The labels are kept in `win_labels` so the global `y_all`
    # produced by the windowing cell is not overwritten.
    win_labels, groups = [], []
    for i in range(n):
        _, y_i, g_i = ds[i]
        win_labels.append(y_i)
        groups.append(g_i)

    # Subject-wise split: all windows of a subject land on the same side.
    gss = GroupShuffleSplit(test_size=0.2, n_splits=1, random_state=42)
    train_idx, val_idx = next(gss.split(np.arange(n), win_labels, groups))
    train_ds = Subset(ds, train_idx)
    val_ds = Subset(ds, val_idx)

    # ---- ConvLSTM ---------------------------------------------------------
    # Seed torch so weight initialisation and batch shuffling are repeatable,
    # matching the fixed random_state used for the split and for ROCKET.
    torch.manual_seed(42)
    tr_ld = DataLoader(train_ds, batch_size=128, shuffle=True)
    va_ld = DataLoader(val_ds, batch_size=128)

    model = DeepConvLSTM_Single(in_ch=3).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    acc = float('nan')
    for ep in range(N_EPOCHS):
        model.train()
        total_loss = 0.0
        for xb, yb, _ in tr_ld:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            loss = crit(model(xb), yb)
            loss.backward()
            # Clip the gradient norm to keep the LSTM updates bounded.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            total_loss += loss.item() * yb.size(0)

        model.eval()
        correct = 0
        with torch.no_grad():
            for xb, yb, _ in va_ld:
                xb, yb = xb.to(device), yb.to(device)
                pred = model(xb).argmax(1)
                correct += (pred == yb).sum().item()
        acc = correct / max(len(val_ds), 1)
        mean_loss = total_loss / max(len(train_ds), 1)
        print(f"  {loc} Epoch {ep + 1}/{N_EPOCHS}: Train Loss {mean_loss:.4f}, VAL Acc {acc:.3f}")
    conv_models[loc] = model
    conv_scores[loc] = acc
    print(f"ConvLSTM {loc} VAL Acc: {acc:.3f}")

    # ---- ROCKET -----------------------------------------------------------
    # Fit and score on the capped random subsets, not on every window.
    sel_tr_idx = np.random.RandomState(42).choice(
        train_idx, size=min(len(train_idx), MAX_ROCKET_WINDOWS), replace=False)
    sel_vl_idx = np.random.RandomState(43).choice(
        val_idx, size=min(len(val_idx), MAX_ROCKET_VAL_WINDOWS), replace=False)

    Xp_tr, y_tr = windows_to_panel(sel_tr_idx, ds)
    Xp_vl, y_vl = windows_to_panel(sel_vl_idx, ds)

    rc = RocketClassifier(num_kernels=ROCKET_KERNELS, random_state=42)
    rc.fit(Xp_tr, y_tr)
    preds_vl = rc.predict(Xp_vl)
    rocket_models[loc] = rc
    rocket_scores[loc] = (preds_vl == y_vl).mean()
    print(f"Rocket {loc} VAL Acc: {rocket_scores[loc]:.3f}")

    # Drop the per-location temporaries before the next location is processed
    # (the fitted models stay referenced by the result dictionaries).
    del train_ds, val_ds, tr_ld, va_ld, model, rc, Xp_tr, Xp_vl, preds_vl, sel_tr_idx, sel_vl_idx
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


In [None]:
import joblib
# Persist one fitted model per sensor location into the working directory:
# ROCKET classifiers as joblib pickles, ConvLSTM networks as state dicts.
for loc in rocket_models:
    m = rocket_models[loc]
    joblib.dump(m, f"rocket_model_{loc}.pkl")
for loc in conv_models:
    m = conv_models[loc]
    torch.save(m.state_dict(), f"conv_model_{loc}.pt")
print("Modelle gespeichert.")

In [None]:
# Run both saved models of every sensor location over sliding windows of the
# test data and store one prediction pair per window.

INFER_BATCH = 1024   # windows per ConvLSTM forward pass (bounds peak memory)

test_df = pd.read_csv(test_file)
results = []
for loc in locs:
    # Check for data first so nothing is loaded for locations without test rows.
    df_loc = test_df[test_df['sensor_location'] == loc]
    if df_loc.empty:
        continue

    # Models are only saved for locations that were actually trained; skip
    # the others instead of failing on a missing file.
    rocket_path = f"rocket_model_{loc}.pkl"
    conv_path = f"conv_model_{loc}.pt"
    if not (Path(rocket_path).exists() and Path(conv_path).exists()):
        print(f"Kein gespeichertes Modell für {loc}, überspringe.")
        continue

    # Windows are cut per subject; each is identified by the `id` of its
    # first row.
    # NOTE(review): assumes the rows of each subject are already in time order
    # in test.csv - confirm.
    seqs, ids = [], []
    for sid, grp in df_loc.groupby('sbj_id'):
        data = grp[['x_axis', 'y_axis', 'z_axis']].values
        id_col = grp['id']
        for s in range(0, len(data) - WIN + 1, STEP):
            seqs.append(data[s:s + WIN])
            ids.append(id_col.iloc[s])
    if not seqs:
        continue

    # NOTE(review): the training cells standardise the sensor columns with a
    # StandardScaler, but no such transform is applied to `test_df` here.
    # Confirm that test.csv is already on the same scale, or apply the fitted
    # training statistics before predicting.

    # joblib.load unpickles arbitrary objects - only load files written by
    # this notebook.
    rc = joblib.load(rocket_path)
    model = DeepConvLSTM_Single(in_ch=3)
    model.load_state_dict(torch.load(conv_path, map_location=device))
    model.to(device).eval()

    # ROCKET takes a nested panel: one row per window, one Series per axis.
    panel = [
        {'x': pd.Series(w[:, 0]), 'y': pd.Series(w[:, 1]), 'z': pd.Series(w[:, 2])}
        for w in seqs
    ]
    Xp = pd.DataFrame(panel)
    rocket_preds = rc.predict(Xp)

    # The ConvLSTM takes channels-first windows; predict in batches so the
    # whole test set is never pushed through the network in a single pass.
    windows = np.stack(seqs).transpose(0, 2, 1)
    conv_chunks = []
    with torch.no_grad():
        for lo in range(0, len(windows), INFER_BATCH):
            xb = torch.tensor(windows[lo:lo + INFER_BATCH], dtype=torch.float32).to(device)
            conv_chunks.append(model(xb).argmax(1).cpu().numpy())
    conv_preds = np.concatenate(conv_chunks)

    for idx, rp, cp in zip(ids, rocket_preds, conv_preds):
        results.append({'id': idx, 'location': loc, 'rocket_pred': int(rp), 'conv_pred': int(cp)})

res_df = pd.DataFrame(results)
print("Test-Inferenz abgeschlossen. Zusammenfassung:")
display(res_df.head())
res_df.to_csv('test_predictions.csv', index=False)
print("Ergebnisse gespeichert: test_predictions.csv")