In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneGroupOut
from torch.utils.data import Dataset, DataLoader
import json
import ast
import os, glob, math, random, json, warnings, itertools, time, gc
from pathlib import Path
from typing import Iterable, List, Tuple

# --- Paths -------------------------------------------------------------------
RAW_DIR = Path('data')
TRAIN_DIR = RAW_DIR / 'dataset_without_null'
TEST_CSV = RAW_DIR / 'test.csv'  # public test
WORK_DIR = Path('work3')

# Sensor placements; one "<location>_windows.csv" file is read per placement.
LOCATION_IDS = ["right_arm", "left_arm", "right_leg", "left_leg"]

# --- Reproducibility ---------------------------------------------------------
# Weight initialisation and DataLoader shuffling are random; seed every RNG the
# notebook can touch so that "Restart & Run All" gives the same numbers.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Post-processing defaults ------------------------------------------------
# Same values as the keyword defaults of the thresholding helpers.
THRESHOLD: float = 0.25
DEFAULT_CLASS: int = 0

# --- Device ------------------------------------------------------------------
# Pinned to CPU. To use a GPU when available, replace with:
#   torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = 'cpu'

# --- Labels ------------------------------------------------------------------
# Activity name -> integer class id.
label_map = {
    'null': 0,
    'jogging': 1,
    'jogging (rotating arms)': 2,
    'jogging (skipping)': 3,
    'jogging (sidesteps)': 4,
    'jogging (butt-kicks)': 5,
    'stretching (triceps)': 6,
    'stretching (lunging)': 7,
    'stretching (shoulders)': 8,
    'stretching (hamstrings)': 9,
    'stretching (lumbar rotation)': 10,
    'push-ups': 11,
    'push-ups (complex)': 12,
    'sit-ups': 13,
    'sit-ups (complex)': 14,
    'burpees': 15,
    'lunges': 16,
    'lunges (complex)': 17,
    'bench-dips': 18,
}
num_classes = len(label_map)

# --- Model / training hyper-parameters ---------------------------------------
embedding_dim = 64      # output channels of the embedder, input channels of the classifier
sequence_length = 50    # presumably samples per window (saved batches print as [32, 50, 3]) - confirm
input_channels = 3      # x, y, z axes
num_layers = 2          # stacked LSTM layers
hidden_size = 128       # LSTM hidden units
learning_rate = 0.001
num_epochs = 50
batch_size = 32

# --- Split -------------------------------------------------------------------
test_subjects = [21]    # rows with these sbj_id values form the held-out test set
loc = "right_arm"       # NOTE(review): the loading cell passes LOCATION_IDS[0], not `loc`

In [4]:
!pip freeze


absl-py==2.3.0
anyio==4.9.0
argon2-cffi==25.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==3.0.0
astunparse==1.6.3
async-lru==2.0.5
attrs==25.3.0
babel==2.17.0
beautifulsoup4==4.13.4
bleach==6.2.0
certifi==2025.6.15
cffi==1.17.1
charset-normalizer==3.4.2
comm==0.2.2
contourpy==1.3.2
cycler==0.12.1
debugpy==1.8.14
decorator==5.2.1
defusedxml==0.7.1
executing==2.2.0
fastjsonschema==2.21.1
filelock==3.18.0
flatbuffers==25.2.10
fonttools==4.58.4
fqdn==1.5.1
fsspec==2025.5.1
gast==0.6.0
google-pasta==0.2.0
grpcio==1.73.0
h11==0.16.0
h5py==3.14.0
httpcore==1.0.9
httpx==0.28.1
idna==3.10
ipykernel==6.29.5
ipython==9.3.0
ipython_pygments_lexers==1.1.1
ipywidgets==8.1.7
isoduration==20.11.0
jedi==0.19.2
Jinja2==3.1.6
joblib==1.4.2
json5==0.12.0
jsonpointer==3.0.0
jsonschema==4.24.0
jsonschema-specifications==2025.4.1
jupyter==1.1.1
jupyter-console==6.6.3
jupyter-events==0.12.0
jupyter-lsp==2.2.5
jupyter_client==8.6.3
jupyter_core==5

In [2]:
class HARDataset(Dataset):
    """Map-style dataset over window records whose axis columns are stored as strings.

    Every record is a mapping with the keys ``'x_axis'``, ``'y_axis'`` and
    ``'z_axis'`` (Python-literal strings, parsed with ``ast.literal_eval``)
    plus a ``'label'`` entry that ``int()`` can convert.
    """

    # Record keys, in the column order of the returned window.
    _AXIS_KEYS = ('x_axis', 'y_axis', 'z_axis')

    def __init__(self, data):
        # Records are stored as given; parsing is deferred to __getitem__.
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(window, label)`` for record ``idx``.

        ``window`` is a float tensor built by stacking the three parsed axes
        as rows and transposing (for flat per-axis lists: one row per sample,
        one column per axis). ``label`` is a one-element long tensor.
        """
        record = self.data[idx]
        axes = [np.array(ast.literal_eval(record[key])) for key in self._AXIS_KEYS]
        window = np.vstack(axes).T
        target = torch.LongTensor([int(record['label'])])
        return torch.FloatTensor(window), target

def load_and_split_data(data_dir: Path, loc: str):
    """Read ``<data_dir>/<loc>_windows.csv`` and split its rows by subject id.

    Rows whose ``sbj_id`` appears in the module-level ``test_subjects`` form
    the test split; all other rows form the training split.

    Args:
        data_dir: Directory containing the per-location window CSV files.
        loc: Location name used as the file-name prefix.

    Returns:
        ``(train_dataset, test_dataset)``, two ``HARDataset`` objects built
        from the row-record dicts of each split.
    """
    windows = pd.read_csv(data_dir / f"{loc}_windows.csv")
    # One boolean mask serves both splits.
    held_out = windows['sbj_id'].isin(test_subjects)

    train_records = windows[~held_out].to_dict('records')
    test_records = windows[held_out].to_dict('records')
    return HARDataset(train_records), HARDataset(test_records)


class TimeSeriesEmbedder(nn.Module):
    """1-D convolutional encoder that turns a raw window into a feature map.

    Three ``Conv1d`` + ``ReLU`` stages (kernel 5, padding 2, so the convolutions
    keep the length); the first two are each followed by ``MaxPool1d(2)``, so
    the time axis is floor-halved twice. No global pooling is applied: the
    output keeps a (shortened) time axis.

    Args:
        input_channels: Number of channels per time step of the input.
        embedding_dim: Number of output channels of the last convolution.
    """

    def __init__(self, input_channels, embedding_dim):
        super().__init__()

        # Layer order is unchanged, so the Sequential indices (and therefore
        # the state_dict keys encoder.0 / encoder.3 / encoder.6) stay the same.
        self.encoder = nn.Sequential(
            nn.Conv1d(input_channels, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),

            nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),

            nn.Conv1d(128, embedding_dim, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
        )

    def forward(self, x):
        """Encode ``x`` of shape ``(batch, seq_len, input_channels)``.

        Returns:
            Tensor of shape ``(batch, embedding_dim, seq_len // 4)``.
        """
        x = x.permute(0, 2, 1)  # (batch_size, input_channels, seq_len)
        # The former trailing squeeze(-1) was dropped: without a global pooling
        # layer it was a no-op, except for inputs whose time axis shrinks to 1,
        # where it silently returned a 2-D tensor and broke 3-D consumers.
        return self.encoder(x)

class DeepConvLSTM(nn.Module):
    """Two Conv1d/ReLU/MaxPool stages followed by an LSTM and a linear head.

    Args:
        input_size: Number of channels per time step of the input.
        hidden_size: Number of hidden units per LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Size of the output (one score per class).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.conv1 = nn.Sequential(
            nn.Conv1d(input_size, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2)
        )

        self.lstm = nn.LSTM(128, hidden_size, num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Classify ``x`` of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` computed from the LSTM
            output at the last time step.
        """
        x = x.permute(0, 2, 1)  # (batch_size, input_size, seq_len)
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.permute(0, 2, 1)  # (batch_size, new_seq_len, 128)

        # Zero initial state, allocated on the input's own device and dtype
        # rather than on a module-level `device`, so the module works wherever
        # it has been moved and in any floating-point precision.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))

        # Only the last time step feeds the classification head.
        return self.fc(out[:, -1, :])

# 4. Combined Model
class HARModel(nn.Module):
    """Chain an embedder and a classifier into one module.

    Args:
        embedder: Module mapping the raw input to a 3-D feature tensor laid
            out as ``(batch, channels, time)``.
        classifier: Module receiving the features as ``(batch, time, channels)``.
    """

    def __init__(self, embedder, classifier):
        super().__init__()
        self.embedder = embedder
        self.classifier = classifier

    def forward(self, x):
        """Run ``x`` through the embedder, swap the last two axes, classify."""
        features = self.embedder(x)
        # No squeeze(1) here: it was a no-op for every channel count except 1,
        # where it removed the channel axis and made the permute below fail.
        features = features.permute(0, 2, 1)  # (batch, channels, time) -> (batch, time, channels)
        return self.classifier(features)

def train_embedder(embedder, dataloader):
    """Pre-train ``embedder`` as the encoder half of a small autoencoder.

    The previous version minimised the MSE between the embedding and fresh
    Gaussian noise, which reconstructs nothing. Here the embedding is
    upsampled back to the input length and mapped to the input channels by a
    throw-away ``Conv1d`` decoder; the loss is the MSE between that
    reconstruction and the input window.

    Reads the module-level ``num_epochs``, ``learning_rate`` and ``device``.

    Args:
        embedder: Module mapping ``(batch, seq_len, channels)`` to
            ``(batch, embedding_channels, reduced_len)``; updated in place.
        dataloader: Iterable of ``(data, label)`` batches; labels are ignored.

    Returns:
        None. The decoder is discarded when the function returns.
    """
    embedder.train()
    criterion = nn.MSELoss()
    # Built on the first batch, when the channel counts become known.
    decoder = None
    optimizer = None

    for epoch in range(num_epochs):
        for i, (data, _) in enumerate(dataloader):
            data = data.to(device)
            # Reconstruction target in Conv1d layout: (batch, channels, seq_len).
            target = data.permute(0, 2, 1)

            encoded = embedder(data)
            if encoded.dim() == 2:
                # Restore a length-1 time axis if the encoder squeezed it away.
                encoded = encoded.unsqueeze(-1)

            if decoder is None:
                decoder = nn.Conv1d(
                    encoded.size(1), target.size(1), kernel_size=5, padding=2
                ).to(encoded.device)
                # Encoder and decoder are optimised jointly.
                optimizer = optim.Adam(
                    list(embedder.parameters()) + list(decoder.parameters()),
                    lr=learning_rate,
                )

            # Undo the encoder's temporal down-sampling before decoding.
            upsampled = nn.functional.interpolate(
                encoded, size=target.size(-1), mode='linear', align_corners=False
            )
            reconstructed = decoder(upsampled)
            loss = criterion(reconstructed, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(dataloader)}], Loss: {loss.item():.4f}')

def train_classifier(model, train_loader, test_loader):
    """Train ``model`` with cross-entropy and report test accuracy every epoch.

    Reads the module-level ``num_epochs``, ``learning_rate`` and ``device``.

    Args:
        model: Module mapping a data batch to ``(batch, num_classes)`` scores;
            updated in place.
        train_loader: Iterable of ``(data, labels)`` training batches.
        test_loader: Iterable of ``(data, labels)`` evaluation batches.

    Returns:
        None. Progress is printed every 10 steps and accuracy after each epoch.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    total_step = len(train_loader)
    # Make sure training starts in train mode even if the caller left the
    # model in eval mode.
    model.train()
    for epoch in range(num_epochs):
        for i, (data, labels) in enumerate(train_loader):
            data = data.to(device)
            # reshape(-1) instead of squeeze(): a batch holding a single sample
            # must stay 1-D, otherwise the loss and labels.size(0) fail.
            labels = labels.to(device).reshape(-1)

            outputs = model(data)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}')

        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, labels in test_loader:
                data = data.to(device)
                labels = labels.to(device).reshape(-1)
                predicted = model(data).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total:
            print(f'Test Accuracy: {100 * correct / total:.2f}%')
        else:
            # An empty test loader would otherwise divide by zero.
            print('Test Accuracy: n/a (empty test loader)')
        model.train()



In [None]:
def threshold_softmatrix(
    softmatrix: np.ndarray,
    threshold: float = 0.25
) -> np.ndarray:
    """Replace every entry strictly below ``threshold`` with ``0.0``.

    Entries equal to or above the threshold are kept as they are. A new array
    is returned; ``softmatrix`` itself is not modified.
    """
    too_small = softmatrix < threshold
    suppressed = np.where(too_small, 0.0, softmatrix)
    return suppressed

def predict_with_default(
    softmatrix: np.ndarray,
    threshold: float = 0.25,
    default_class: int = 0
) -> np.ndarray:
    """Pick the arg-max column per row, falling back to ``default_class``.

    A row whose largest value is strictly below ``threshold`` is assigned
    ``default_class``; every other row gets the index of its largest value.
    """
    top_class = softmatrix.argmax(axis=1)
    top_score = softmatrix.max(axis=1)
    low_confidence = top_score < threshold
    return np.where(low_confidence, default_class, top_class)

In [33]:
# Split the first location's windows into train / held-out subjects and wrap
# both splits in batch loaders.
train_dataset, test_dataset = load_and_split_data(TRAIN_DIR, LOCATION_IDS[0])
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps runs comparable.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [34]:
# Build both stages, then move the whole pipeline with a single .to(device):
# moving the container moves the registered sub-modules in place, so
# `embedder` and `classifier` end up on `device` as well.
embedder = TimeSeriesEmbedder(input_channels, embedding_dim)
classifier = DeepConvLSTM(embedding_dim, hidden_size, num_layers, num_classes)
model = HARModel(embedder, classifier).to(device)

In [36]:
# Echo the assembled module tree as this cell's output.
model

HARModel(
  (embedder): TimeSeriesEmbedder(
    (encoder): Sequential(
      (0): Conv1d(3, 64, kernel_size=(5,), stride=(1,), padding=(2,))
      (1): ReLU()
      (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv1d(64, 128, kernel_size=(5,), stride=(1,), padding=(2,))
      (4): ReLU()
      (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv1d(128, 64, kernel_size=(5,), stride=(1,), padding=(2,))
      (7): ReLU()
    )
  )
  (classifier): DeepConvLSTM(
    (conv1): Sequential(
      (0): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,))
      (1): ReLU()
      (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (conv2): Sequential(
      (0): Conv1d(64, 128, kernel_size=(5,), stride=(1,), padding=(2,))
      (1): ReLU()
      (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (lstm): LSTM(128, 128, num_layer

In [None]:
# Train the combined model; test accuracy is printed after every epoch.
# NOTE(review): the torch.Size lines in the saved output are not printed by any
# code visible in this notebook - presumably left over from an earlier
# revision; re-run to refresh the output.
train_classifier(model, train_loader, test_loader)

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

Epoch [1/50], Step [10/4054], Loss: 2.3916
torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

Epoch [1/50], Step [20/4054], Loss: 2.5087
torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

Epoch [1/50], Step [30/4054], Loss: 2.2926
torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3])

torch.Size([32, 50, 3

# NOTE(review): this updates the embedder's weights in place and, in file
# order, comes after a classifier training call - confirm the intended order.
train_embedder(embedder, train_loader)

In [None]:
print("\nTraining classifier...")
train_classifier(model, train_loader, test_loader)

# Persist the weights of each stage and of the combined model.
for _checkpoint_name, _module in (
    ('embedder.pth', embedder),
    ('classifier.pth', classifier),
    ('har_model.pth', model),
):
    torch.save(_module.state_dict(), _checkpoint_name)