In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split


In [2]:
data = pd.read_csv('all_pose_data.csv')
data.head()

Unnamed: 0,class,video,frame,track_id,frame_valid,kpt0_x,kpt0_y,kpt0_conf,kpt1_x,kpt1_y,...,kpt13_conf,kpt14_x,kpt14_y,kpt14_conf,kpt15_x,kpt15_y,kpt15_conf,kpt16_x,kpt16_y,kpt16_conf
0,PullUps,v_Pullup_g01_c01.avi,1,-1,1,0.527683,0.541908,0.878457,0.552925,0.519896,...,0.042595,0.480293,0.641447,0.036947,0.55262,0.679507,0.025023,0.583128,0.701049,0.02151
1,PullUps,v_Pullup_g01_c01.avi,2,-1,1,0.528147,0.526812,0.977107,0.549901,0.513626,...,0.017612,0.475282,0.662587,0.013899,0.557475,0.846681,0.002577,0.485492,0.822145,0.002404
2,PullUps,v_Pullup_g01_c01.avi,3,-1,1,0.529285,0.524921,0.720074,0.551841,0.504365,...,0.023261,0.529129,0.664121,0.016251,0.540558,0.775117,0.007324,0.546998,0.742991,0.006099
3,PullUps,v_Pullup_g01_c01.avi,3,-1,1,0.52434,0.526584,0.742671,0.549948,0.504847,...,0.015906,0.457032,0.634831,0.012997,0.535915,0.806089,0.003339,0.450418,0.769494,0.003134
4,PullUps,v_Pullup_g01_c01.avi,4,-1,1,0.592095,0.491487,0.296254,0.599038,0.470912,...,0.026849,0.55411,0.75007,0.019282,0.563745,0.808004,0.017888,0.566042,0.836535,0.013777


In [4]:
data.shape

(28476, 56)

In [11]:
tracked_data = data[data["track_id"] != -1].copy()

In [12]:
tracked_data = tracked_data.drop(tracked_data[tracked_data["frame_valid"] == 0].index)

In [14]:
tracked_data.shape

(26316, 56)

In [15]:
tracked_data.head()

Unnamed: 0,class,video,frame,track_id,frame_valid,kpt0_x,kpt0_y,kpt0_conf,kpt1_x,kpt1_y,...,kpt13_conf,kpt14_x,kpt14_y,kpt14_conf,kpt15_x,kpt15_y,kpt15_conf,kpt16_x,kpt16_y,kpt16_conf
39,PullUps,v_Pullup_g01_c01.avi,7,4,1,0.526088,0.479043,0.918699,0.547763,0.47019,...,0.019228,0.4404,0.690555,0.018786,0.532351,0.778964,0.002266,0.456372,0.76323,0.002519
40,PullUps,v_Pullup_g01_c01.avi,8,4,1,0.572025,0.4892,0.220292,0.574766,0.485169,...,0.027499,0.541456,0.879203,0.024883,0.479138,0.762059,0.001504,0.560725,0.850471,0.001298
41,PullUps,v_Pullup_g01_c01.avi,9,4,1,0.532492,0.467502,0.7518,0.547638,0.455707,...,0.015887,0.492614,0.736532,0.014251,0.488137,0.824852,0.001001,0.501867,0.82897,0.000938
42,PullUps,v_Pullup_g01_c01.avi,10,4,1,0.564332,0.437661,0.165362,0.565938,0.442883,...,0.020283,0.536364,0.687336,0.024501,0.49855,0.634825,0.004184,0.558362,0.701676,0.004582
43,PullUps,v_Pullup_g01_c01.avi,11,4,1,0.520871,0.454179,0.923241,0.542102,0.446035,...,0.015671,0.461379,0.839078,0.015596,0.500996,0.835476,0.000956,0.468981,0.834197,0.001028


## Dataset Cleaning

In [27]:
track_counts = (
    tracked_data.groupby(["video", "track_id"])
      .size()
      .reset_index(name="frame_count")
)

In [28]:
K = 1

track_counts = track_counts.sort_values(
    ["video", "frame_count"],
    ascending=[True, False]
)

track_counts["rank"] = (
    track_counts
    .groupby("video")
    .cumcount() + 1
)

track_counts["keep"] = track_counts["rank"] <= K


In [None]:
tracks_to_keep = track_counts[track_counts["keep"]][["video", "track_id"]]

df_filtered = tracked_data.merge(
    tracks_to_keep,
    on=["video", "track_id"],
    how="inner"
)

## Dataset Preperation

In [4]:
class ActionCSVDataset(Dataset):
    def __init__(self, csv_file, seq_len=30):
        df = pd.read_csv(csv_file)
        self.seq_len = seq_len
        # Group by sequence_id to keep frames together
        self.sequences = []
        self.labels = []
        
        for _, group in df.groupby('sequence_id'):
            # Extract only the keypoint columns (kpt_0 to kpt_33)
            kpts = group.filter(like='kpt_').values
            if len(kpts) == seq_len:
                self.sequences.append(kpts)
                self.labels.append(group['label'].iloc[0])
        
        self.sequences = torch.tensor(self.sequences, dtype=torch.float32)
        self.labels = torch.tensor(self.labels, dtype=torch.long)

    def __len__(self): return len(self.labels)
    def __getitem__(self, idx): return self.sequences[idx], self.labels[idx]

# --- Training Setup ---
dataset = ActionCSVDataset('action_data.csv')
train_idx, test_idx = train_test_split(range(len(dataset)), test_size=0.1)
train_loader = DataLoader(torch.utils.data.Subset(dataset, train_idx), batch_size=16, shuffle=True)

## Bi-LSTM Model

In [5]:
class BiLSTMActionModel(nn.Module):
    def __init__(self, input_size=34, hidden_size=64, num_layers=2, num_classes=2):
        super(BiLSTMActionModel, self).__init__()
        
        # Bi-LSTM Layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, 
                            batch_first=True, bidirectional=True, dropout=0.2)
        
        # Fully connected layer (Hidden * 2 because it's Bidirectional)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, num_classes)
        )
        
    def forward(self, x):
        # x shape: (batch, sequence_length, input_size)
        out, _ = self.lstm(x)
        
        # We only take the output from the last time step
        out = self.fc(out[:, -1, :])
        return out

## Model Training

In [9]:
# Define Model (using the BiLSTMActionModel from before)
model = BiLSTMActionModel(input_size=34, num_classes=2)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training Loop
for epoch in range(150):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} Loss: {loss.item():.4f}")

torch.save(model.state_dict(), 'action_model.pth')

Epoch 1 Loss: 0.7452
Epoch 2 Loss: 0.7484
Epoch 3 Loss: 0.7401
Epoch 4 Loss: 0.7377
Epoch 5 Loss: 0.7362
Epoch 6 Loss: 0.7348
Epoch 7 Loss: 0.7328
Epoch 8 Loss: 0.7306
Epoch 9 Loss: 0.7364
Epoch 10 Loss: 0.7361
Epoch 11 Loss: 0.7276
Epoch 12 Loss: 0.7210
Epoch 13 Loss: 0.7198
Epoch 14 Loss: 0.7181
Epoch 15 Loss: 0.7257
Epoch 16 Loss: 0.7315
Epoch 17 Loss: 0.7321
Epoch 18 Loss: 0.7397
Epoch 19 Loss: 0.7031
Epoch 20 Loss: 0.7080
Epoch 21 Loss: 0.7160
Epoch 22 Loss: 0.7094
Epoch 23 Loss: 0.6899
Epoch 24 Loss: 0.7095
Epoch 25 Loss: 0.7159
Epoch 26 Loss: 0.7025
Epoch 27 Loss: 0.6949
Epoch 28 Loss: 0.6716
Epoch 29 Loss: 0.6456
Epoch 30 Loss: 0.6994
Epoch 31 Loss: 0.6921
Epoch 32 Loss: 0.6598
Epoch 33 Loss: 0.6620
Epoch 34 Loss: 0.6300
Epoch 35 Loss: 0.7037
Epoch 36 Loss: 0.6638
Epoch 37 Loss: 0.6691
Epoch 38 Loss: 0.6131
Epoch 39 Loss: 0.5841
Epoch 40 Loss: 0.5960
Epoch 41 Loss: 0.6137
Epoch 42 Loss: 0.5127
Epoch 43 Loss: 0.5234
Epoch 44 Loss: 0.4509
Epoch 45 Loss: 0.5026
Epoch 46 Loss: 0.51

In [11]:
# Create a dummy input that matches your input shape (Batch, Seq, Features)
dummy_input = torch.randn(1, 30, 34)

# Trace the model
traced_model = torch.jit.script(model, dummy_input)

# Save the serialized model
traced_model.save('action_model_jit.pt')

  traced_model = torch.jit.script(model, dummy_input)
