# DATA

In [None]:
# Core Libraries
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import joblib

# ML Libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# Deep Learning
import torch
from ultralytics import YOLO

# LSTM MODEL

In [None]:
import os
# Switch the working directory to the parent folder. The pipeline below uses
# relative paths such as "artifacts/pose_features" — presumably those live in
# the parent (project root) directory; confirm against the repo layout.
# NOTE: the path is relative, so every re-run of this cell moves up one more
# level; run it only once per kernel session.
os.chdir("../")



In [None]:
%pwd

In [7]:
# DL LSTM Classifier Training Pipeline
# Author: Priyam

from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import joblib
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.multiclass import unique_labels
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm


# ------------------------------------------------------------
# Config
# ------------------------------------------------------------
@dataclass
class Config:
    """Paths and hyper-parameters for the LSTM classifier training run.

    All path fields accept either ``Path`` objects or plain strings; they are
    normalised to ``Path`` in ``__post_init__``.
    """

    # Directory the feature arrays and label CSVs are read from.
    feature_dir: Path = Path("artifacts/pose_features")
    # Directory the trained model (and, by default, the label encoder) go to.
    model_dir: Path = Path("artifacts/classifiers")
    # Directory the evaluation report is written to.
    report_dir: Path = Path("artifacts/classifiers/eval_reports")
    # Where the fitted label encoder is stored. When left as None it is
    # derived from the *instance's* ``model_dir`` (not the class default), so
    # overriding ``model_dir`` moves the encoder along with the model.
    label_encoder_path: Optional[Path] = None

    batch_size: int = 32
    num_epochs: int = 20
    hidden_dim: int = 128
    learning_rate: float = 1e-3
    # Chosen once, when the class is defined.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

    def __post_init__(self):
        """Normalise path fields and fill in the derived encoder path."""
        for attr in ("feature_dir", "model_dir", "report_dir"):
            setattr(self, attr, Path(getattr(self, attr)))
        if self.label_encoder_path is None:
            self.label_encoder_path = self.model_dir / "label_encoder.pkl"
        else:
            self.label_encoder_path = Path(self.label_encoder_path)

    def resolve_paths(self):
        """Create every configured directory if missing and return ``self``."""
        for attr in ("feature_dir", "model_dir", "report_dir"):
            getattr(self, attr).mkdir(parents=True, exist_ok=True)
        # The encoder may have been pointed outside ``model_dir``; make sure
        # its parent exists as well so saving it cannot fail on a missing dir.
        self.label_encoder_path.parent.mkdir(parents=True, exist_ok=True)
        return self


# ------------------------------------------------------------
# Dataset
# ------------------------------------------------------------
class PoseDataset(Dataset):
    """Dataset that holds features and labels as tensors in memory.

    ``X`` is converted to a ``float32`` tensor and ``y`` to a ``long`` tensor
    at construction time; items are ``(features, label)`` pairs taken at the
    same index from both tensors.
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.long)
        self.X = features
        self.y = targets

    def __len__(self):
        # The number of samples is the length of the feature tensor.
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        label = self.y[idx]
        return sample, label


# ------------------------------------------------------------
# LSTM Classifier
# ------------------------------------------------------------
class LSTMClassifier(nn.Module):
    """LSTM encoder followed by a linear layer that outputs class logits."""

    def __init__(self, input_dim=51, hidden_dim=128, num_classes=10):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=num_classes)

    def forward(self, x):
        """Return logits computed from the LSTM's final hidden state.

        ``x`` is expected as (batch_size, seq_len, input_dim), matching the
        ``batch_first=True`` setting of the LSTM.
        """
        _outputs, states = self.lstm(x)
        final_hidden = states[0]
        # Index -1 selects the hidden state of the last LSTM layer.
        last_layer_hidden = final_hidden[-1]
        return self.fc(last_layer_hidden)


# ------------------------------------------------------------
# Training & Evaluation
# ------------------------------------------------------------
def _as_sequences(features, name):
    """Return ``features`` as a 3-D (N, seq_len, feature_dim) array.

    2-D input (N, feature_dim) gets a length-1 sequence axis inserted; 3-D
    input is passed through unchanged. Anything else is rejected.
    """
    if features.ndim == 2:
        return features[:, None, :]
    if features.ndim == 3:
        return features
    raise ValueError(
        f"{name}: expected a 2-D or 3-D feature array, got shape {features.shape}"
    )


def train_model(cfg: Config):
    """Train the LSTM classifier, save it, and write an evaluation report.

    Reads ``train.npy`` / ``test.npy`` and ``train_labels.csv`` /
    ``test_labels.csv`` (column ``label``) from ``cfg.feature_dir``, fits a
    ``LabelEncoder`` on the training labels and saves it to
    ``cfg.label_encoder_path``, trains an ``LSTMClassifier`` for
    ``cfg.num_epochs`` epochs, stores its ``state_dict`` as
    ``lstm_classifier.pt`` in ``cfg.model_dir`` and writes accuracy plus the
    classification report to ``lstm_evaluation.txt`` in ``cfg.report_dir``.

    Args:
        cfg: Paths and hyper-parameters for the run.

    Raises:
        ValueError: If a split is empty, features and labels differ in
            length, the feature arrays are not 2-D/3-D, or train and test
            features have different feature dimensions.
    """
    cfg.resolve_paths()

    # Load features
    X_train = np.load(cfg.feature_dir / "train.npy")
    X_test = np.load(cfg.feature_dir / "test.npy")
    y_train = pd.read_csv(cfg.feature_dir / "train_labels.csv")["label"].values
    y_test = pd.read_csv(cfg.feature_dir / "test_labels.csv")["label"].values

    # Fail early with a clear message instead of a late IndexError or a
    # division by zero when averaging the epoch loss.
    for split, X, y in (("train", X_train, y_train), ("test", X_test, y_test)):
        if len(X) == 0:
            raise ValueError(f"{split} split is empty")
        if len(X) != len(y):
            raise ValueError(
                f"{split} split has {len(X)} feature rows but {len(y)} labels"
            )

    # Label encoding
    le = LabelEncoder()
    y_train_enc = le.fit_transform(y_train)
    y_test_enc = le.transform(y_test)
    joblib.dump(le, cfg.label_encoder_path)

    # Dataset and loader. Flat (N, D) features become length-1 sequences;
    # features that already carry a sequence axis are used as they are.
    X_train_seq = _as_sequences(X_train, "train.npy")
    X_test_seq = _as_sequences(X_test, "test.npy")

    # Take the feature dimension from the data instead of hard-coding it.
    input_dim = X_train_seq.shape[-1]
    if X_test_seq.shape[-1] != input_dim:
        raise ValueError(
            f"train features have dimension {input_dim} but test features "
            f"have dimension {X_test_seq.shape[-1]}"
        )

    train_ds = PoseDataset(X_train_seq, y_train_enc)
    test_ds = PoseDataset(X_test_seq, y_test_enc)
    train_loader = DataLoader(train_ds, batch_size=cfg.batch_size, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=cfg.batch_size)

    # Model
    model = LSTMClassifier(
        input_dim=input_dim,
        hidden_dim=cfg.hidden_dim,
        num_classes=len(le.classes_),
    )
    model.to(cfg.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    # Training
    model.train()
    for epoch in range(cfg.num_epochs):
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(cfg.device), y_batch.to(cfg.device)
            optimizer.zero_grad()
            logits = model(X_batch)
            loss = loss_fn(logits, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{cfg.num_epochs} - Loss: {total_loss/len(train_loader):.4f}")

    # Save model
    model_path = cfg.model_dir / "lstm_classifier.pt"
    torch.save(model.state_dict(), model_path)
    print(f"[✓] Saved model to {model_path}")

    # Evaluation
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(cfg.device)
            logits = model(X_batch)
            preds = torch.argmax(logits, dim=1).cpu()
            y_true.extend(y_batch.numpy())
            y_pred.extend(preds.numpy())

    acc = accuracy_score(y_true, y_pred)

    # Restrict the report to labels that actually occur so that the number of
    # target names always matches the number of reported classes.
    present_labels = unique_labels(y_true, y_pred)
    report = classification_report(
        y_true, y_pred,
        labels=present_labels,
        # Labels read from CSV may be non-string (e.g. ints); the report
        # needs display names as strings.
        target_names=[str(name) for name in le.inverse_transform(present_labels)],
        # Classes that are never predicted score 0 without emitting an
        # UndefinedMetricWarning for each metric.
        zero_division=0,
    )

    print(f"\nTest Accuracy: {acc:.4f}")
    print(report)

    report_path = cfg.report_dir / "lstm_evaluation.txt"
    # Explicit encoding keeps the file identical across platforms even when
    # class names contain non-ASCII characters.
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(f"Accuracy: {acc:.4f}\n")
        f.write(report)

    print(f"[✓] Saved report to {report_path}")


# ------------------------------------------------------------
# Entry point
# ------------------------------------------------------------
# Run the full training pipeline with the default configuration when this
# code is executed directly (as a script or as a notebook cell).
if __name__ == "__main__":
    cfg = Config()  # default paths and hyper-parameters
    train_model(cfg)


Epoch 1/20 - Loss: 1.3112
Epoch 2/20 - Loss: 1.1152
Epoch 3/20 - Loss: 1.0734
Epoch 4/20 - Loss: 1.0367
Epoch 5/20 - Loss: 1.0150
Epoch 6/20 - Loss: 0.9993
Epoch 7/20 - Loss: 0.9875
Epoch 8/20 - Loss: 0.9753
Epoch 9/20 - Loss: 0.9682
Epoch 10/20 - Loss: 0.9596
Epoch 11/20 - Loss: 0.9570
Epoch 12/20 - Loss: 0.9492
Epoch 13/20 - Loss: 0.9419
Epoch 14/20 - Loss: 0.9418
Epoch 15/20 - Loss: 0.9351
Epoch 16/20 - Loss: 0.9285
Epoch 17/20 - Loss: 0.9232
Epoch 18/20 - Loss: 0.9190
Epoch 19/20 - Loss: 0.9155
Epoch 20/20 - Loss: 0.9146
[✓] Saved model to artifacts/classifiers/lstm_classifier.pt

Test Accuracy: 0.8391
                     precision    recall  f1-score   support

EXERCISE_BODY_SWING       0.64      0.90      0.75       185
   LOOKING_STRAIGHT       1.00      0.99      0.99      1081
   SITTING_STANDING       0.00      0.00      0.00        15
           STANDING       0.00      0.00      0.00       129
           fighting       0.08      0.03      0.04        36
          gesturing

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
