## Sitting Posture Detection Training

In [104]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [105]:
class PostureDataset(Dataset):
    """Tabular posture dataset yielding (feature tensor, label tensor) pairs.

    Every column except 'class' is treated as a feature. Features are passed
    through a scaler and stored as float32; labels are stored as int64 (long).

    Attributes set by __init__:
    - X: torch.float32 tensor of scaled features, one row per sample.
    - y: torch.long tensor of labels, one entry per sample.
    - scaler: the scaler that was applied to X (the one passed in, or the one
      fitted here when none was passed).
    - label_encoder: the label encoder passed in (may be None).
    """

    def __init__(self, data, scaler=None, label_encoder=None, from_csv=True):
        """
        Args:
        - data: CSV file path or DataFrame containing the dataset. Must have a
          'class' column whose values are already integer-encoded.
        - scaler: Already-fitted scaler exposing `transform`. When None, a new
          StandardScaler is fitted on this dataset's own features.
        - label_encoder: Optional encoder that produced the integer labels. It is
          only stored on the instance; labels are NOT re-encoded here.
        - from_csv: Set to True if the input is a CSV path.

        Raises:
        - ValueError: if the data has no 'class' column.
        """
        # Load data
        if from_csv:
            data = pd.read_csv(data)

        # Ensure required columns are present
        if 'class' not in data.columns:
            raise ValueError("The input data must contain a 'class' column.")

        # Extract features and labels
        features = data.drop(columns=['class']).values
        labels = data['class'].values  # Labels (already encoded)

        # Compare against None explicitly: the decision must not depend on the
        # truthiness of whatever scaler object the caller hands in.
        if scaler is None:
            scaler = StandardScaler()
            features = scaler.fit_transform(features)
        else:
            features = scaler.transform(features)

        # Always expose the scaler and encoder, regardless of which branch ran,
        # so callers can reuse them (e.g. for a held-out split or inference).
        self.scaler = scaler
        self.label_encoder = label_encoder

        self.X = torch.tensor(features, dtype=torch.float32)
        self.y = torch.tensor(labels, dtype=torch.long)  # Ensure labels are integers

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (features, label) pair at position `idx`."""
        return self.X[idx], self.y[idx]

    def get_class_info(self):
        """Print the number of samples per encoded class, most frequent first."""
        print("\nNumber of Samples per Class:")
        class_counts = pd.Series(self.y.numpy()).value_counts()
        for idx, count in class_counts.items():
            print(f"Class {idx}: {count} samples")


In [106]:
# 2. MLP Model
class MLP(nn.Module):
    """Fully connected classifier: input -> 128 -> 64 -> num_classes with ReLU between layers.

    The final layer emits raw logits (no softmax).
    """

    def __init__(self, input_size, num_classes):
        """
        Args:
        - input_size: number of features per sample.
        - num_classes: number of output logits.
        """
        super().__init__()

        hidden_widths = (128, 64)  # Hidden layer 1, hidden layer 2
        stack = []
        fan_in = input_size
        for width in hidden_widths:
            stack.append(nn.Linear(fan_in, width))
            stack.append(nn.ReLU())
            fan_in = width
        stack.append(nn.Linear(fan_in, num_classes))  # Output layer

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the logits for a batch `x`."""
        return self.model(x)

In [107]:
# Updated Training Function
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Make one pass over `loader`; train if `optimizer` is given, otherwise evaluate.

    Returns:
    - (mean_batch_loss, accuracy). The loss is the mean of the per-batch losses;
      accuracy is correct predictions / samples seen. Both are NaN when the
      loader yields no samples, instead of raising ZeroDivisionError.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    batch_count = 0
    correct, seen = 0, 0

    # Gradients are only tracked while training.
    with torch.set_grad_enabled(is_training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if is_training:
                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            # Count batches here rather than calling len(loader), so loaders
            # without a defined length also work.
            batch_count += 1
            _, predicted = torch.max(outputs, 1)
            seen += labels.size(0)
            correct += (predicted == labels).sum().item()

    if batch_count == 0 or seen == 0:
        return float('nan'), float('nan')
    return loss_sum / batch_count, correct / seen


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs):
    """Train `model` for `epochs` epochs, evaluating on `val_loader` after each one.

    The model is moved to CUDA when available, otherwise it stays on CPU. One
    summary line is printed per epoch.

    Args:
    - model: the nn.Module to optimise (updated in place).
    - train_loader: iterable of (inputs, labels) batches used for optimisation.
    - val_loader: iterable of (inputs, labels) batches used for evaluation only.
    - criterion: loss callable taking (outputs, labels).
    - optimizer: optimizer already bound to the model's parameters.
    - epochs: number of passes over `train_loader`.

    Returns:
    - dict with keys 'train_loss', 'train_acc', 'val_loss', 'val_acc', each a
      list holding one value per epoch.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(epochs):
        # Training Phase
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)

        # Validation Phase
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        # Print epoch stats
        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    return history

In [114]:
# Configuration
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)

BATCH_SIZE = 32
LEARNING_RATE = 0.001
EPOCHS = 50
NUM_CLASSES = 4  # Crossed legs, Proper, Slouching, Reclining

if __name__ == "__main__":
    # Load and preprocess data
    csv_path = "../../datasets/vectors/augmented_xy_filtered_keypoints_vectors_mediapipe.csv"
    data = pd.read_csv(csv_path)  # Load the dataset

    # Encode class labels to integers using LabelEncoder
    label_encoder = LabelEncoder()
    data['class'] = label_encoder.fit_transform(data['class'])

    # Fail early with a readable message if the CSV does not contain exactly the
    # number of classes the output layer is sized for; otherwise the mismatch
    # only shows up later as an opaque target-index error inside the loss.
    found_classes = len(label_encoder.classes_)
    if found_classes != NUM_CLASSES:
        raise ValueError(
            f"NUM_CLASSES is {NUM_CLASSES} but the dataset contains "
            f"{found_classes} classes: {list(label_encoder.classes_)}"
        )

    # Show which integer stands for which posture so the per-class counts
    # printed below can be interpreted.
    print("Class encoding:")
    for class_index, class_name in enumerate(label_encoder.classes_):
        print(f"  {class_index}: {class_name}")
    print("\n------------------------------------\n")

    # Split the data into training and test datasets
    train_data, test_data = train_test_split(data, test_size=0.3, random_state=RANDOM_SEED)

    # Standardize features: fit on the training split only so that no test-set
    # statistics leak into the normalisation.
    scaler = StandardScaler()
    train_features = train_data.drop(columns=['class']).values
    scaler.fit(train_features)

    # Create datasets and loaders
    train_dataset = PostureDataset(train_data, scaler=scaler, label_encoder=None, from_csv=False)
    print("Training dataset:")
    train_dataset.get_class_info()
    print("\n------------------------------------\n")
    test_dataset = PostureDataset(test_data, scaler=scaler, label_encoder=None, from_csv=False)
    print("Testing dataset:")
    test_dataset.get_class_info()
    print("\n------------------------------------\n")
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Model initialization
    input_size = train_features.shape[1]
    model = MLP(input_size, NUM_CLASSES)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Train the model; the held-out test split doubles as the per-epoch
    # validation set, so the reported "Val" metrics are test-set metrics.
    train_model(model, train_loader, test_loader, criterion, optimizer, epochs=EPOCHS)


Training dataset:

Number of Samples per Class:
Class 0: 196 samples
Class 1: 192 samples
Class 3: 183 samples
Class 2: 180 samples

------------------------------------

Testing dataset:

Number of Samples per Class:
Class 1: 98 samples
Class 0: 80 samples
Class 2: 73 samples
Class 3: 71 samples

------------------------------------

Epoch [1/50], Train Loss: 1.2976, Train Acc: 0.4394, Val Loss: 1.2146, Val Acc: 0.4969
Epoch [2/50], Train Loss: 1.1250, Train Acc: 0.5060, Val Loss: 1.0971, Val Acc: 0.4783
Epoch [3/50], Train Loss: 1.0379, Train Acc: 0.5406, Val Loss: 1.0247, Val Acc: 0.5776
Epoch [4/50], Train Loss: 0.9635, Train Acc: 0.6245, Val Loss: 0.9715, Val Acc: 0.6118
Epoch [5/50], Train Loss: 0.8992, Train Acc: 0.6578, Val Loss: 0.9025, Val Acc: 0.6211
Epoch [6/50], Train Loss: 0.8143, Train Acc: 0.6897, Val Loss: 0.8354, Val Acc: 0.6366
Epoch [7/50], Train Loss: 0.7505, Train Acc: 0.6964, Val Loss: 0.7814, Val Acc: 0.6460
Epoch [8/50], Train Loss: 0.6806, Train Acc: 0.7310, V