In [1]:
TRAIN_CSV_PATH = "/home/olel/Projects/productivity_robot_backend/train/emotion_classifier/train.csv"
VALID_CSV_PATH = "/home/olel/Projects/productivity_robot_backend/train/emotion_classifier/validation.csv"
BATCH_SIZE = 32
EPOCHS = 80

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import os
import csv as _csv

In [3]:
class EmotionDataset(torch.utils.data.Dataset):
    def __init__(self, data_path):
        print(f"[INFO] Loading dataset from {data_path}")
        self.classes = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

        # self.data_file = open(data_path, 'r')
        # self.reader = _csv.reader(self.data_file, delimiter=',', quotechar='"')
        # next(self.reader)
        # self.label = np.array([self.get_label_index(row[0]) for row in self.reader])
        
        self.data_file = open(data_path, 'r')
        self.reader = _csv.reader(self.data_file, delimiter=',', quotechar='"')
        next(self.reader)

        self.data = [row for row in self.reader]
        np.random.shuffle(self.data)
        self.label = np.array([self.get_label_index(row[0]) for row in self.data])
        try:
            self.data = np.array([list(map(float, row[1:])) for row in self.data], dtype=np.float32)
        except ValueError as e:
            print(f"[ERROR] ValueError while converting data to float: {e}")
        finally:
            print(f"[INFO] Closing data file")
            print(f"[DEBUG] Last row read: {self.data[-1]}, label: {self.label[-1]}, length of data: {len(self.data)}, length of label: {len(self.label)}")
            self.data_file.close()
            self.reader = None
            # self.data[-1]

        # self.data = np.array([])
        # self.label = np.array([])
        # for row in self.reader:
        #     index = int(len(self.data)*np.random.rand())
        #     if type(row[0]) is not str or not row[0] in self.classes: 
        #         print(f"[WARN] label_name is not str: {row[0]} ({type(row[0])}), maybe empty line or end of file? skipping...")
        #         continue
        #     self.data = np.insert(self.data, index, np.array([float(x) for x in row[1:]]), axis=0) if len(self.data) > 0 else np.array([np.array([float(x) for x in row[1:]])])
        #     self.label = np.insert(self.label, index, self.get_label_index(row[0]))
        #     print(f"[DEBUG] self.label shape: {self.label.shape}, self.data shape: {self.data.shape}, total samples: 26784 or 6599", end='\r')

        # self.data = np.array([row for row in self.reader])
        # print(f"[INFO] Shuffling dataset")
        # np.random.shuffle(self.data)
        print(self.label[:10])

        # self.label = np.array([self.get_label_index(row[0]) for row in self.data])
        # self.data = np.array([list(map(float, row[1:])) for row in self.data], dtype=np.float32)

        print(f"[INFO] Loaded {len(self.data)} samples from {data_path}")
        print(f"[INFO] len of labels: {len(self.label)}")

    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]
    
    def get_label_index(self, label_name):
        try:
            index = self.classes.index(label_name)
        except ValueError:
            print(f"[ERROR] label_name '{label_name}' not found in classes {self.classes}")
        return self.classes.index(label_name)

In [4]:
train_dataset = EmotionDataset(data_path=TRAIN_CSV_PATH)
valid_dataset = EmotionDataset(data_path=VALID_CSV_PATH)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

[INFO] Loading dataset from /home/olel/Projects/productivity_robot_backend/train/emotion_classifier/train.csv
[INFO] Closing data file
[DEBUG] Last row read: [ 0.50400186  0.6909957  -0.12264685 ...  0.7118063   0.3888369
  0.03782844], label: 3, length of data: 26783, length of label: 26783
[4 4 0 3 5 2 2 6 0 3]
[INFO] Loaded 26783 samples from /home/olel/Projects/productivity_robot_backend/train/emotion_classifier/train.csv
[INFO] len of labels: 26783
[INFO] Loading dataset from /home/olel/Projects/productivity_robot_backend/train/emotion_classifier/validation.csv
[INFO] Closing data file
[DEBUG] Last row read: [ 0.5172771   0.78088796 -0.1413979  ...  0.7201979   0.40283176
  0.16438115], label: 2, length of data: 6598, length of label: 6598
[2 5 6 5 3 4 4 6 3 3]
[INFO] Loaded 6598 samples from /home/olel/Projects/productivity_robot_backend/train/emotion_classifier/validation.csv
[INFO] len of labels: 6598


In [5]:
class EmotionClassifier(nn.Module):
    def __init__(self, num_landmarks=478, num_features=3, num_classes=7):
        super().__init__()
        self.num_landmarks = num_landmarks
        self.num_features = num_features

        self.conv_layers = nn.Sequential(
            nn.Conv1d(num_features, 64, kernel_size=5, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),

            nn.Conv1d(64, 128, kernel_size=5, padding=2),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2),

            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )

        # After pooling twice, sequence length roughly reduces to num_landmarks / 4
        reduced_len = num_landmarks // 4
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * reduced_len, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        # x shape: [batch, 1434]
        x = x.view(-1, self.num_landmarks, self.num_features)  # [B, 478, 3]
        x = x.permute(0, 2, 1)  # [B, 3, 478] for Conv1d (channels-first)
        x = self.conv_layers(x)
        x = x.flatten(1)  # flatten all except batch
        return self.fc_layers(x)

In [6]:
def fit(model, epochs, train_loader, valid_loader, loss_function, optimizer, device):
    best_loss = float('inf')
    best_epoch = -1
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        model.train()
        for inputs, labels in train_loader:
            if device.type == 'cuda':
                inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        model.eval()
        with torch.no_grad():
            for inputs, labels in valid_loader:
                if device.type == 'cuda':
                    inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = loss_function(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        epoch_loss = running_loss / total
        epoch_acc = correct / total

        print(f"[INFO] Epoch {epoch}: Loss={epoch_loss:.4f}, Acc={epoch_acc:.4f}")
        torch.save(model.state_dict(), f"emotion_classifier_epoch_{epoch}_{epoch_loss}.pth")
        print(f"[INFO] Saved model checkpoint for epoch {epoch}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            best_epoch = epoch
            
    print(f"[INFO] Best validation loss: {best_loss:.4f} at epoch {best_epoch}")
    # torch.save(model.state_dict(), f"emotion_classifier_best_epoch_{best_epoch}_{best_loss}.pth")

In [7]:
model = EmotionClassifier()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == 'cuda':
    print(f"[INFO] Using GPU: {torch.cuda.get_device_name(0)}")
    model = model.to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

[INFO] Using GPU: NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [8]:
fit(model, EPOCHS, train_loader=train_loader, valid_loader=valid_loader, loss_function=loss_function, optimizer=optimizer, device=device)

[INFO] Epoch 0: Loss=1.6310, Acc=0.3950
[INFO] Saved model checkpoint for epoch 0
[INFO] Epoch 1: Loss=1.5527, Acc=0.3692
[INFO] Saved model checkpoint for epoch 1
[INFO] Epoch 2: Loss=1.5198, Acc=0.3851
[INFO] Saved model checkpoint for epoch 2
[INFO] Epoch 3: Loss=1.5084, Acc=0.4192
[INFO] Saved model checkpoint for epoch 3
[INFO] Epoch 4: Loss=1.4939, Acc=0.4263
[INFO] Saved model checkpoint for epoch 4
[INFO] Epoch 5: Loss=1.5536, Acc=0.3750
[INFO] Saved model checkpoint for epoch 5
[INFO] Epoch 6: Loss=1.5319, Acc=0.4268
[INFO] Saved model checkpoint for epoch 6
[INFO] Epoch 7: Loss=1.4929, Acc=0.4447
[INFO] Saved model checkpoint for epoch 7
[INFO] Epoch 8: Loss=1.4749, Acc=0.4542
[INFO] Saved model checkpoint for epoch 8
[INFO] Epoch 9: Loss=1.4842, Acc=0.4303
[INFO] Saved model checkpoint for epoch 9
[INFO] Epoch 10: Loss=1.5248, Acc=0.4397
[INFO] Saved model checkpoint for epoch 10
[INFO] Epoch 11: Loss=1.4469, Acc=0.4588
[INFO] Saved model checkpoint for epoch 11
[INFO] Epoch

KeyboardInterrupt: 