In [1]:
import sys
import os

sys.path.append(os.path.abspath('..'))

In [2]:
from utils import get_cuda_info

get_cuda_info()

PyTorch version: 2.5.1+cu118
**********
_CUDA version: 
CUDA version:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Wed_Oct_30_01:18:48_Pacific_Daylight_Time_2024
Cuda compilation tools, release 12.6, V12.6.85
Build cuda_12.6.r12.6/compiler.35059454_0

**********
CUDNN version: 90100
Available GPU devices: 1
Device Name: NVIDIA GeForce RTX 4070 Ti SUPER


## Zdobycie danych

In [3]:
from utils import load_data

all_data, all_labels = load_data('ravdess')

## Preprocessing danych

In [4]:
from utils import preprocess_data

X_train, X_val, X_test, y_train, y_val, y_test = preprocess_data(all_data, all_labels)

In [5]:
print(X_train.shape)
print(y_train.shape)

torch.Size([2012, 157, 478, 2])
torch.Size([2012, 8])


In [6]:
from utils import get_class_distribution

get_class_distribution(all_labels)

===> Class distribution <===
1: 192
2: 383
3: 384
4: 382
5: 383
6: 383
7: 384
8: 384


# MODEL TORCH

## Zbudowanie modelu ekstrakcji cech

In [7]:
import torch.nn as nn
import torch.nn.functional as F

In [8]:
class EmotionClassifier(nn.Module):
    def __init__(self):
        super(EmotionClassifier, self).__init__()
        
        # Spatial feature extraction using Conv1D
        self.conv1 = nn.Conv1d(in_channels=2, out_channels=32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        
        # LSTM layers for temporal feature extraction
        self.lstm1 = nn.LSTM(input_size=32 * 239, hidden_size=128, batch_first=True, bidirectional=True)
        self.lstm2 = nn.LSTM(input_size=128 * 2, hidden_size=64, batch_first=True)
        
        # Fully connected classification layer
        self.fc = nn.Linear(64, 8)  # 8 emotion classes

    def forward(self, x):
        # x shape: (batch_size, frames, landmarks, coordinates)
        batch_size, frames, landmarks, coordinates = x.shape
        
        # Reshape for Conv1D: (batch_size * frames, landmarks, coordinates)
        x = x.view(-1, landmarks, coordinates).permute(0, 2, 1)
        
        # Spatial feature extraction
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        
        # Flatten spatial features
        x = x.view(batch_size, frames, -1)  # (batch_size, frames, features)
        
        # Temporal feature extraction
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        
        # Classification
        x = self.fc(x[:, -1, :])  # Take the last timestep's output
        x = F.softmax(x, dim=1)
        return x

In [9]:
from torch.optim import Adam

model = EmotionClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

## Trening modelu

In [10]:
from torch.utils.tensorboard import SummaryWriter

RUNS_FOLDER_PATH = os.path.abspath('runs')
writer_path = os.path.join('runs', 'torch_lstm', 'emotion_classifier')
writer = SummaryWriter(writer_path)

In [11]:
from utils.model_functions import train_torch_model_multiclass

train_torch_model_multiclass(model, criterion, optimizer, X_train, y_train, X_val, y_val, writer=writer)


                                          EPOCH STATISTICS                                          
Epoch       : 1
----------------------------------------------------------------------------------------------------
                     TRAINING                                         VALIDATION                    
----------------------------------------------------------------------------------------------------
Loss        : 130.836383                                    Loss        : 29.033313
Accuracy    : 0.1243                                    Accuracy    : 0.1276
Precision   : 0.0780                                    Precision   : 0.0160
Recall      : 0.1155                                    Recall      : 0.1250
F1 Score    : 0.0804                                    F1 Score    : 0.0283


                                          EPOCH STATISTICS                                          
Epoch       : 2
--------------------------------------------------------------------

## Ewaluacja modelu

In [12]:
from utils.model_functions import eval_torch_model_multiclass

eval_torch_model_multiclass(model, criterion, X_test, y_test)


                                          EPOCH STATISTICS                                          
Epoch       : 1
----------------------------------------------------------------------------------------------------
                                             VALIDATION                                             
----------------------------------------------------------------------------------------------------
Loss        : 27.070522
Accuracy    : 0.3472
Precision   : 0.3099
Recall      : 0.3161
F1 Score    : 0.2989



# MODEL SEGLEARN

In [None]:
from sklearn.ensemble import RandomForestClassifier
from seglearn.pipe import Pype
from seglearn.transform import FeatureRep, Segment

### Przekształcenie danych na wektor płaski połączonych współrzędnych

In [13]:
X_train_np = X_train.numpy()
X_val_np = X_val.numpy()
X_test_np = X_test.numpy()
y_train_np = y_train.numpy()
y_val_np = y_val.numpy()
y_test_np = y_test.numpy()

In [14]:
def flatten_landmarks(data):
    n_samples, n_timesteps, n_landmarks, n_coords = data.shape
    return data.reshape(n_samples, n_timesteps, n_landmarks * n_coords)

X_train_flat = flatten_landmarks(X_train_np)
X_val_flat = flatten_landmarks(X_val_np)
X_test_flat = flatten_landmarks(X_test_np)

In [15]:
print(X_train_flat.shape, y_train_np.shape)

(2012, 157, 956) (2012, 8)


### Budowa modelu

In [None]:
pipe = Pype([
    ("segment", Segment(width=20, step=10)),  # Segmentacja sekwencji
    ("features", FeatureRep()),              # Ekstrakcja cech
    ("rf", RandomForestClassifier(n_estimators=100))  # RandomForest
])

### Trening modelu

In [62]:
pipe.fit(X_train_flat, y_train_np)

### Ewaluacja modelu

In [64]:
val_accuracy = pipe.score(X_val_flat, y_val_np)
test_accuracy = pipe.score(X_test_flat, y_test_np)

print(f"Dokładność na zbiorze walidacyjnym: {val_accuracy:.2f}")
print(f"Dokładność na zbiorze testowym: {test_accuracy:.2f}")

Dokładność na zbiorze walidacyjnym: 0.64
Dokładność na zbiorze testowym: 0.62


# TODYNET

### Przygotowanie danych

In [None]:
TodyNet_DATA_PATH = os.path.join("..", "..", "src", "external", "TodyNet", "data", "UCR", "EMOTIONS")

os.makedirs(TodyNet_DATA_PATH, exist_ok=True)

In [None]:
import torch

X_train_tensor = torch.tensor(X_train_flat, dtype=torch.float32).unsqueeze(1)  # adding channel dimension
X_val_tensor = torch.tensor(X_val_flat, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_flat, dtype=torch.float32).unsqueeze(1)

y_train_class = torch.argmax(y_train, dim=1)
y_val_class = torch.argmax(y_val, dim=1)
y_test_class = torch.argmax(y_test, dim=1)

# Save the data in PyTorch (.pt) format
torch.save(X_train_tensor, os.path.join(TodyNet_DATA_PATH, 'X_train.pt'))
torch.save(X_val_tensor, os.path.join(TodyNet_DATA_PATH, 'X_valid.pt'))
torch.save(X_test_tensor, os.path.join(TodyNet_DATA_PATH, 'X.pt'))

# Save the labels in PyTorch (.pt) format
torch.save(y_train_class, os.path.join(TodyNet_DATA_PATH, 'y_train.pt'))
torch.save(y_val_class, os.path.join(TodyNet_DATA_PATH, 'y_valid.pt'))
torch.save(y_test_class, os.path.join(TodyNet_DATA_PATH, 'y.pt'))

In [18]:
X_train_tensor.shape

torch.Size([2012, 1, 157, 956])

### Trening modelu [pool_ratio 0.8, ponieważ rozmiar danych jest zbyt duży na 0.2]

In [None]:
# cd .\src\external\TodyNet\src\ & python train.py --dataset='EMOTIONS' --pool_ratio 0.8