In [29]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import shap
from torch.utils.tensorboard import SummaryWriter
from torch.optim import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, TensorDataset

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Get data

In [10]:
DATA_PATH = os.path.abspath(os.path.join('..', '..', '..', 'data', 'processed', 'ravdess'))

In [11]:
def load_data(data_path):
    all_data = []
    all_labels = []

    for file in os.listdir(data_path):
        if file.endswith(".npy"):
            data = np.load(os.path.join(data_path, file), allow_pickle=True)
            data = np.array(data, dtype=np.float32)

            all_data.append(data)

            label = int(file.split("-")[2])
            all_labels.append(label)

    return np.array(all_data, dtype=object), np.array(all_labels)

In [12]:
all_data, all_labels = load_data(DATA_PATH)

In [22]:
def preprocess_data(data, labels):
    tensor_data = [torch.tensor(d, dtype=torch.float32) for d in data]
    padded_data = pad_sequence(tensor_data, batch_first=True)

    encoder = LabelBinarizer()
    encoded_labels = encoder.fit_transform(labels)
    encoded_labels = torch.tensor(encoded_labels, dtype=torch.float32)

    X_train, X_temp, y_train, y_temp = train_test_split(
        padded_data, encoded_labels, test_size=0.3, random_state=42
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42
    )

    return X_train, X_val, X_test, y_train, y_val, y_test

X_train, X_val, X_test, y_train, y_val, y_test = preprocess_data(all_data, all_labels)

# PODEJŚCIE 1 - Dodanie warstwy self-attention

## MODEL

Próba dodania warstwy atencji, by model automatycznie decydował które z landmarków są ważne.

In [24]:
class EmotionClassifier(nn.Module):
    def __init__(self, num_landmarks=478):
        super(EmotionClassifier, self).__init__()
        
        self.num_landmarks = num_landmarks
        self.conv1 = nn.Conv1d(in_channels=2, out_channels=32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        
        # Attention layer: Assign weights to each landmark
        self.attention = nn.Linear(num_landmarks // 2, num_landmarks // 2)  # Reduce dimensionality
        
        # LSTM layers
        self.lstm1 = nn.LSTM(input_size=(num_landmarks // 2) * 32, hidden_size=128, batch_first=True, bidirectional=True)
        self.lstm2 = nn.LSTM(input_size=128 * 2, hidden_size=64, batch_first=True)
        
        # Fully connected classification layer
        self.fc = nn.Linear(64, 8)

    def forward(self, x):
        batch_size, frames, landmarks, coordinates = x.shape
        x = x.view(-1, landmarks, coordinates).permute(0, 2, 1)  # Shape: (batch_size*frames, 2, 478)
        
        x = F.relu(self.conv1(x))  # Shape: (batch_size*frames, 32, 478)
        x = self.pool1(x)  # Shape: (batch_size*frames, 32, 239) - Because of pooling
        
        # Compute landmark importance using attention
        x_mean = x.mean(dim=1)  # Average over filters -> (batch_size*frames, 239)
        attn_weights = torch.sigmoid(self.attention(x_mean))  # Learnable landmark weights
        
        x = x * attn_weights.unsqueeze(1)  # Apply learned importance to landmarks
        
        x = x.view(batch_size, frames, -1)  
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        
        x = self.fc(x[:, -1, :])  
        return x


## Trening

In [25]:
BATCH_SIZE = 32
EPOCHS = 200
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [26]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = EmotionClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

In [27]:
writer = SummaryWriter("runs/torch-lstm/feature_selection")

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        y_batch = y_batch.argmax(dim=1)
        
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += predicted.eq(y_batch).sum().item()
        total += y_batch.size(0)
    
    train_acc = correct / total

    # Validation
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_batch = y_batch.argmax(dim=1)
            
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            correct += predicted.eq(y_batch).sum().item()
            total += y_batch.size(0)
    
    val_acc = correct / total

    writer.add_scalar("Loss/Train", train_loss, epoch)
    writer.add_scalar("Loss/Validation", val_loss, epoch)
    writer.add_scalar("Accuracy/Train", train_acc, epoch)
    writer.add_scalar("Accuracy/Validation", val_acc, epoch)

    print(f"Epoch {epoch + 1}/{EPOCHS}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

Epoch 1/200, Train Loss: 130.5693, Train Acc: 0.1173, Val Loss: 28.8773, Val Acc: 0.1299
Epoch 2/200, Train Loss: 130.1450, Train Acc: 0.1193, Val Loss: 28.8686, Val Acc: 0.1276
Epoch 3/200, Train Loss: 130.1265, Train Acc: 0.1233, Val Loss: 28.8807, Val Acc: 0.1276
Epoch 4/200, Train Loss: 130.0376, Train Acc: 0.1332, Val Loss: 28.8561, Val Acc: 0.1253
Epoch 5/200, Train Loss: 130.0314, Train Acc: 0.1228, Val Loss: 28.8616, Val Acc: 0.1276
Epoch 6/200, Train Loss: 130.0584, Train Acc: 0.1248, Val Loss: 28.8544, Val Acc: 0.1462
Epoch 7/200, Train Loss: 130.0230, Train Acc: 0.1342, Val Loss: 28.8600, Val Acc: 0.1276
Epoch 8/200, Train Loss: 129.9842, Train Acc: 0.1372, Val Loss: 28.8298, Val Acc: 0.1276
Epoch 9/200, Train Loss: 129.7192, Train Acc: 0.1690, Val Loss: 28.7719, Val Acc: 0.1717
Epoch 10/200, Train Loss: 128.7858, Train Acc: 0.1784, Val Loss: 28.1453, Val Acc: 0.2065
Epoch 11/200, Train Loss: 126.4844, Train Acc: 0.1958, Val Loss: 28.0498, Val Acc: 0.1903
Epoch 12/200, Train

## SHAP wartości

In [None]:
explainer = shap.GradientExplainer(model, X_train.to('cpu'))

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

# PODEJŚCIE 2 - Trening autoencodera.

- Train a 1D convolutional autoencoder with your raw time-series data.
- Extract the most informative latent features instead of all landmarks.
- Use only these features in your final classifier.