In [1]:
import sys
import os

sys.path.append(os.path.abspath('..'))

In [2]:
from utils import get_cuda_info

get_cuda_info()

PyTorch version: 2.5.1+cu118
**********
_CUDA version: 
CUDA version:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Wed_Oct_30_01:18:48_Pacific_Daylight_Time_2024
Cuda compilation tools, release 12.6, V12.6.85
Build cuda_12.6.r12.6/compiler.35059454_0

**********
CUDNN version: 90100
Available GPU devices: 1
Device Name: NVIDIA GeForce RTX 4070 Ti SUPER


## Zdobycie danych

In [3]:
from utils import load_data

all_data, all_labels = load_data('miami_deception')

## Preprocessing danych

In [4]:
from utils import preprocess_data

X_train, X_val, X_test, y_train, y_val, y_test = preprocess_data(all_data, all_labels, binarize_labels=False)

In [5]:
print(X_train.shape)
print(y_train.shape)

torch.Size([224, 1679, 478, 2])
torch.Size([224])


In [6]:
from utils import get_class_distribution

get_class_distribution(all_labels)

===> Class distribution <===
0: 160
1: 160


# MODEL TORCH

### W podejściu wykorzystane zostaną 2 modele - pierwszy z nich będzie siecią konwolucyjną 2d, która będzie miała za zadanie nauczyć się rozpoznawać cechy charakterystyczne dla wybranej klatki (zbioru współrzędnych pkt charakterystycznych). Do klasyfikacji szeregu czasowego zostanie wykorzystana sekwencyjna sieć neuronowa LSTM.

## Zbudowanie modelu ekstrakcji cech

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class LieClassifier(nn.Module):
    def __init__(self):
        super(LieClassifier, self).__init__()
        
        # Spatial feature extraction
        self.conv1 = nn.Conv1d(in_channels=2, out_channels=32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool1d(kernel_size=2)
        
        # Calculate the correct dimension after convolutions
        # Assuming initial landmarks is 478 (since 239*2=478)
        # After pool1: 239, after pool2: 119
        self.conv_output_size = 64 * 119  # 64 channels * 119 landmarks
        
        # Temporal feature extraction
        self.lstm = nn.LSTM(input_size=self.conv_output_size, hidden_size=128, 
                           batch_first=True, bidirectional=True)
        
        # Classification head
        self.fc1 = nn.Linear(256, 64)  # 128*2 for bidirectional
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(64, 1)
        
        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        for name, param in self.named_parameters():
            if 'weight' in name:
                nn.init.xavier_normal_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0.1)

    def forward(self, x):
        # x shape: (batch_size, frames, landmarks, coordinates)
        batch_size, frames, landmarks, coordinates = x.shape
        
        # Reshape for Conv1D: combine batch and frames
        x = x.view(-1, landmarks, coordinates)  # (batch*frames, landmarks, 2)
        x = x.permute(0, 2, 1)  # (batch*frames, 2, landmarks)
        
        # Spatial features
        x = F.relu(self.conv1(x))
        x = self.pool1(x)  # (batch*frames, 32, landmarks//2)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)  # (batch*frames, 64, landmarks//4)
        
        # Flatten spatial features
        x = x.view(batch_size, frames, -1)  # (batch, frames, 64*(landmarks//4))
        
        # Temporal features
        x, _ = self.lstm(x)  # (batch, frames, 256)
        
        # Take the last timestep
        x = x[:, -1, :]  # (batch, 256)
        
        # Classification head
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.sigmoid(self.fc2(x))
        
        return x

In [None]:
from torch.optim import Adam

model = LieClassifier()
pos_weight = torch.tensor([(len(y_train) - y_train.sum()) / y_train.sum()]).to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = Adam(model.parameters(), lr=1e-4)

## Trening modelu

In [10]:
from torch.utils.tensorboard import SummaryWriter

RUNS_FOLDER_PATH = os.path.abspath('runs')
writer_path = os.path.join('runs', 'torch_lstm', 'lie_classifier')
writer = SummaryWriter(writer_path)

### Diagnostyka

In [12]:
from utils.model_functions import overfit_model

model_diag = LieClassifier()
pos_weight_diag = torch.tensor([(len(y_train) - y_train.sum()) / y_train.sum()]).to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
criterion_diag = nn.BCEWithLogitsLoss(pos_weight=pos_weight_diag)
optimizer_diag = Adam(model.parameters(), lr=1e-3)

overfit_model(model_diag, criterion_diag, optimizer_diag, X_train, y_train)


=== Debug Mode ===
Input shape: torch.Size([32, 1679, 478, 2])
Label distribution: 0.59 (1s)

Step 0:
Loss: 0.6524
Accuracy: 59.38%
Predictions (5 samples): [0.6559 0.6467 0.6443 0.6499 0.6228]
Labels (5 samples): [1. 1. 1. 1. 0.]
Param conv1.weight: shape (32, 2, 3) | grad norm: 0.000000
Param conv1.bias: shape (32,) | grad norm: 0.017600
Param conv2.weight: shape (64, 32, 3) | grad norm: 0.017697
Param conv2.bias: shape (64,) | grad norm: 0.017991
Param lstm.weight_ih_l0: shape (512, 7616) | grad norm: 0.147437
Param lstm.weight_hh_l0: shape (512, 128) | grad norm: 0.024226
Param lstm.bias_ih_l0: shape (512,) | grad norm: 0.014057
Param lstm.bias_hh_l0: shape (512,) | grad norm: 0.014057
Param lstm.weight_ih_l0_reverse: shape (512, 7616) | grad norm: 0.060732
Param lstm.weight_hh_l0_reverse: shape (512, 128) | grad norm: 0.000000
Param lstm.bias_ih_l0_reverse: shape (512,) | grad norm: 0.005791
Param lstm.bias_hh_l0_reverse: shape (512,) | grad norm: 0.005791
Param fc1.weight: shape

KeyboardInterrupt: 

In [11]:
from utils.model_functions import train_torch_model_binary

train_torch_model_binary(model, criterion, optimizer, X_train, y_train, X_val, y_val, writer=writer)

Prediction stats: Min=0.590, Max=0.636, Mean=0.615
Prediction stats: Min=0.546, Max=0.611, Mean=0.575
Prediction stats: Min=0.521, Max=0.607, Mean=0.553


KeyboardInterrupt: 

## Ewaluacja modelu

In [None]:
from utils.model_functions import eval_torch_model_binary

eval_torch_model_binary(model, criterion, X_test, y_test)

Test Loss: 0.0385, Test Accuracy: 1.0000


# MODEL SEGLEARN

In [9]:
from xgboost import XGBClassifier
from seglearn.pipe import Pype
from seglearn.transform import FeatureRep, Segment

### Przekształcenie danych na wektor płaski połączonych współrzędnych

In [15]:
X_train_np = X_train.numpy()
X_val_np = X_val.numpy()
X_test_np = X_test.numpy()
y_train_np = y_train.numpy()
y_val_np = y_val.numpy()
y_test_np = y_test.numpy()

In [16]:
def flatten_landmarks(data):
    n_samples, n_timesteps, n_landmarks, n_coords = data.shape
    return data.reshape(n_samples, n_timesteps, n_landmarks * n_coords)

X_train_flat = flatten_landmarks(X_train_np)
X_val_flat = flatten_landmarks(X_val_np)
X_test_flat = flatten_landmarks(X_test_np)

In [17]:
print(X_train_flat.shape, y_train_np.shape)

(224, 1679, 956) (224,)


### Budowa modelu

In [18]:
pipe = Pype([
    ("segment", Segment(width=20, step=10)),  # Segmentacja sekwencji
    ("features", FeatureRep()),              # Ekstrakcja cech
    ("xgb", XGBClassifier(
        eval_metric='logloss',
        n_estimators=200
    ))
])

### Trening modelu

In [19]:
pipe.fit(X_train_flat, y_train_np)

### Ewaluacja modelu

In [20]:
val_accuracy = pipe.score(X_val_flat, y_val_np)
test_accuracy = pipe.score(X_test_flat, y_test_np)

print(f"Dokładność na zbiorze walidacyjnym: {val_accuracy:.2f}")
print(f"Dokładność na zbiorze testowym: {test_accuracy:.2f}")

Dokładność na zbiorze walidacyjnym: 0.36
Dokładność na zbiorze testowym: 0.40


# TODYNET

### Przygotowanie danych

In [27]:
TodyNet_DATA_PATH = os.path.join("..", "..", "src", "external", "TodyNet", "data", "UCR", "MIAMI_DECEPTION")

os.makedirs(TodyNet_DATA_PATH, exist_ok=True)

In [28]:
X_train_tensor = torch.tensor(X_train_flat, dtype=torch.float32).unsqueeze(1)  # adding channel dimension
X_val_tensor = torch.tensor(X_val_flat, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test_flat, dtype=torch.float32).unsqueeze(1)

# Save the data in PyTorch (.pt) format
torch.save(X_train_tensor, os.path.join(TodyNet_DATA_PATH, 'X_train.pt'))
torch.save(X_val_tensor, os.path.join(TodyNet_DATA_PATH, 'X_valid.pt'))
torch.save(X_test_tensor, os.path.join(TodyNet_DATA_PATH, 'X.pt'))

# Save the labels in PyTorch (.pt) format
torch.save(y_train, os.path.join(TodyNet_DATA_PATH, 'y_train.pt'))
torch.save(y_val, os.path.join(TodyNet_DATA_PATH, 'y_valid.pt'))
torch.save(y_test, os.path.join(TodyNet_DATA_PATH, 'y.pt'))

In [29]:
X_train_tensor.shape

torch.Size([224, 1, 1679, 956])

### Trening modelu [pool_ratio 0.8, ponieważ rozmiar danych jest zbyt duży na 0.2]

In [None]:
# cd .\src\external\TodyNet\src\ & python train.py --dataset='MIAMI_DECEPTION' --pool_ratio 0.8