In [13]:
# Import necessary libraries
import pytorch_lightning as pl
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from sklearn.model_selection import train_test_split
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import torchmetrics
import torch.optim as optim
import pickle


%matplotlib inline

In [14]:
# Location of the pickled dataset (a list of (sequence, label) tuples).
SEQUENCES_PATH = 'sequences.pkl'

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles produced by this project -- never files from an untrusted source.
with open(SEQUENCES_PATH, 'rb') as file:  # pickle data must be read in binary mode
    sequences = pickle.load(file)

# Fail early with a readable message instead of an IndexError on the line below.
assert len(sequences) > 0, f"No sequences found in {SEQUENCES_PATH!r}"

# Show the sequence part of the first (sequence, label) tuple.
sequences[0][0]

Unnamed: 0,TimeStamp,X_gaze,Y_gaze,Pupil radius,DVA
11917214,2794,520.933333,404.400000,5230.0,5.843608
11917215,2796,521.466667,405.466667,5229.0,5.820253
11917216,2798,524.400000,408.200000,5229.0,5.726847
11917217,2800,529.466667,412.466667,5228.0,5.570816
11917218,2802,535.200000,416.266667,5227.0,5.405634
...,...,...,...,...,...
11918089,4544,335.533333,265.200000,4910.0,11.442557
11918090,4546,335.733333,265.000000,4916.0,11.441182
11918091,4548,336.800000,265.000000,4921.0,11.419551
11918092,4550,336.933333,264.933333,4919.0,11.417745


In [15]:
# Hold out a fixed fraction of the sequences; the seed keeps the split reproducible.
SPLIT_SEED = 420
TEST_FRACTION = 0.2

train_seq, test_seq = train_test_split(
    sequences,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# Display the size of each partition.
(len(train_seq), len(test_seq))

(3964, 991)

Dataset

In [16]:
class EyeTrackingDataset(Dataset):
    """Map-style dataset over ``(sequence, label)`` pairs.

    Every item is returned as a dict holding the sequence as a float tensor
    under ``"sequences"`` and the label as a long tensor under ``"label"``.
    """

    def __init__(self, sequences):
        # Indexable collection of (sequence, label) pairs; each sequence must
        # provide ``to_numpy()`` (e.g. a pandas DataFrame).
        self.sequences = sequences

    def __len__(self):
        """Return the number of (sequence, label) pairs."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return the pair at position ``idx`` converted to tensors."""
        frame, target = self.sequences[idx]

        # torch.Tensor(...) converts the array to the default float dtype.
        features = torch.Tensor(frame.to_numpy())
        # Labels are cast to int64 (long).
        target_tensor = torch.tensor(target).long()

        return {"sequences": features, "label": target_tensor}

In [17]:
class EyeTrackingDataModule(pl.LightningDataModule):
    """Lightning data module serving the train and held-out eye-tracking sequences.

    Args:
        train_sequence: collection of ``(sequence, label)`` pairs used for training.
        test_sequence: collection of ``(sequence, label)`` pairs used for both
            validation and testing.
        batch_size: number of items per batch for every dataloader.

    NOTE(review): ``val_dataloader`` and ``test_dataloader`` serve the same data,
    so a checkpoint selected on the validation metric has already seen the test
    set. Consider carving a separate validation split.
    """

    def __init__(self, train_sequence, test_sequence, batch_size):
        super().__init__()
        # Raw (sequence, label) collections; never overwritten after construction.
        self.train_sequence = train_sequence
        self.test_sequence = test_sequence
        self.batch_size = batch_size
        # Dataset wrappers, created in ``setup``.
        self.train_dataset = None
        self.test_dataset = None

    def setup(self, stage=None):
        """Wrap the raw sequences in ``EyeTrackingDataset`` objects.

        Lightning invokes ``setup`` once per stage (fit, validate, test, ...), so
        this must be idempotent. The wrappers are stored in dedicated attributes
        instead of overwriting the raw inputs; otherwise a second call would wrap
        a dataset inside another dataset and break item access.
        """
        self.train_dataset = EyeTrackingDataset(self.train_sequence)
        self.test_dataset = EyeTrackingDataset(self.test_sequence)

    def train_dataloader(self):
        """Return the shuffled training dataloader."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the validation dataloader (held-out sequences, fixed order)."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        """Return the test dataloader (same held-out sequences, fixed order)."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)

In [18]:
# Training hyper-parameters.
N_EPOCHS = 250
BATCH_SIZE = 64  # TODO: value not tuned yet

# Bundle both splits with the batch size for the Lightning trainer.
data_module = EyeTrackingDataModule(
    train_sequence=train_seq,
    test_sequence=test_seq,
    batch_size=BATCH_SIZE,
)

Model

In [19]:
class SequenceModel(nn.Module):
    """Stacked LSTM encoder followed by a linear classification head.

    Args:
        n_features: size of each time step's feature vector (LSTM input size).
        n_classes: number of output logits.
        n_hidden: LSTM hidden state size.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability applied between LSTM layers. It is ignored
            when ``n_layers == 1``, because ``nn.LSTM`` applies dropout only
            after all but the last layer.
    """

    def __init__(self, n_features, n_classes, n_hidden=256, n_layers=3, dropout=0.75):
        super().__init__()

        self.n_hidden = n_hidden

        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            num_layers=n_layers,
            batch_first=True,
            # A single-layer LSTM never applies dropout and only emits a warning
            # when given a non-zero value, so pass 0 in that case.
            dropout=dropout if n_layers > 1 else 0.0,
        )

        self.classifier = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: tensor laid out as ``(batch, seq_len, n_features)`` (the LSTM is
                built with ``batch_first=True``).

        Returns:
            Logits of shape ``(batch, n_classes)``.
        """
        self.lstm.flatten_parameters()
        _, (hidden, _) = self.lstm(x)

        # hidden is (n_layers, batch, n_hidden); the last entry is the final
        # hidden state of the top LSTM layer.
        out = hidden[-1]
        return self.classifier(out)

In [20]:
class EyeTrackingPredictor(pl.LightningModule):
    """Lightning module that trains a ``SequenceModel`` with cross-entropy loss.

    Args:
        n_features: number of features per time step, forwarded to ``SequenceModel``.
        n_classes: number of target classes; also used for the accuracy metric.
    """

    def __init__(self, n_features:int, n_classes:int):
        super().__init__()
        self.n_classes = n_classes
        self.model = SequenceModel(n_features, n_classes)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x, labels=None):
        """Run the model and optionally compute the loss.

        Args:
            x: batch of input sequences passed to the underlying model.
            labels: optional class indices; when given, the cross-entropy loss
                against the logits is computed.

        Returns:
            ``(loss, output)`` where ``loss`` is ``0`` if ``labels`` is ``None``
            and ``output`` holds the logits.
        """
        output = self.model(x)
        loss = 0
        if labels is not None:
            loss = self.criterion(output, labels)
        return loss, output

    def _shared_step(self, batch, stage):
        """Compute loss and accuracy for one batch and log them as ``{stage}_*``."""
        sequences = batch["sequences"]
        labels = batch["label"]
        loss, outputs = self(sequences, labels)
        predictions = torch.argmax(outputs, dim=1)
        # Multiclass accuracy on argmax predictions is the fraction of correct
        # labels; for two classes this equals the binary accuracy, and it stays
        # valid when n_classes > 2.
        step_accuracy = torchmetrics.functional.accuracy(
            predictions, labels, task="multiclass", num_classes=self.n_classes
        )

        self.log(f"{stage}_loss", loss, prog_bar=True, logger=True)
        self.log(f"{stage}_accuracy", step_accuracy, prog_bar=True, logger=True)
        return {"loss": loss, "accuracy": step_accuracy}

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` / ``train_accuracy`` and return the step results."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Log ``validation_loss`` / ``validation_accuracy`` and return the step results."""
        return self._shared_step(batch, "validation")

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` / ``test_accuracy`` and return the step results."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-4."""
        return optim.Adam(self.parameters(), lr = 0.0001)

In [21]:
# Size the model input from the column count of the first sequence.
N_CLASSES = 2
n_features = len(sequences[0][0].columns)

model = EyeTrackingPredictor(n_features=n_features, n_classes=N_CLASSES)

In [22]:
%load_ext tensorboard
%tensorboard --logdir ./lightning_logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Launching TensorBoard...

In [23]:
# Write training curves under lightning_logs/EyeTracking for TensorBoard.
logger = TensorBoardLogger("lightning_logs", name="EyeTracking")

# Keep only the single checkpoint with the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    monitor="validation_loss",
    mode="min",
    save_top_k=1,
    dirpath="checkpoints",
    filename="best-checkpoint",
    verbose=True,
)

trainer = pl.Trainer(
    max_epochs=N_EPOCHS,
    logger=logger,
    callbacks=[checkpoint_callback],
    enable_progress_bar=True,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Start training; the data module supplies the train and validation dataloaders.
trainer.fit(model, datamodule=data_module)

  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | SequenceModel    | 1.3 M 
1 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
1.3 M     Trainable params
0         Non-trainable params
1.3 M     Total params
5.290     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 0, global step 62: 'validation_loss' reached 0.69299 (best 0.69299), saving model to 'C:\\Users\\Daniel\\projects\\EyeTrackingForEpisodicMemory\\Gaze\\src\\classification\\checkpoints\\best-checkpoint-v2.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 1, global step 124: 'validation_loss' reached 0.69248 (best 0.69248), saving model to 'C:\\Users\\Daniel\\projects\\EyeTrackingForEpisodicMemory\\Gaze\\src\\classification\\checkpoints\\best-checkpoint-v2.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 2, global step 186: 'validation_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 3, global step 248: 'validation_loss' was not in top 1
