In [None]:
import numpy as np
import pandas as pd

# Define the path to the .npy file
file_path = '/mnt/guido-data/mass/xsleepnet/1.npy'
labels_path = '/mnt/guido-data/mass/labels/1.npy'
table_path = '/mnt/guido-data/mass/table.csv'
try:
    table = pd.read_csv(table_path)

    # Load the .npy file using numpy.memmap
    data = np.memmap(file_path, dtype='float32', mode='r')
    y = np.memmap(labels_path, dtype='int16', mode='r')
    # Print the loaded data
    print(y)
except Exception as e:
    print(f"Error loading .npy file: {e}")

    

---

In [2]:
import torch
import torch.nn as nn
from transformers import BertModel, BertConfig
from torch.optim import Adam
import torch.nn.functional as F
from torch.nn.functional import cosine_similarity

In [98]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SectionEncoderHardAttention(nn.Module):
    def __init__(self, input_dim, hidden_dim, attention_dim, threshold=0.5):
        super(SectionEncoderHardAttention, self).__init__()
        self.transformer_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=4)
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_layer, num_layers=2)
        
        # Hard attention parameters
        self.attention_linear = nn.Linear(input_dim, attention_dim)  # Attention score computation
        self.attention_threshold = threshold  # Threshold for hard attention
        self.linear = nn.Linear(input_dim, hidden_dim)

    def forward(self, section):
        # section is of shape (batch_size, 1, 5, 128), where 5 is the sequence length and 128 is the embedding dimension
        section = section.squeeze(1)  # Remove the extra dimension to make it (batch_size, 5, 128)
        
        # Pass through the transformer encoder
        encoded_section = self.transformer_encoder(section)  # Expects input of shape (batch_size, 5, 128)
        
        # Calculate attention scores for each timestep (5 timesteps)
        attention_scores = self.attention_linear(encoded_section)  # Shape (batch_size, 5, attention_dim)
        attention_weights = F.softmax(attention_scores, dim=1)  # Normalize attention scores
        
        # Apply hard attention by thresholding the attention weights
        attention_mask = (attention_weights > self.attention_threshold).float()  # Binarize attention (hard attention)
        attended_section = encoded_section * attention_mask  # Apply the mask to the section
        print(attended_section)
        # Apply linear transformation to the attended output
        return self.linear(attended_section)

In [85]:
class SectionEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(SectionEncoder, self).__init__()
        self.transformer_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=4)
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_layer, num_layers=2)
        self.linear = nn.Linear(input_dim, hidden_dim)

    def forward(self, section):
        # section is of shape (1, 5, 129), where 5 is the sequence length and 129 is the embedding dimension
        section = section.squeeze(1)  # Remove the extra dimension to make it (batch_size, 5, 129)
        
        # Pass through the transformer encoder
        encoded_section = self.transformer_encoder(section)  # Expects input of shape (batch_size, 5, 129)

        # Apply linear transformation to the encoded output
        return self.linear(encoded_section)

In [86]:
class BERT_EEG(nn.Module):
    def __init__(self, section_encoder, bert_model, contrastive_margin=1.0):
        super(BERT_EEG, self).__init__()
        self.section_encoder = section_encoder  # Transformer to encode individual EEG sections
        self.bert = bert_model                  # BERT model to process encodings
        self.contrastive_margin = contrastive_margin
        self.loss_fn = nn.TripletMarginLoss(margin=self.contrastive_margin)
    
    def section_epoch(self, epoch):
        # Eliminate the last row (remove the 129th row) to get shape (1, 29, 128)
        epoch = epoch[:, :, :-1]

        # Split the epoch (1, 29, 128) into sections of (1, 5, 128)
        sections = [epoch[:, i:i + 5, :] for i in range(0, epoch.size(1) - 5 + 1, 5)]

        # If the last section has fewer than 5 timesteps, pad it with zeros
        if epoch.size(0) % 5 != 0:
            last_section = epoch[:, -(epoch.size(1) % 5):, :]  # Assuming epoch is of shape (batch_size, 29, 129)
            padding = torch.zeros((last_section.size(0), 5 - last_section.size(1), last_section.size(2))).to(epoch.device)

            # Concatenate the last_section and the padding
            last_section = torch.cat((last_section, padding), dim=1)

            sections.append(last_section)
        
        return sections
    
    def forward(self, epoch):
        # Section the epochs
        epoch_sections = self.section_epoch(epoch)

        # Encode each section using the section encoder
        epoch_encoding = (torch.stack([self.section_encoder(section) for section in epoch_sections])).view(1, 30, 128)
        epoch_encoding = epoch_encoding[:,:-1,:]
        # Pass the epoch encodings through BERT
        bert_output = self.bert(inputs_embeds=epoch_encoding)
        bert_output = bert_output.last_hidden_state

        return bert_output
    
    def compute_contrastive_loss(self, anchor, positive, negative):
        return self.loss_fn(anchor, positive, negative)


In [87]:
from physioex.data.datamodule import PhysioExDataModule

datamodule = PhysioExDataModule(
    datasets=["hmc"],
    versions=None,
    batch_size=15,
    selected_channels=["EEG"],
    sequence_length=10,
    data_folder="/mnt/guido-data/",
    preprocessing = "xsleepnet",
    target_transform= None
)

[32m2024-10-23 15:09:14.724[0m | [1mINFO    [0m | [36mphysioex.utils.data_folder[0m:[36mset_data_folder[0m:[36m25[0m - [1mData folder set to /mnt/guido-data/[0m


In [None]:
dataloader_iterator = iter(dataloader)
first_batch = next(dataloader_iterator)
inputs, targets = first_batch
print(inputs.shape)
print(targets.shape)

In [None]:
dataloader_iterator = iter(dataloader)
first_batch = next(dataloader_iterator)
inputs, targets = first_batch

# Flatten the inputs to merge batch and sequence dimensions
# Original shape: [15, 10, 1, 29, 129]
# New shape: [15 * 10, 1, 29, 129]
flattened_inputs = inputs.view(-1, 1, 29, 129)

# Create an iterator for the epochs
epochs_iterator = iter(flattened_inputs)

# Example usage: get the first epoch
first_epoch = next(epochs_iterator)
print(first_epoch.shape)  # Should print torch.Size([1, 29, 129])

In [92]:
# Example training loop
def train_epoch(model, optimizer, epochs, loss_function, data_loader):
    i=0
    print=False
    for epoch_idx in range(epochs):
        for inputs, targets in data_loader:
            flattened_inputs = inputs.view(-1, 1, 29, 129)
            epochs_iterator = iter(flattened_inputs)
            next_epoch_iterator = iter(flattened_inputs)
            
            for epoch in epochs_iterator:
                optimizer.zero_grad()
                next_epoch = next(next_epoch_iterator)  # Get the next element from the iterator
                next_epoch = next_epoch[:, :, :-1]

                if next_epoch is None:
                    break

                predicted_next_epoch = model(epoch)
                
                # Compute loss based on the differences between predicted next epoch and actual next epoch     
                loss = loss_function(predicted_next_epoch, next_epoch)
                
                # Backpropagation
                loss.backward()
                optimizer.step()
                if print == True:
                    
                    print = False
            i = i + 1
            print(i)
            
        print = True
        print(f"Epoch {epoch_idx + 1}/{epochs}, Loss: {loss.item()}")

In [100]:
# Create the model
input_dim = 128  # Example dimension, adjust based on your EEG data
hidden_dim = 128
section_encoder = SectionEncoderHardAttention(input_dim, hidden_dim, attention_dim=128, threshold=0.4)
data_loader = datamodule.train_dataloader()

# BERT configuration
bert_config = BertConfig(hidden_size=hidden_dim, num_attention_heads=4, num_hidden_layers=2)
bert_model = BertModel(bert_config)

# Initialize the BERT-EEG model
model = BERT_EEG(section_encoder, bert_model)

# Optimizer
optimizer = Adam(model.parameters(), lr=1e-4)

# Example data_loader creation
# Assuming data_loader returns (current_epoch, next_epoch, negative_epoch)
# current_epoch, next_epoch, and negative_epoch ar§e torch tensors containing EEG sections
# Train the model
epochs = 10

train_epoch(model, optimizer, epochs, loss_function=torch.nn.MSELoss(), data_loader=data_loader)

tensor([[[ 0.0000, -0.0000,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000,
          -0.0000, -0.0000, -0.0000, -0.1699,  0.0000, -0.0000, -0.0000,
          -0.0000, -0.0000,  0.0000,  0.0369,  0.0000, -0.0000, -0.0000,
          -0.3218, -0.0000, -0.0000,  0.0000,  0.0000, -0.0000, -0.0000,
          -0.0000,  0.0000, -0.0000,  1.1879,  0.0000,  0.0000,  0.0000,
          -0.0000, -0.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000,
           0.0000,  0.0000,  0.0000, -0.0000,  0.0000, -0.0000,  0.0000,
          -0.0000,  0.0000, -0.0000,  0.3959, -0.0000, -0.5547, -0.0000,
          -0.0158,  0.0000,  0.0000, -0.0000, -1.8264, -0.0000, -0.0000,
          -0.0000, -0.0593, -0.0000, -0.0000, -0.0000, -0.0000, -0.1699,
          -0.0000, -0.0000, -0.0000,  0.0000,  0.6034, -0.0000,  0.0000,
           0.0000,  0.0000,  0.0000, -0.0000, -0.0964, -0.0000, -0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.0000,  0.0000,
          -0.0000,  0.0000, -0.0000,  0.0000, -0.24

In [70]:
print(data_loader)
print(len(data_loader))

<torch.utils.data.dataloader.DataLoader object at 0x7f506763e920>
6231


In [None]:
import torch
import torch.nn as nn

class SectionEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(SectionEncoder, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=4)  # Adjusted input_dim to 128
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)
        self.linear = nn.Linear(input_dim, hidden_dim)

    def forward(self, epoch):
        # Discard the last feature (slice to keep 128 features)
        epoch = epoch[:, :128]  # Now epoch is (29, 128)
        
        # Split the epoch (29, 128) into sections of (5, 128)
        sections = [epoch[i:i + 5] for i in range(0, epoch.size(0) - 5 + 1, 5)]
        
        # If the last section has fewer than 5 timesteps, pad it with zeros
        if epoch.size(0) % 5 != 0:
            last_section = epoch[-(epoch.size(0) % 5):]
            padding = torch.zeros((5 - last_section.size(0), last_section.size(1)))
            last_section = torch.cat((last_section, padding), dim=0)
            sections.append(last_section)

        # Encode each section
        sections = torch.stack([self.transformer_encoder(section) for section in sections])
        return self.linear(sections)

# Example usage
input_dim = 128  # Adjusted input_dim to 128
hidden_dim = 64
epoch = torch.randn(29, 129)  # Example epoch with original 129 features

encoder = SectionEncoder(input_dim, hidden_dim)
encoded_sections = encoder(epoch)
print(encoded_sections.shape)
