In [None]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class EEGOnsetLSTM(nn.Module):
    """Bidirectional LSTM that regresses one onset value per EEG sequence.

    The final hidden states of the forward and the backward direction of the
    last LSTM layer are concatenated and mapped to a single scalar by a
    linear layer.

    Args:
        n_channels: Number of input features per time step (EEG channels).
        hidden_size: Hidden size of the LSTM per direction.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, n_channels, hidden_size=64, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size=n_channels,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            bidirectional=True)
        # Bidirectional -> the concatenated state has hidden_size * 2 features.
        self.fc = nn.Linear(hidden_size * 2, 1)  # one output value per sequence (onset)

    def forward(self, x, lengths):
        """Predict one onset value per sequence of a padded batch.

        Args:
            x: Padded input of shape [batch, seq_len, n_channels].
            lengths: True (unpadded) length of every sequence, shape [batch].
                May be a tensor on any device or a plain sequence of ints;
                the batch does not have to be sorted by length.

        Returns:
            Tensor of shape [batch] with the predictions in the same order
            as the rows of ``x``.
        """
        # pack_padded_sequence expects the lengths as int64 on the CPU.
        cpu_lengths = torch.as_tensor(lengths, dtype=torch.int64).cpu()

        # enforce_sorted=False lets PyTorch sort the batch internally and
        # restore the original order of the returned hidden states, so no
        # manual sort / un-sort bookkeeping is needed here.
        packed_input = pack_padded_sequence(
            x, cpu_lengths, batch_first=True, enforce_sorted=False
        )

        _, (hn, _) = self.lstm(packed_input)

        # hn: [num_layers * 2, batch, hidden_size], laid out layer by layer
        # with the forward state before the backward state. The last two
        # entries therefore belong to the final layer.
        hn_cat = torch.cat((hn[-2], hn[-1]), dim=1)  # [batch, hidden_size * 2]

        # Linear head -> one regression value per sequence.
        return self.fc(hn_cat).squeeze(1)  # [batch]

# Dummy example: three padded recordings with different true lengths
batch_size = 3
n_channels = 21
seq_lengths = torch.tensor([1000, 800, 600])  # true (unpadded) lengths
max_len = seq_lengths.max()

# The zero tensor acts as padding; only the valid prefix of each row
# is overwritten with random data.
x = torch.zeros(batch_size, max_len, n_channels)
for i, length in enumerate(seq_lengths):
    x[i, :length].copy_(torch.randn(length, n_channels))

# Example ground-truth onsets, given as sample indices
labels = torch.tensor([400, 350, 200], dtype=torch.float32)

# Model, loss and optimizer
model = EEGOnsetLSTM(n_channels)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# One optimisation step as a smoke test
model.train()
optimizer.zero_grad()
outputs = model(x, seq_lengths)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()

print(f"Predicted Onsets: {outputs.detach().cpu().numpy()}")
print(f"True Onsets: {labels.cpu().numpy()}")
print(f"Loss: {loss.item():.4f}")

In [None]:
# Read the large pickle file and store it as batch files for improved training


import pandas as pd
import numpy as np
import os

def split_and_save_batches(pickle_path, output_dir, batch_size=64, random_seed=42):
    """Shuffle a pickled DataFrame and write it to disk as fixed-size batch files.

    The rows are shuffled once with ``random_seed`` and then written to
    ``output_dir`` as ``batch_<index>.pkl``. Every file holds ``batch_size``
    rows except possibly the last one. The index is zero-padded to at least
    three digits and to more when needed, so that the lexicographic order of
    the file names always equals the numeric batch order.

    Files that already exist in ``output_dir`` are not removed; stale batch
    files from an earlier run with more batches stay in the directory.

    Args:
        pickle_path: Path of the pickle file containing the full DataFrame.
        output_dir: Directory for the batch files (created if missing).
        batch_size: Number of rows per batch file; must be positive.
        random_seed: Seed passed to ``DataFrame.sample`` for the shuffle.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # 1. Load the complete DataFrame.
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    print("Lade Pickle-Datei...")
    df = pd.read_pickle(pickle_path)
    print(f"DataFrame Größe: {df.shape}")

    # 2. Shuffle the rows reproducibly.
    df = df.sample(frac=1, random_state=random_seed).reset_index(drop=True)
    print("DataFrame geshufflet.")

    # 3. Write the batches.
    os.makedirs(output_dir, exist_ok=True)
    num_batches = -(-len(df) // batch_size)  # integer ceiling division
    # A fixed width of 3 would make "batch_1000.pkl" sort before
    # "batch_101.pkl"; widen the padding when there are more than 1000 batches.
    index_width = max(3, len(str(max(num_batches - 1, 0))))
    print(f"Speichere {num_batches} Batches mit Batch-Größe {batch_size} ...")

    for i in range(num_batches):
        start = i * batch_size
        batch_df = df.iloc[start:start + batch_size]

        batch_path = os.path.join(output_dir, f"batch_{i:0{index_width}d}.pkl")
        batch_df.to_pickle(batch_path)
        print(f"Batch {i+1}/{num_batches} gespeichert: {batch_path}")

    print("Fertig.")

# Example usage: shuffle the filtered recordings and write them as batch files
pickle_path = "positive_filtered_100Hz.pkl"
output_dir = "batches_LSTM"
batch_size = 64

split_and_save_batches(
    pickle_path=pickle_path,
    output_dir=output_dir,
    batch_size=batch_size,
)

In [None]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import os

import torch
from torch.utils.data import Dataset
import pandas as pd
import os

class LazyEEGDataset(Dataset):
    """Map-style dataset that reads EEG samples lazily from pickled batch files.

    Every ``.pkl`` file in ``batch_dir`` holds a DataFrame. All files except
    the last one are expected to contain exactly ``fixed_batch_size`` rows, so
    a global sample index maps to ``(file index, row index)`` by integer
    division. Only the file that contains the requested sample is loaded; the
    most recently loaded file is kept in memory so that consecutive accesses
    to the same file do not hit the disk again.

    Args:
        batch_dir: Directory containing the batch files.
        n_channels: Channel count every returned sequence is zero-padded to.
        eeg_col: Name of the DataFrame column holding the signal.
        label_col: Name of the DataFrame column holding the label; element
            ``[1]`` of the label is returned as the target.
        fixed_batch_size: Number of rows in every batch file but the last.

    Raises:
        ValueError: If ``fixed_batch_size`` is not positive.
        FileNotFoundError: If ``batch_dir`` contains no ``.pkl`` file.
    """

    def __init__(self, batch_dir, n_channels=21, eeg_col='data', label_col='label', fixed_batch_size=64):
        if fixed_batch_size <= 0:
            raise ValueError(f"fixed_batch_size must be positive, got {fixed_batch_size}")

        self.n_channels = n_channels
        # Natural (numeric) ordering keeps e.g. "batch_1000.pkl" after
        # "batch_999.pkl"; a plain string sort would misplace the short last file.
        self.batch_files = sorted(
            (os.path.join(batch_dir, f) for f in os.listdir(batch_dir) if f.endswith('.pkl')),
            key=self._natural_key,
        )
        if not self.batch_files:
            raise FileNotFoundError(f"No .pkl batch files found in {batch_dir!r}")

        self.eeg_col = eeg_col
        self.label_col = label_col
        self.fixed_batch_size = fixed_batch_size

        # Single-entry cache: index and content of the most recently read file.
        self._cached_file_idx = None
        self._cached_df = None

        # Dataset length = (number of files - 1) * fixed_batch_size + size of the last file.
        # NOTE: unpickling can execute arbitrary code - only use trusted files.
        last_file_len = len(pd.read_pickle(self.batch_files[-1]))
        self.total_len = (len(self.batch_files) - 1) * fixed_batch_size + last_file_len

    @staticmethod
    def _natural_key(path):
        """Sort key that compares the digit runs of a file name numerically."""
        import re

        parts = re.split(r'(\d+)', os.path.basename(path))
        # re.split with one capture group alternates text / digits, so every
        # odd position is a digit run.
        return [int(part) if pos % 2 else part for pos, part in enumerate(parts)]

    def _load_file(self, file_idx):
        """Return the DataFrame of batch file ``file_idx``, reusing the cached one if possible."""
        if self._cached_file_idx != file_idx:
            self._cached_df = pd.read_pickle(self.batch_files[file_idx])
            self._cached_file_idx = file_idx
        return self._cached_df

    def __len__(self):
        return self.total_len

    def __getitem__(self, idx):
        """Return ``(sequence, onset)`` for the global sample index ``idx``.

        The sequence is a float tensor of shape [seq_len, n_channels]; the
        onset is a 0-dim float32 tensor built from ``label[1]``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
            TypeError: If the stored signal is neither a list nor an ndarray.
        """
        if idx < 0:
            idx += self.total_len  # support Python-style negative indices
        if not 0 <= idx < self.total_len:
            raise IndexError(f"Index {idx} out of range for dataset of length {self.total_len}")

        # Map the global index to a file and a row via the fixed batch size.
        file_idx, inner_idx = divmod(idx, self.fixed_batch_size)
        row = self._load_file(file_idx).iloc[inner_idx]

        seq = row[self.eeg_col]
        if isinstance(seq, list):
            # Lists are used as-is. NOTE(review): presumably already
            # [seq_len, channels] - confirm against the data.
            seq = torch.tensor(seq).float()
        elif isinstance(seq, np.ndarray):
            # Arrays are transposed, i.e. presumably stored as
            # [channels, seq_len] - confirm against the data.
            seq = torch.from_numpy(seq.T).float()
        else:
            raise TypeError(f"Sequenzformat {type(seq)} nicht erkannt!")

        seq = self.pad_channels(seq)

        label_full = row[self.label_col]
        onset_label = label_full[1]
        return seq, torch.tensor(onset_label, dtype=torch.float32)

    def pad_channels(self, tensor):
        """Zero-pad the channel axis of a [seq_len, channels] tensor to ``n_channels``.

        Raises:
            RuntimeError: If the tensor has more than ``n_channels`` channels.
        """
        seq_len, channels = tensor.shape
        if channels > self.n_channels:
            raise RuntimeError(f"Tensors Kanäle ({channels}) größer als erwartet ({self.n_channels})")
        if channels == self.n_channels:
            return tensor
        # new_zeros keeps dtype and device of the input tensor.
        padding = tensor.new_zeros(seq_len, self.n_channels - channels)
        return torch.cat([tensor, padding], dim=1)


def collate_fn(batch):
    """Collate ``(sequence, label)`` pairs into a zero-padded batch.

    Args:
        batch: Iterable of ``(sequence, label)`` pairs; each sequence is a
            tensor whose first dimension is its length, each label a tensor.

    Returns:
        Tuple ``(padded_seqs, lengths, labels)``: the sequences padded with
        zeros to the longest one (batch first), the original length of every
        sequence, and the stacked labels.
    """
    seq_list, label_list = [], []
    for seq, label in batch:
        seq_list.append(seq)
        label_list.append(label)

    # Remember the true lengths before padding hides them.
    lengths = torch.tensor([item.shape[0] for item in seq_list])
    padded_seqs = torch.nn.utils.rnn.pad_sequence(
        seq_list, batch_first=True, padding_value=0.0
    )
    return padded_seqs, lengths, torch.stack(label_list)


class EEGOnsetLSTM(nn.Module):
    """Bidirectional LSTM that regresses one onset value per EEG sequence.

    The final hidden states of both directions of the last LSTM layer are
    concatenated and passed through a linear layer with a single output.

    Args:
        n_channels: Number of input features per time step (EEG channels).
        hidden_size: Hidden size of the LSTM per direction.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, n_channels, hidden_size=64, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size=n_channels,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            bidirectional=True)
        # Two directions -> hidden_size * 2 input features for the head.
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x, lengths):
        """Predict one onset value per sequence of a padded batch.

        Args:
            x: Padded input of shape [batch, seq_len, n_channels].
            lengths: True (unpadded) length of every sequence, shape [batch].
                May be a tensor on any device or a plain sequence of ints;
                the batch does not have to be sorted by length.

        Returns:
            Tensor of shape [batch], ordered like the rows of ``x``.
        """
        # pack_padded_sequence expects the lengths as int64 on the CPU.
        cpu_lengths = torch.as_tensor(lengths, dtype=torch.int64).cpu()

        # With enforce_sorted=False PyTorch sorts by length internally and
        # returns the hidden states in the original batch order, which
        # replaces the manual sort / un-sort steps.
        packed_input = pack_padded_sequence(
            x, cpu_lengths, batch_first=True, enforce_sorted=False
        )
        _, (hn, _) = self.lstm(packed_input)

        # hn: [num_layers * 2, batch, hidden_size]; the last two entries are
        # the forward and backward state of the final layer.
        hn_forward, hn_backward = hn[-2], hn[-1]
        hn_cat = torch.cat([hn_forward, hn_backward], dim=1)

        return self.fc(hn_cat).squeeze(1)


def train_model(batch_dir, n_channels, epochs=10, lr=1e-3, batch_size=32, device='cpu',
                fixed_batch_size=64, num_workers=4, criterion=None):
    """Train an ``EEGOnsetLSTM`` on the batch files stored in ``batch_dir``.

    Args:
        batch_dir: Directory with the pickled batch files read by ``LazyEEGDataset``.
        n_channels: Number of channels per time step fed to the model.
        epochs: Number of passes over the dataset.
        lr: Learning rate of the Adam optimizer.
        batch_size: Mini-batch size of the DataLoader (independent of the
            number of rows per file on disk).
        device: Device (string or ``torch.device``) used for training.
        fixed_batch_size: Number of rows stored in every batch file except
            the last one; forwarded to ``LazyEEGDataset``.
        num_workers: Number of DataLoader worker processes; ``0`` loads the
            data in the main process.
        criterion: Loss module called as ``criterion(outputs, targets)``.
            Defaults to ``nn.MSELoss()``.

    Returns:
        The trained model (located on ``device``).
    """
    device = torch.device(device)

    dataset = LazyEEGDataset(batch_dir, n_channels, fixed_batch_size=fixed_batch_size)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=num_workers,
        # Pinned host memory only helps host-to-GPU copies; requesting it
        # without a GPU just triggers a warning.
        pin_memory=(device.type == 'cuda'),
    )

    model = EEGOnsetLSTM(n_channels).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    if criterion is None:
        # The target is a continuous onset value, so a regression loss is
        # required. BCEWithLogitsLoss expects targets in [0, 1] and is not
        # bounded below for larger targets.
        criterion = nn.MSELoss()

    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for x, lengths, y in dataloader:
            x, lengths, y = x.to(device), lengths.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x, lengths)
            loss = criterion(outputs, y)
            loss.backward()
            # Clip the gradient norm to keep the recurrent updates stable.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            # NOTE(review): emptying the CUDA cache every step is costly;
            # presumably added against memory fragmentation with
            # variable-length batches - confirm whether it is still needed.
            torch.cuda.empty_cache()

            # Weight the batch loss by the batch size for a per-sample average.
            epoch_loss += loss.item() * x.size(0)

        avg_loss = epoch_loss / max(len(dataset), 1)  # guard against an empty dataset
        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

    return model

# Example call
batch_dir = "batches_LSTM"
n_channels = 21
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trained_model = train_model(batch_dir, n_channels, epochs=10, batch_size=8, device=device)
# Save the weights of the model returned by train_model. The global `model`
# is a different, untrained-on-real-data object from the dummy example cell.
torch.save(trained_model.state_dict(), 'eeg_onset_lstm.pth')

In [None]:
import torch
# Ask PyTorch to release cached GPU memory that is currently unused
# (e.g. after training, before the next experiment in this kernel).
torch.cuda.empty_cache()