In [None]:
import CNN_dataset
from wettbewerb import load_references, get_3montages
import os

# Build the CNN datasets from the raw .mat recordings, one chunk of files at a time.
train_folder = "../shared_data/training"
output_folder = "data_test"
os.makedirs(output_folder, exist_ok=True)

# Count the .mat files so the chunk loop below knows how far to go.
files = [name for name in os.listdir(train_folder) if name.endswith('.mat')]
n_files = len(files)
print(f"found {n_files} files")

# Step through the files with a stride of 100; `i` is the start offset handed to
# load_references (presumably it loads up to 100 recordings from there - confirm
# against wettbewerb.load_references). `index` only numbers the progress messages.
index = 0
for i in range(0, n_files, 100):
    ids, channels, data, sampling_frequencies, reference_systems, eeg_labels = load_references(
        train_folder, i
    )
    CNN_dataset.create_cnn_dataset_map(
        ids,
        channels,
        data,
        sampling_frequencies,
        reference_systems,
        eeg_labels,
        output_folder,
        i,
    )
    print(f"created dataset {index}")
    index += 1

In [None]:
import random
import importlib
import torch
import os
from torch.utils.data import random_split, DataLoader, ConcatDataset, Subset,TensorDataset
import matplotlib.pyplot as plt
from collections import Counter
from glob import glob
import torch.nn as nn 
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay,f1_score
import csv
from collections import defaultdict
from sklearn.model_selection import StratifiedGroupKFold
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
import pandas as pd

# Load every preprocessed dataset file (*.pt) from the data folder into memory.
data_folder = "data_test"

# Validate the folder before globbing so a missing directory is reported as such,
# and fail early with a clear message when it contains no dataset files
# (otherwise np.stack below fails with an opaque "need at least one array").
if not os.path.exists(data_folder):
    raise FileNotFoundError("Unterordner nicht gefunden")

file_paths = sorted(glob(os.path.join(data_folder, "*.pt")))
if not file_paths:
    raise FileNotFoundError(f"Keine .pt-Dateien in '{data_folder}' gefunden")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Per-fold metric histories; they are filled by the cross-validation loop.
train_losses = []
train_accuracies = []
test_accuracies = []
metrics = []
batch_nr = 0  # running count of loaded samples
train_dataset_global = []
test_dataset_global = []

all_x = []
all_y = []
all_id = []

for file_path in file_paths:
    # NOTE: torch.load unpickles the file content; only load files you created yourself.
    dataset = torch.load(file_path)
    # Each dataset item is unpacked as (features, label, group id).
    for x, y, gruppe in dataset:
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x)
        all_x.append(x)
        all_y.append(int(y))
        all_id.append(gruppe)
        batch_nr = batch_nr + 1

if not all_x:
    raise ValueError(f"Keine Samples in den .pt-Dateien in '{data_folder}' gefunden")

# Convert to NumPy (all feature tensors must share one shape for np.stack)
all_x_np = np.stack([x.numpy() for x in all_x])
all_y_np = np.array(all_y)
all_id_np = np.array(all_id)

# One row per sample; 'x' must be a list of arrays so that each cell holds one array
df = pd.DataFrame({
    'x': list(all_x_np),
    'y': all_y_np,
    'id': all_id_np
})
# Class-stratified, group-wise 5-fold cross-validation of the CNN.
# "Stratified" keeps the class proportions similar in every fold; "Group" keeps all
# samples sharing an id (patient) in the same split, so train and test never overlap.
cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)

num_epochs = 30

# (Re)load the model module once per run so that edits to CNN_model.py are picked up.
import CNN_model
importlib.reload(CNN_model)

# Trained models are stored in models_strat/<last path component of data_folder>.
# (The former data_folder.split("/")[1] raised IndexError for a single-component
# folder such as "data_test", i.e. after the first fold had finished training.)
save_path = os.path.join("models_strat", os.path.basename(os.path.normpath(data_folder)))
os.makedirs(save_path, exist_ok=True)

# The set of labels is identical for all folds.
classes = np.unique(all_y_np)

for fold, (train_idx, test_idx) in enumerate(cv.split(df['x'], df['y'], df['id'])):
    print(f"\n=== Fold {fold+1} ===")

    train_df = df.iloc[train_idx]
    test_df = df.iloc[test_idx]

    # Class imbalance is handled through loss weights below. (A previously tried,
    # currently disabled alternative was to undersample the negative class of the
    # training split down to the number of positive samples.)
    X_train = np.stack(train_df['x'].values)
    y_train = train_df['y'].values

    X_test = np.stack(test_df['x'].values)
    y_test = test_df['y'].values

    # Class weights are computed from this fold's training labels only
    weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
    class_weights = torch.tensor(weights, dtype=torch.float).to(device)

    # Fresh model and optimizer for every fold
    model = CNN_model.CNN_EEG(in_channels=11, n_classes=2)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay = 1e-4)

    # Wrap the NumPy arrays of this fold in tensor datasets / loaders
    X_train_tensor = torch.from_numpy(X_train).float()
    y_train_tensor = torch.from_numpy(y_train).long()

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

    X_test_tensor = torch.from_numpy(X_test).float()
    y_test_tensor = torch.from_numpy(y_test).long()

    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)

    # Sanity check on the first training batch only: NaN/Inf values and basic statistics
    for x, y in train_loader:
        print("x NaN:", torch.isnan(x).any())
        print("x Inf:", torch.isinf(x).any())
        print("y NaN:", torch.isnan(y).any())
        print("y Inf:", torch.isinf(y).any())
        print("x stats - min:", x.min().item(), "max:", x.max().item(), "mean:", x.mean().item(), "std:", x.std().item())
        break

    print(f"starting training on {device}")

    # Per-epoch metric history of this fold
    fold_train_losses = []
    fold_train_accuracies = []
    fold_test_accuracies = []

    # Train for num_epochs epochs and evaluate on the held-out split after each epoch
    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = CNN_model.train_model(model, train_loader, optimizer, loss_fn,device)
        test_acc, y_true, y_pred = CNN_model.evaluate_model(model, test_loader,device)

        fold_train_losses.append(train_loss)
        fold_train_accuracies.append(train_acc)
        fold_test_accuracies.append(test_acc)

    # Save metrics for this fold
    train_losses.append(fold_train_losses)
    train_accuracies.append(fold_train_accuracies)
    test_accuracies.append(fold_test_accuracies)

    # Confusion matrix of this fold, based on the predictions of the last epoch
    cm = confusion_matrix(y_true, y_pred)
    metrics.append((test_acc,train_acc,y_pred,cm))

    print(f"Metrics last epoch,fold: {fold} test_acc: {test_acc}, train_acc: {train_acc}")

    # The whole model object (not only its state_dict) is saved, one file per fold
    torch.save(model, os.path.join(save_path, f"model_{fold}.pth"))

print("finished training")

# Final metrics and confusion matrix of every fold
for fold, (test_acc, train_acc, y_pred, cm) in enumerate(metrics):
    print(f"Fold {fold+1}")
    print(f"  Test accuracy:  {test_acc:.2f}")
    print(f"  Train accuracy: {train_acc:.2f}")
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Kein Anfall", "Anfall"])
    # Pass the axes explicitly: without `ax`, disp.plot() opens its own figure and a
    # figure created beforehand via plt.figure() stays empty.
    fig, ax = plt.subplots(figsize=(6, 6))
    disp.plot(ax=ax, cmap=plt.cm.Blues, values_format='d')
    ax.set_title("Confusion Matrix (Test Set)")
    ax.grid(False)
    plt.show()

# Per-fold learning curves: training loss (left) and train/test accuracy (right)
epochs = list(range(1, num_epochs + 1))
for fold in range(len(train_losses)):
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(15, 5))

    ax_loss.plot(epochs, train_losses[fold], label='Train Loss')
    ax_loss.set_xlabel("Epoch")
    ax_loss.set_ylabel("Loss")
    ax_loss.set_title(f"Training Loss - Fold {fold+1}")
    ax_loss.legend()
    ax_loss.grid(True)

    ax_acc.plot(epochs, train_accuracies[fold], label='Train Accuracy')
    ax_acc.plot(epochs, test_accuracies[fold], label='Test Accuracy')
    ax_acc.set_xlabel("Epoch")
    ax_acc.set_ylabel("Accuracy")  # was mislabelled "Loss"
    ax_acc.set_title(f"Accuracy - Fold {fold+1}")
    ax_acc.legend()
    ax_acc.grid(True)

    fig.tight_layout()
    plt.show()

# Learning curves averaged over all folds (element-wise mean per epoch)
mean_train_loss = np.mean(train_losses, axis=0)
mean_train_acc = np.mean(train_accuracies, axis=0)
mean_test_acc = np.mean(test_accuracies, axis=0)

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(15, 5))

ax_loss.plot(epochs, mean_train_loss, label='Avg Train Loss')
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")
ax_loss.set_title("Average Training Loss Across Folds")
ax_loss.legend()
ax_loss.grid(True)

ax_acc.plot(epochs, mean_train_acc, label='Avg Train Accuracy')
ax_acc.plot(epochs, mean_test_acc, label='Avg Test Accuracy')
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")
# The panel shows both the train and the test curve, hence no "Training" in the title
ax_acc.set_title("Average Accuracy Across Folds")
ax_acc.legend()
ax_acc.grid(True)

fig.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import os
import torch
from torch.nn.utils.rnn import pad_sequence
from scipy.signal import resample
from wettbewerb import get_6montages

def process_sample(row, target_fs=64):
    """Convert one recording row into a resampled montage tensor and an onset mask.

    Args:
        row: Mapping-like record providing "fs" (sampling rate), "data" (raw
            signals), "channels" (channel names) and "label" (``label[1]`` is
            used as the onset time in seconds).
        target_fs: Sampling rate the montage signals are resampled to.

    Returns:
        Tuple ``(x, y, T_new)``: ``x`` is the float32 signal tensor with time on
        axis 0, ``y`` a float32 vector of length ``T_new`` that is 0 before the
        onset index and 1 from it onwards, ``T_new`` the resampled length.
    """
    source_fs = row["fs"]
    signals = row["data"]
    channel_names = row["channels"]
    annotation = row["label"]

    # Only the montage signals are used; montage names and the missing-flag are dropped.
    # NOTE(review): montage_data is presumably laid out as (montages, time) - it is
    # resampled along axis 1 below; confirm against wettbewerb.get_6montages.
    _, montage_data, _ = get_6montages(channel_names, signals)

    # Resample along axis 1 to the target rate, then transpose so time becomes axis 0
    T_new = int(len(montage_data[1]) * target_fs / source_fs)
    resampled = resample(montage_data, T_new, axis=1).T

    # Target mask: every sample from the onset index onwards is marked with 1
    onset_idx = int(annotation[1] * target_fs)
    y = torch.zeros(T_new, dtype=torch.float32)
    y[onset_idx:] = 1.0

    return torch.tensor(resampled, dtype=torch.float32), y, T_new

def split_and_save_tensor_batches(pickle_path, output_dir, batch_size=64, target_fs=64, random_seed=42):
    """Shuffle a pickled DataFrame and store it as zero-padded tensor batches.

    Every row is converted with ``process_sample``; rows that raise are reported
    and skipped. The samples of one batch are padded to the longest sequence in
    that batch and written to ``<output_dir>/batch_XXX.pt`` as a dict with the
    keys "x" (B, T_max, C), "y" (B, T_max) and "lengths" (B,). A batch in which
    no row could be processed is skipped instead of aborting the whole run
    (``pad_sequence`` raises on an empty list).

    Args:
        pickle_path: Path of the pickled pandas DataFrame.
        output_dir: Directory for the batch files; created if missing.
        batch_size: Number of DataFrame rows per batch file.
        target_fs: Target sampling rate forwarded to ``process_sample``.
        random_seed: Seed for the row shuffle.
    """
    print("Lade Pickle-Datei...")
    # NOTE: read_pickle unpickles arbitrary objects - only use it on trusted files.
    df = pd.read_pickle(pickle_path)
    print(f"DataFrame Größe: {df.shape}")

    # Shuffle
    df = df.sample(frac=1, random_state=random_seed).reset_index(drop=True)
    print("DataFrame geshufflet.")

    os.makedirs(output_dir, exist_ok=True)
    num_batches = int(np.ceil(len(df) / batch_size))
    print(f"Speichere {num_batches} Batches mit Batch-Größe {batch_size} ...")

    for i in range(num_batches):
        batch_df = df.iloc[i*batch_size : (i+1)*batch_size]

        x_list, y_list, len_list = [], [], []

        for _, row in batch_df.iterrows():
            try:
                x, y, L = process_sample(row, target_fs)
            except Exception as e:
                # Best effort: report the broken row and carry on. row.get keeps the
                # handler itself from failing when the row has no 'ids' entry.
                print(f"Fehler in Zeile {row.get('ids', '?')}: {e}")
                continue
            x_list.append(x)
            y_list.append(y)
            len_list.append(L)

        if not x_list:
            # Nothing usable in this batch; its file number is simply left out.
            print(f"Batch {i+1}/{num_batches} übersprungen: keine gültigen Samples")
            continue

        x_padded = pad_sequence(x_list, batch_first=True)      # (B, T_max, 6)
        y_padded = pad_sequence(y_list, batch_first=True)      # (B, T_max)
        lengths = torch.tensor(len_list, dtype=torch.long)     # (B,)

        batch_tensor = {
            "x": x_padded,
            "y": y_padded,
            "lengths": lengths
        }

        batch_path = os.path.join(output_dir, f"batch_{i:03d}.pt")
        torch.save(batch_tensor, batch_path)
        print(f"Batch {i+1}/{num_batches} gespeichert: {batch_path}")

    print("Fertig.")

# Example usage.
# Note: despite its name, `batch_dir` holds the path of the input pickle file.
batch_dir  = "LSTM/positive_filtered_100Hz.pkl"
output_dir = "LSTM/tensor_batches"
batch_size = 64

split_and_save_tensor_batches(
    pickle_path=batch_dir,
    output_dir=output_dir,
    batch_size=batch_size,
)


In [4]:
import os
# Print the current working directory, then switch to the project directory
# (presumably so that the relative "LSTM/..." paths used in this notebook resolve).
# NOTE(review): this is a hard-coded, user-specific absolute path - the cell only
# works on that machine; consider a configurable project root instead.
print(os.getcwd())
os.chdir("/home/jupyter-wki_team_3/wki-sose25")

/home/jupyter-wki_team_3/wki-sose25


In [None]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import os


class OnsetLSTM(nn.Module):
    """Bidirectional LSTM that emits one logit per time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim=6, hidden_dim=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence twice the hidden size as input
        self.classifier = nn.Linear(2 * hidden_dim, 1)

    def forward(self, x):
        """Map a (B, T, input_dim) batch to raw logits of shape (B, T)."""
        features = self.lstm(x)[0]                # (B, T, H*2)
        logits = self.classifier(features)        # (B, T, 1)
        # Raw logits are returned; no sigmoid is applied here
        return logits.squeeze(-1)

# Dataset class for loading the stored .pt batches
class EEGOnsetBatchDataset(Dataset):
    """Dataset whose items are whole pre-saved batch files.

    Every ``.pt`` file inside ``batch_folder`` is one item; the files are
    addressed in sorted path order.
    """

    def __init__(self, batch_folder):
        self.batch_paths = sorted(
            os.path.join(batch_folder, name)
            for name in os.listdir(batch_folder)
            if name.endswith('.pt')
        )

    def __len__(self):
        """Return the number of batch files found."""
        return len(self.batch_paths)

    def __getitem__(self, idx):
        """Load and return the content of the ``idx``-th batch file."""
        batch_file = self.batch_paths[idx]
        return torch.load(batch_file)

# Training & evaluation
def train(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Each batch is a dict with "x" (B, T, C), "y" (B, T) and "lengths" (B,).
    Only the first ``lengths[b]`` time steps of every sequence enter the loss,
    so padded positions are ignored.
    """
    model.train()
    batch_losses = []
    for batch in dataloader:
        inputs = batch['x'].to(device)
        targets = batch['y'].to(device)

        # valid[b, t] is True for real (non-padded) time steps            (B, T)
        positions = torch.arange(inputs.shape[1], device=device).unsqueeze(0)
        seq_lengths = batch['lengths'].unsqueeze(1).to(device)
        valid = positions < seq_lengths

        optimizer.zero_grad()
        logits = model(inputs)  # (B, T)
        loss = criterion(logits[valid], targets[valid])
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
    return sum(batch_losses) / len(dataloader)

@torch.no_grad()
def evaluate(model, dataloader, criterion, device):
    """Return the mean batch loss over ``dataloader`` without updating the model.

    Batches have the same dict layout as in training ("x", "y", "lengths");
    padded time steps are excluded from the loss via the lengths.
    """
    model.eval()
    batch_losses = []
    for batch in dataloader:
        inputs = batch['x'].to(device)
        targets = batch['y'].to(device)

        # valid[b, t] is True for real (non-padded) time steps
        positions = torch.arange(inputs.shape[1], device=device).unsqueeze(0)
        seq_lengths = batch['lengths'].unsqueeze(1).to(device)
        valid = positions < seq_lengths

        logits = model(inputs)
        batch_losses.append(criterion(logits[valid], targets[valid]).item())
    return sum(batch_losses) / len(dataloader)

# Main entry point
def run_training(train_dir, val_dir, epochs=10, batch_size=1, lr=1e-3, device="cpu", save_path="onset_lstm.pt"):
    """Train an OnsetLSTM on pre-batched .pt files and save its state_dict.

    Args:
        train_dir: Folder with the training batch files.
        val_dir: Folder with the validation batch files.
        epochs: Number of passes over the training batches.
        batch_size: Kept for backward compatibility and ignored: every file
            already contains a complete padded batch, so the loaders always
            yield exactly one file per step.
        lr: Learning rate of the Adam optimizer.
        device: Device to train on. Defaults to "cpu", which is what the
            function effectively always used (its CUDA detection was
            overwritten immediately); pass e.g. "cuda" to opt in to a GPU.
        save_path: File the trained state_dict is written to.
    """
    device = torch.device(device)

    train_dataset = EEGOnsetBatchDataset(train_dir)
    val_dataset = EEGOnsetBatchDataset(val_dir)

    # The DataLoader wraps each item in a list of length 1; unwrap it again so the
    # training code receives the stored batch dict unchanged.
    def _unwrap(items):
        return items[0]

    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=_unwrap)
    val_loader = DataLoader(val_dataset, batch_size=1, collate_fn=_unwrap)

    model = OnsetLSTM().to(device)
    # The model outputs raw logits, hence the loss with built-in sigmoid
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        train_loss = train(model, train_loader, optimizer, criterion, device)
        val_loss = evaluate(model, val_loader, criterion, device)

        print(f"Epoch {epoch:02d} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

    torch.save(model.state_dict(), save_path)
    print("Modell gespeichert.")

# Example call: train for 15 epochs on the stored train/val batch folders
run_training(
    train_dir="LSTM/tensor_batches/train",
    val_dir="LSTM/tensor_batches/val",
    epochs=15,
)


In [7]:
import torch
# Sanity check of one stored batch: print its keys and the shapes of the padded
# input "x" (batch, time, 6) and target "y" (batch, time) tensors.
batch = torch.load("LSTM/tensor_batches/train/batch_010.pt")
print(batch.keys())
for name in ("x", "y"):
    print(batch[name].shape)

dict_keys(['x', 'y', 'lengths'])
torch.Size([64, 107651, 6])
torch.Size([64, 107651])
