In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy import signal as sig
from wettbewerb import load_references
import CNN_dataset

In [None]:
# Folder with the reduced training set, given relative to this notebook.
training_folder = "../shared_data/training_mini"

# load_references returns a 6-tuple; it is unpacked into the names used by the
# following cells.
(
    ids,
    channels,
    data,
    sampling_frequencies,
    reference_systems,
    eeg_labels,
) = load_references(training_folder)

In [None]:
# Hand all loaded reference data to the project's dataset builder.
# NOTE(review): presumably this writes "cnn_map_dataset.pt", which later cells
# load - confirm in CNN_dataset.
CNN_dataset.create_cnn_dataset_map(
    ids,
    channels,
    data,
    sampling_frequencies,
    reference_systems,
    eeg_labels,
)

In [None]:
import torch 

# Load the serialized dataset and look at one (input, label) pair.
dataset = torch.load("cnn_map_dataset.pt")
x, y = dataset[12]

# The label may be stored as a tensor or as a plain Python value.
if torch.is_tensor(y):
    label_value = y.item()
else:
    label_value = y

print("Label:", label_value)
print("Input shape:", x.shape)
print("Feature map:\n", x[0])

In [None]:
import random
import torch
import os
from torch.utils.data import random_split, DataLoader
import matplotlib.pyplot as plt
from collections import Counter
# Load the serialized dataset, undersample it to a 1:1 class ratio and split it
# 80/20 into train and test loaders.
dataset_path = "cnn_map_dataset.pt"
if not os.path.exists(dataset_path):
    raise FileNotFoundError("Run CNN_dataset.py, um Dataset zu erstellen")

dataset = torch.load(dataset_path)
print("Dataset geladen")

# Single pass over the dataset: collect the label histogram and the per-class
# sample lists at the same time (samples are indexable, label at position 1).
labels = []
positive = []
negative = []
for sample in dataset:
    label = sample[1].item()
    labels.append(int(label))
    if label == 1:
        positive.append(sample)
    elif label == 0:
        negative.append(sample)
print("Label distribution:", Counter(labels))

# Windowing produces far more negative than positive samples, so the majority
# class is undersampled. Using the smaller class size avoids the ValueError
# random.sample raises when asked for more items than are available.
n_per_class = min(len(positive), len(negative))
if n_per_class == 0:
    raise ValueError(
        f"Cannot balance dataset: {len(positive)} positive and "
        f"{len(negative)} negative samples"
    )

# Dedicated, seeded RNG: without it the balanced subset (and therefore the
# seeded split below) would differ on every run.
balance_rng = random.Random(42)
positive = balance_rng.sample(positive, n_per_class)
negative = balance_rng.sample(negative, n_per_class)
print(f"postiv {len(positive)}, negative {len(negative)}")
balanced_data = positive + negative
balance_rng.shuffle(balanced_data)

# Train/test split of the balanced data (80 % / 20 %).
train_size = int(0.8 * len(balanced_data))
test_size = len(balanced_data) - train_size
torch.manual_seed(42)
train_dataset, test_dataset = random_split(balanced_data, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


import CNN_model

# Experiment configuration for the small balanced run.
num_epochs = 20
model = CNN_model.CNN_EEG(in_channels=9, n_classes=2)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)

# Per-epoch history; the plotting code further down reads these lists.
train_losses, train_accuracies, test_accuracies = [], [], []

for epoch in range(1, num_epochs + 1):
    # One optimisation pass over the training loader, then evaluation on the
    # held-out loader (only the accuracy of the evaluation is kept here).
    train_loss, train_acc = CNN_model.train_model(model, train_loader, optimizer, loss_fn)
    test_acc, _, _ = CNN_model.evaluate_model(model, test_loader)

    for history, value in (
        (train_losses, train_loss),
        (train_accuracies, train_acc),
        (test_accuracies, test_acc),
    ):
        history.append(value)

    print(f"Epoch {epoch:02d} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")

# Note: this pickles the whole model object, not only a state_dict, even though
# the file name says "weights".
torch.save(model, "small_trained_cnn_weights.pth")


# Two side-by-side panels: training loss (left) and accuracies (right).
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

# Loss panel
loss_ax.plot(train_losses, label='Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training Loss')
loss_ax.grid(True)
loss_ax.legend()

# Accuracy panel
acc_ax.plot(train_accuracies, label='Train Accuracy')
acc_ax.plot(test_accuracies, label='Test Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.grid(True)
acc_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
import random
import importlib
import torch
import os
from torch.utils.data import random_split, DataLoader, ConcatDataset
import matplotlib.pyplot as plt
from collections import Counter
from glob import glob
import torch.nn as nn 
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay,f1_score
import csv
import numpy as np
# Load the data folder: every *.pt file in it is one serialized dataset chunk.
data_folder = "data_long"
if not os.path.exists(data_folder):
    raise FileNotFoundError("Unterordner nicht gefunden")

file_paths = sorted(glob(os.path.join(data_folder, "*.pt")))
if not file_paths:
    # Without this guard ConcatDataset([]) below fails with an opaque assertion.
    raise FileNotFoundError(f"No .pt files found in '{data_folder}'")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the model (reload so edits to CNN_model.py are picked up).
import CNN_model
importlib.reload(CNN_model)
model = CNN_model.CNN_EEG(in_channels=9, n_classes=2)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 50
train_losses = []
train_accuracies = []
test_accuracies = []
batch_nr = 0
train_dataset_global = []
test_dataset_global = []

# Seeded RNG for the in-place shuffle. With the unseeded global `random` the
# seeded random_split below would still pick different samples on every run.
shuffle_rng = random.Random(42)

# NOTE(review): samples are split at random per file; if one recording
# contributes several windows they can land in both train and test - confirm
# whether that leakage is acceptable for this experiment.
for file_path in file_paths:

    print(f"Lade Dataset {batch_nr}")
    dataset = torch.load(file_path)
    print(f"Dataset {batch_nr} geladen")

    # In-place shuffle, so each loaded chunk must be a mutable sequence.
    shuffle_rng.shuffle(dataset)

    # 80/20 train/test split of this chunk.
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    torch.manual_seed(42)
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    test_dataset_global.append(test_dataset)
    train_dataset_global.append(train_dataset)

    batch_nr = batch_nr + 1


# Merge the per-file splits into one train and one test dataset.
train_dataset_global = ConcatDataset(train_dataset_global)
test_dataset_global = ConcatDataset(test_dataset_global)
train_loader = DataLoader(train_dataset_global, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset_global, batch_size=32, shuffle=False)

# Dynamic class weighting: inverse-frequency weights computed from the labels
# of the training split, with an extra boost for class 1.
all_labels = [label.item() for _, label in train_dataset_global]
label_counts = Counter(all_labels)
total_samples = sum(label_counts.values())
num_classes = len(label_counts)

# The formula below indexes classes as 0..num_classes-1 and then boosts index 1.
# A training split that lacks a class would otherwise end in ZeroDivisionError
# or IndexError, so fail early with an explicit message instead.
if num_classes < 2 or set(label_counts) != set(range(num_classes)):
    raise ValueError(
        f"Cannot compute class weights: expected labels 0..{max(num_classes, 2) - 1} "
        f"to all be present in the training split, got counts {dict(label_counts)}"
    )

weights = [total_samples / (num_classes * label_counts[i]) for i in range(num_classes)]
# Additionally up-weight class 1 by a factor of 3.
weights[1] *= 3.0
weights = torch.tensor(weights, dtype=torch.float32).to(device)

print(f"Klassenverteilung: {label_counts}")
print(f"Dynamische Verlustgewichte: {weights}")

loss_fn = nn.CrossEntropyLoss(weight=weights)
    
# Sanity check on the first training batch only: look for NaN/Inf values and
# print basic statistics of the inputs. Nothing is printed for an empty loader.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    x, y = first_batch
    print("x NaN:", torch.isnan(x).any())
    print("x Inf:", torch.isinf(x).any())
    print("y NaN:", torch.isnan(y).any())
    print("y Inf:", torch.isinf(y).any())
    x_min = x.min().item()
    x_max = x.max().item()
    x_mean = x.mean().item()
    x_std = x.std().item()
    print("x stats - min:", x_min, "max:", x_max, "mean:", x_mean, "std:", x_std)

print(f"starting training on {device}")

# Training: one optimisation pass plus one evaluation pass per epoch.
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = CNN_model.train_model(model, train_loader, optimizer, loss_fn,device)
    test_acc, y_true, y_pred = CNN_model.evaluate_model(model, test_loader,device)

    # Record the history that the plotting code reads afterwards.
    for history, value in (
        (train_losses, train_loss),
        (train_accuracies, train_acc),
        (test_accuracies, test_acc),
    ):
        history.append(value)

    # Overwritten every epoch; after the loop `cm` holds the last epoch's matrix.
    cm = confusion_matrix(y_true, y_pred)

    print(f"Epoch {epoch:02d} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")

# Pickles the complete model object (not only a state_dict).
torch.save(model, "trained_cnn_1.pth")
print("finished, model saved")

# Display the confusion matrix of the last evaluated epoch.
# ConfusionMatrixDisplay.plot() creates its own figure unless an Axes is passed,
# so a preceding plt.figure() only produces an empty figure and its figsize is
# ignored. Create the Axes explicitly and hand it over instead.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Kein Anfall", "Anfall"])
cm_fig, cm_ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=cm_ax, cmap=plt.cm.Blues, values_format='d')
cm_ax.set_title("Confusion Matrix (Test Set)")
cm_ax.grid(False)
plt.show()

# Plot the training history against 1-based epoch numbers, matching the
# "Epoch" axis label and the epoch numbers printed during training.
epochs = range(1, len(train_losses) + 1)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

# Loss panel
loss_ax.plot(epochs, train_losses, label='Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training Loss')
loss_ax.grid(True)
loss_ax.legend()

# Accuracy panel
acc_ax.plot(epochs, train_accuracies, label='Train Accuracy')
acc_ax.plot(epochs, test_accuracies, label='Test Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.grid(True)
acc_ax.legend()

fig.tight_layout()
plt.show()

# Append one row describing this run to the shared hyperparameter log.
# Values that can be read from the live objects are taken from them so the log
# cannot drift from what was actually trained; the remaining entries are
# bookkeeping that has to be edited by hand.
hyperparams = {
    "model": "trained_cnn_1",  # adjust manually
    "learning_rate": optimizer.param_groups[0]['lr'],
    "weight_decay": float(optimizer.param_groups[0]['weight_decay']),
    "loss_function": type(loss_fn).__name__,
    "class_weights": weights.cpu().numpy().tolist(),  # weights passed to the loss
    "oversampling": 3.0,  # adjust manually: factor applied to weights[1] above
    "num_epochs": num_epochs,
    "batch_size": train_loader.batch_size,
    "threshold": 0.3,  # if applicable
    "gamma": None,  # set to 2.0 if using FocalLoss
    "notes": "Balanced CE with dynamic weights",
    "f1_score": f1_score(y_true, y_pred, average='binary')
}

os.makedirs("results", exist_ok=True)
log_path = "results/hyperparameters_log.csv"

# Write the header only when the file is created by this call.
write_header = not os.path.exists(log_path)

with open(log_path, mode='a', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=hyperparams.keys())
    if write_header:
        writer.writeheader()
    writer.writerow(hyperparams)


In [5]:
#stratified group split

import random
import importlib
import torch
import os
from torch.utils.data import random_split, DataLoader, ConcatDataset, Subset,TensorDataset
import matplotlib.pyplot as plt
from collections import Counter
from glob import glob
import torch.nn as nn 
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay,f1_score
import csv
from collections import defaultdict
from sklearn.model_selection import StratifiedGroupKFold
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
import pandas as pd

# Load the data folder: every *.pt file holds (x, y, group) triples.
data_folder = "data_test"
if not os.path.exists(data_folder):
    raise FileNotFoundError("Unterordner nicht gefunden")

file_paths = sorted(glob(os.path.join(data_folder, "*.pt")))
if not file_paths:
    # Otherwise np.stack below fails with "need at least one array to stack".
    raise FileNotFoundError(f"No .pt files found in '{data_folder}'")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Containers filled by the cross-validation loop (one entry per fold).
train_losses = []
train_accuracies = []
test_accuracies = []
metrics = []
batch_nr = 0  # running count of loaded samples
train_dataset_global = []
test_dataset_global = []

all_x = []
all_y = []
all_id = []

for file_path in file_paths:
    dataset = torch.load(file_path)
    for x, y, gruppe in dataset:
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x)
        all_x.append(x)
        all_y.append(int(y))
        all_id.append(gruppe)
        batch_nr = batch_nr + 1

if not all_x:
    raise ValueError(f"The .pt files in '{data_folder}' contain no samples")

# Convert to NumPy.
all_x_np = np.stack([x.numpy() for x in all_x])
all_y_np = np.array(all_y)
all_id_np = np.array(all_id)

# Build a DataFrame with one row per sample.
df = pd.DataFrame({
    'x': list(all_x_np),  # important: a list of arrays, one array per row
    'y': all_y_np,
    'id': all_id_np
})
# Stratified: keeps the class ratio in every fold. Grouped: no group id
# (patient) appears in both the train and the test part of a fold.
cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)

num_epochs = 30

# Reload once so edits to CNN_model.py are picked up; nothing changes between
# folds, so doing this inside the loop was redundant.
import CNN_model
importlib.reload(CNN_model)

# Label set is the same for every fold.
classes = np.unique(all_y_np)

# Output directory models_strat/<last component of data_folder>.
# The previous `data_folder.split("/")[1]` raised IndexError for a folder name
# without "/" (e.g. "data_test") - after the first fold had already trained -
# and also overwrote the global `data`.
dataset_name = os.path.basename(os.path.normpath(data_folder))
save_path = os.path.join("models_strat", dataset_name)
os.makedirs(save_path, exist_ok=True)

for fold, (train_idx, test_idx) in enumerate(cv.split(df['x'], df['y'], df['id'])):
    print(f"\n=== Fold {fold+1} ===")

    train_df = df.iloc[train_idx]
    test_df = df.iloc[test_idx]

    # The full (imbalanced) training fold is used; the imbalance is handled via
    # class weights in the loss. An earlier variant undersampled the negative
    # training samples to the number of positives instead.
    X_train = np.stack(train_df['x'].values)
    y_train = train_df['y'].values

    X_test = np.stack(test_df['x'].values)
    y_test = test_df['y'].values

    # Class weights computed from this fold's training labels.
    weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
    class_weights = torch.tensor(weights, dtype=torch.float).to(device)

    for cls, w in zip(classes, weights):
        print(f"Class {cls}: weight = {w:.4f}")

    # Fresh model and optimizer for every fold.
    model = CNN_model.CNN_EEG(in_channels=18, n_classes=2)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)

    # Wrap the NumPy arrays as tensors / datasets / loaders.
    X_train_tensor = torch.from_numpy(X_train).float()
    y_train_tensor = torch.from_numpy(y_train).long()

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

    X_test_tensor = torch.from_numpy(X_test).float()
    y_test_tensor = torch.from_numpy(y_test).long()

    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)

    # Sanity check on the first training batch only.
    for x, y in train_loader:
        print("x NaN:", torch.isnan(x).any())
        print("x Inf:", torch.isinf(x).any())
        print("y NaN:", torch.isnan(y).any())
        print("y Inf:", torch.isinf(y).any())
        print("x stats - min:", x.min().item(), "max:", x.max().item(), "mean:", x.mean().item(), "std:", x.std().item())
        break

    print(f"starting training on {device}")

    # Per-epoch metrics of this fold.
    fold_train_losses = []
    fold_train_accuracies = []
    fold_test_accuracies = []

    # Training
    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = CNN_model.train_model(model, train_loader, optimizer, loss_fn,device)
        test_acc, y_true, y_pred = CNN_model.evaluate_model(model, test_loader,device)

        fold_train_losses.append(train_loss)
        fold_train_accuracies.append(train_acc)
        fold_test_accuracies.append(test_acc)

    # Save the metric curves of this fold.
    train_losses.append(fold_train_losses)
    train_accuracies.append(fold_train_accuracies)
    test_accuracies.append(fold_test_accuracies)

    # Confusion matrix of this fold, taken from the last epoch's evaluation.
    cm = confusion_matrix(y_true, y_pred)
    metrics.append((test_acc,train_acc,y_pred,cm))

    print(f"Metrics last epoch,fold: {fold} test_acc: {test_acc}, train_acc: {train_acc}")

    # Pickles the complete model object of this fold (not only a state_dict).
    torch.save(model, os.path.join(save_path, f"model_{fold}.pth"))

print("finished training")

# Print the final metrics and show the confusion matrix of every fold.
for fold, (test_acc, train_acc, y_pred, cm) in enumerate(metrics):
    print(f"Fold {fold+1}")
    print(f"  Test accuracy:  {test_acc:.2f}")
    print(f"  Train accuracy: {train_acc:.2f}")
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Kein Anfall", "Anfall"])
    # ConfusionMatrixDisplay.plot() opens its own figure unless an Axes is
    # given; a bare plt.figure() beforehand only leaves an empty figure per fold
    # and its figsize is never applied.
    cm_fig, cm_ax = plt.subplots(figsize=(6, 6))
    disp.plot(ax=cm_ax, cmap=plt.cm.Blues, values_format='d')
    cm_ax.set_title("Confusion Matrix (Test Set)")
    cm_ax.grid(False)
    plt.show()
    # Release the figure so figures do not accumulate across folds.
    plt.close(cm_fig)

# Plot the metric curves of every fold.
epochs = list(range(1, num_epochs + 1))
for fold in range(len(train_losses)):
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

    loss_ax.plot(epochs, train_losses[fold], label='Train Loss')
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.set_title(f"Training Loss - Fold {fold+1}")
    loss_ax.legend()
    loss_ax.grid(True)

    acc_ax.plot(epochs, train_accuracies[fold], label='Train Accuracy')
    acc_ax.plot(epochs, test_accuracies[fold], label='Test Accuracy')
    acc_ax.set_xlabel("Epoch")
    # This panel shows accuracies; it was previously mislabelled "Loss".
    acc_ax.set_ylabel("Accuracy")
    acc_ax.set_title(f"Accuracy - Fold {fold+1}")
    acc_ax.legend()
    acc_ax.grid(True)

    fig.tight_layout()
    plt.show()
    plt.close(fig)

# Plot the average across folds (element-wise mean per epoch).
mean_train_loss = np.mean(train_losses, axis=0)
mean_train_acc = np.mean(train_accuracies, axis=0)
mean_test_acc = np.mean(test_accuracies, axis=0)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

loss_ax.plot(epochs, mean_train_loss, label='Avg Train Loss')
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.set_title("Average Training Loss Across Folds")
loss_ax.legend()
loss_ax.grid(True)

acc_ax.plot(epochs, mean_train_acc, label='Avg Train Accuracy')
acc_ax.plot(epochs, mean_test_acc, label='Avg Test Accuracy')
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")
# The panel contains both train and test accuracy, so the title is neutral.
acc_ax.set_title("Average Accuracy Across Folds")
acc_ax.legend()
acc_ax.grid(True)

fig.tight_layout()
plt.show()





=== Fold 1 ===
Class 0: weight = 0.5108
Class 1: weight = 23.5518
x NaN: tensor(False)
x Inf: tensor(False)
y NaN: tensor(False)
y Inf: tensor(False)
x stats - min: -4.2335662841796875 max: 4.242640495300293 mean: 0.004964651074260473 std: 0.8622854948043823
starting training on cuda


KeyboardInterrupt: 

In [27]:
#ausprobieren 12.06, einlesen und trainieren der fertigen Datensätze

import random
import importlib
import torch
import os
from torch.utils.data import random_split, DataLoader, ConcatDataset, Subset
import matplotlib.pyplot as plt
from collections import Counter
from glob import glob
import torch.nn as nn 
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
import csv
from collections import defaultdict


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the model (reload so edits to CNN_model.py are picked up).
import CNN_model
importlib.reload(CNN_model)
model = CNN_model.CNN_EEG(in_channels=9, n_classes=2).to(device)
# Weight decay was added here; per the author's note results were markedly
# better with it.
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-4)
num_epochs = 50
train_losses, train_accuracies, test_accuracies = [], [], []

# Read the train/test datasets that were serialized by an earlier step.
train_dataset_global = torch.load('data_plv/train_dataset.pt')
test_dataset_global = torch.load('data_plv/test_dataset.pt')

train_loader = DataLoader(train_dataset_global, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset_global, batch_size=32, shuffle=False)
loss_fn = nn.CrossEntropyLoss()

# Sanity check on the first training batch only: NaN/Inf detection and basic
# input statistics. Nothing is printed for an empty loader.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    x, y = first_batch
    print("x NaN:", torch.isnan(x).any())
    print("x Inf:", torch.isinf(x).any())
    print("y NaN:", torch.isnan(y).any())
    print("y Inf:", torch.isinf(y).any())
    x_min = x.min().item()
    x_max = x.max().item()
    x_mean = x.mean().item()
    x_std = x.std().item()
    print("x stats - min:", x_min, "max:", x_max, "mean:", x_mean, "std:", x_std)

print(f"starting training on {device}")

# Training: one optimisation pass plus one evaluation pass per epoch.
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = CNN_model.train_model(model, train_loader, optimizer, loss_fn,device)
    test_acc, y_true, y_pred = CNN_model.evaluate_model(model, test_loader,device)
    print(f"Epoch {epoch:02d} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")

    # Record the history inside the loop. These appends used to sit after the
    # loop, so only the final epoch was stored and the curves had one point.
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)

# Confusion matrix of the final epoch's evaluation.
cm = confusion_matrix(y_true, y_pred)

# Pickles the complete model object (not only a state_dict).
torch.save(model, "trained_cnn_balanced_12_06_full.pth")
print("finished, model saved")

# Display the confusion matrix of the final evaluation.
# ConfusionMatrixDisplay.plot() creates its own figure unless an Axes is passed,
# so a preceding plt.figure() only produces an empty figure and its figsize is
# ignored. Create the Axes explicitly and hand it over instead.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Kein Anfall", "Anfall"])
cm_fig, cm_ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=cm_ax, cmap=plt.cm.Blues, values_format='d')
cm_ax.set_title("Confusion Matrix (Test Set)")
cm_ax.grid(False)
plt.show()

# Plot the training history against 1-based epoch numbers, matching the
# "Epoch" axis label and the epoch numbers printed during training.
epochs = range(1, len(train_losses) + 1)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

# Loss panel
loss_ax.plot(epochs, train_losses, label='Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training Loss')
loss_ax.grid(True)
loss_ax.legend()

# Accuracy panel
acc_ax.plot(epochs, train_accuracies, label='Train Accuracy')
acc_ax.plot(epochs, test_accuracies, label='Test Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.grid(True)
acc_ax.legend()

fig.tight_layout()
plt.show()

# Append one row describing this run to the shared hyperparameter log.
# Values that can be read from the live objects are taken from them: the
# hand-typed entries had drifted (weight decay was logged as 0.0 although the
# optimizer uses a non-zero value, and class weights were logged as "balanced"
# although the loss is created without a weight tensor).
loss_weight = getattr(loss_fn, "weight", None)

hyperparams = {
    "model": "trained_cnn_balanced_12_06_full.pth",  # adjust manually
    "learning_rate": optimizer.param_groups[0]['lr'],
    "weight_decay": float(optimizer.param_groups[0]['weight_decay']),
    "loss_function": type(loss_fn).__name__,
    # None when the loss is unweighted, otherwise the per-class weight list.
    "class_weights": None if loss_weight is None else loss_weight.cpu().numpy().tolist(),
    # NOTE(review): no oversampling or weight boost is visible in this cell;
    # the next two manual entries look copied from another run - confirm.
    "oversampling": 3.0,  # adjust manually
    "num_epochs": num_epochs,
    "batch_size": train_loader.batch_size,
    "threshold": 0.3,  # if applicable
    "gamma": None,  # set to 2.0 if using FocalLoss
    "notes": "Balanced CE with dynamic weights",
    "f1_score": f1_score(y_true, y_pred, average='binary')
}

os.makedirs("results", exist_ok=True)
log_path = "results/hyperparameters_log.csv"

# Write the header only when the file is created by this call.
write_header = not os.path.exists(log_path)

with open(log_path, mode='a', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=hyperparams.keys())
    if write_header:
        writer.writeheader()
    writer.writerow(hyperparams)


NameError: name 'flattened_size' is not defined

In [14]:
import torch

# Load the first dataset chunk from two differently generated folders.
data1 = torch.load("data_30sec/cnn_map_dataset_0.pt")
data2 = torch.load("datasets/win30_step30/cnn_map_dataset_0.pt")

# Manual comparison: print element [0][0] (first field of the first sample) of
# each file so the two can be checked side by side.
for loaded in (data1, data2):
    print(loaded[0][0])

tensor([[[ 0.0000,  0.5527,  0.0000,  0.0143,  0.0000],
         [ 0.2273,  2.3705, -0.6264, -0.8522, -0.5450],
         [-0.3760, -0.8895, -0.5983, -0.9138, -0.4184],
         [-0.4766,  2.1705, -0.2644, -0.5301,  1.9257],
         [ 0.0000, -0.4653,  0.0000, -0.3050,  0.0000]],

        [[ 0.0000, -0.4063,  0.0000, -0.3615,  0.0000],
         [-0.4719,  1.4411, -1.0062, -0.9261, -0.2618],
         [ 0.2109, -0.9368, -1.0088, -0.8737,  0.3858],
         [ 0.2590,  2.3627, -0.4466, -0.7073,  2.2430],
         [ 0.0000,  0.4830,  0.0000,  0.0216,  0.0000]],

        [[ 0.0000, -0.2605,  0.0000, -0.0735,  0.0000],
         [-0.4514,  1.5096, -0.8715, -0.8254, -0.3707],
         [ 0.1451, -0.7477, -0.9319, -0.9477,  0.1590],
         [-0.2988,  2.6871, -0.6023, -0.5802,  2.0810],
         [ 0.0000,  0.3920,  0.0000, -0.0121,  0.0000]],

        [[ 0.0000, -0.0723,  0.0000, -0.0048,  0.0000],
         [-0.3192,  1.7654, -0.8478, -0.8876, -0.5031],
         [ 0.0613, -0.6298, -0.7827, -0.86