In [None]:
'Ich probier jetzt jeden Schritt nochmal von vorne und schau mir den Schritt nochmal genauer an'

from wettbewerb import load_references, get_6montages
from new_preprocess import preprocess_signal_with_montages


ids, channels_list, data_list, fs_list, ref_list, label_list = load_references(folder="../shared_data/training", idx=0)
for i in range(100):
    #montage_names, montage_data, missing = get_6montages(channels_list[i], data_list[i])
    #print(f"{ids[i]}:{montage_names}:{montage_data.shape}\n {fs_list[i]}")
    #if missing:
        #print("Warning: Montage missing, data may be incomplete.")
    #print (f"{ids[i]}",label_list[i])
    processed_signal, montage_missing = preprocess_signal_with_montages(channels_list[i], data_list[i], 256, fs_list[i])
    print(f"{ids[i]}:",processed_signal.shape)

In [None]:
from wettbewerb import EEGDataset

dataset = EEGDataset("../shared_data/training")
labels = dataset.get_labels()

# Count seizure / non-seizure
seizure_count = sum(1 for l in labels if l[0])
non_seizure_count = len(labels) - seizure_count

print(f"Total: {len(labels)}")
print(f"Seizures: {seizure_count}")
print(f"Non-Seizures: {non_seizure_count}")

In [None]:
from wettbewerb import EEGDataset
import os
import torch 
from new_preprocess import preprocess_signal_with_montages
dataset = EEGDataset("../shared_data/training")
save_folder = "preprocessed_data"
os.makedirs(save_folder, exist_ok=True)

processed_count = 0
skipped_count = 0
total = len(dataset)

for i in range(total):
    ids, channels, data, fs, ref, label = dataset[i]

    montage_names, processed_signal, montage_missing, resampled_fs = preprocess_signal_with_montages(
        channels, data, target_fs=256, original_fs=fs, ids=ids
    )

    if montage_missing:
        skipped_count += 1
        print(f"[{i+1}/{total}] Skipping {ids} (montage missing)")
        continue

    save_path = os.path.join(save_folder, f"{ids}.pt")
    torch.save((processed_signal, label, ids, montage_names, resampled_fs), save_path)
    processed_count += 1
    print(f"[{i+1}/{total}] Processed: {processed_count} | Skipped: {skipped_count}", end='\r')

print("Done!")

In [None]:
from wettbewerb import EEGDataset
from new_preprocess import preprocess_signal_with_montages
from new_features import window_eeg_data, feature_extraction_window  # your modules
import os, torch
import numpy as np

window_size = 4  # seconds
step_size = 2    # seconds

dataset = EEGDataset("../shared_data/training")
save_folder = f"montage_datasets/win{window_size}_step{step_size}"
os.makedirs(save_folder, exist_ok=True)

for i in range(len(dataset)):
    eeg_id, channels, raw_data, fs, _, label = dataset[i]
    seizure_label, seizure_onset, seizure_offset = label

    # 1. Preprocess
    montage_names, processed_signal, montage_missing, new_fs = preprocess_signal_with_montages(
        channels, raw_data, target_fs=256, original_fs=fs, ids=eeg_id
    )

    if montage_missing:
        print(f"Skipping {eeg_id} (montage missing)")
        continue

    # 2. Windowing + labeling
    windows, labels, timestamps = window_eeg_data(
        processed_signal, resampled_fs=new_fs,
        seizure_onset=seizure_onset,
        seizure_offset=seizure_offset,
        window_size=window_size,
        step_size=step_size
    )

    # 3. Feature extraction per window
    for idx, (window, lbl, ts) in enumerate(zip(windows, labels, timestamps)):
        features = feature_extraction_window(window, new_fs)
        save_path = os.path.join(save_folder, f"{eeg_id}_win{idx}_lbl{lbl}.pt")
        torch.save((features, lbl, eeg_id, ts), save_path)

    print(f"[{i+1}/{len(dataset)}] Processed {eeg_id} with {len(windows)} windows.", end='\r')

In [None]:
# Aufteilen der Features in zeitliche und spektrale
import torch
import os
from glob import glob
import numpy as np

def split_features(feature_tensor):
    """
    Trennt Features in spektral (0-9) und zeitlich (10-14)
    """
    spectral = feature_tensor[..., :10]         # Indizes 0-9
    temporal = feature_tensor[..., 10:15]       # Indizes 10-14
    return spectral, temporal

def process_feature_files(load_dir, save_dir_spectral, save_dir_temporal):
    os.makedirs(save_dir_spectral, exist_ok=True)
    os.makedirs(save_dir_temporal, exist_ok=True)

    feature_files = glob(os.path.join(load_dir, "*.pt"))

    for file in feature_files:
        data = torch.load(file)

        if isinstance(data, tuple):
            features, label, eeg_id, ts = data
        elif isinstance(data, dict):
            features = data['features']
            label = data['label']
            eeg_id = data['eeg_id']
            ts = data['timestamp']
        else:
            print(f"Unbekanntes Format: {file}")
            continue

        # in numpy falls tensor
        if isinstance(features, torch.Tensor):
            features = features.numpy()

        # flach oder Matrix?
        if features.ndim == 1:
            n_channels = 6  # oder dein tatsächlicher Wert
            features = features.reshape(n_channels, -1)

        spec_feat, time_feat = split_features(features)

        # Optional: flatten
        spec_feat_flat = spec_feat.flatten()
        time_feat_flat = time_feat.flatten()

        base_name = os.path.basename(file)

        # Speichern
        torch.save((spec_feat_flat, label, eeg_id, ts), os.path.join(save_dir_spectral, base_name))
        torch.save((time_feat_flat, label, eeg_id, ts), os.path.join(save_dir_temporal, base_name))

    print(f"Fertig. {len(feature_files)} Dateien verarbeitet.")

# Beispiel:
ordner = "/home/jupyter-wki_team_3/wki-sose25/montage_datasets/"
unterordner = [f for f in os.listdir(ordner) if os.path.isdir(os.path.join(ordner, f)) and not f.startswith('.')]
    
for config in unterordner:
    
    load_dir = "montage_datasets/"+ config
    save_dir_spectral = "data_features_sep/spectral/" + config
    save_dir_temporal = "data_features_sep/temporal/" + config

    process_feature_files(load_dir, save_dir_spectral, save_dir_temporal)
    print(config)

In [1]:
# Code zum Zusammenführen von .pt Dateien -> reduziert die LAdezeit am Anfang des Traiings massiv
import os
import torch
from glob import glob

ordner = "/home/jupyter-wki_team_3/wki-sose25/add_dataset/"
unterordner = [f for f in os.listdir(ordner) if os.path.isdir(os.path.join(ordner, f)) and not f.startswith('.')]
    
for config in unterordner:

    # === Einstellungen ===
    source_dir = "add_dataset/" + config
    target_dir = "add_dataset/combined/" + config
    batch_size = 1000  # Anzahl Dateien pro kombiniertes File

    os.makedirs(target_dir, exist_ok=True)

    # === Alle .pt-Dateien finden ===
    file_paths = sorted(glob(os.path.join(source_dir, "*.pt")))

    combined_samples = []
    file_counter = 0

    for i, file_path in enumerate(file_paths):
        try:
            sample = torch.load(file_path)
            combined_samples.append(sample)

            # Sobald batch_size erreicht ist, speichern
            if len(combined_samples) >= batch_size:
                save_path = os.path.join(target_dir, f"combined_{file_counter}.pt")
                torch.save(combined_samples, save_path)
                combined_samples = []
                file_counter += 1
        except Exception as e:
            print(f"Fehler bei {file_path}: {e}")

    # Rest speichern
    if combined_samples:
        save_path = os.path.join(target_dir, f"combined_{file_counter}.pt")
        torch.save(combined_samples, save_path)
        

    print(f"config {config} gespeichert.")


config win3_step3 gespeichert.
config win2_step2 gespeichert.
config win1_step1 gespeichert.
config win1_step0.5 gespeichert.
config win3_step1 gespeichert.
config win2_step1 gespeichert.


In [9]:
# -*- coding: utf-8 -*-
# VERSION HAT FUNKTIONIERT
"""

Skript testet das vortrainierte Modell


@author:  Maurice Rohr, Dirk Schweickard
"""


import numpy as np
import json
import os
from typing import List, Tuple, Dict, Any
from wettbewerb import get_6montages

# Pakete aus dem Vorlesungsbeispiel
import mne
from scipy import signal as sps
import ruptures as rpt
import torch 
import torch.nn as nn
from CNN_model_copy import CNN_EEG
from new_preprocess import preprocess_signal_with_montages
from features_prediction import window_prediction, feature_extraction_window
#from CNN_dataset import window_data_evaluate, create_fixed_grid_maps
from glob import glob
from scipy.signal import iirnotch, butter, sosfiltfilt, resample_poly, tf2sos


###Signatur der Methode (Parameter und Anzahl return-Werte) darf nicht verändert werden
def predict_labels(channels : List[str], data : np.ndarray, fs : float, reference_system: str, model_name : str='model.json') -> Dict[str,Any]:
    '''
    Parameters
    ----------
    channels : List[str]
        Namen der übergebenen Kanäle
    data : ndarray
        EEG-Signale der angegebenen Kanäle
    fs : float
        Sampling-Frequenz der Signale.
    reference_system :  str
        Welches Referenzsystem wurde benutzt, "Bezugselektrode", nicht garantiert korrekt!
    model_name : str
        Name eures Models,das ihr beispielsweise bei Abgabe genannt habt. 
        Kann verwendet werden um korrektes Model aus Ordner zu laden
    Returns
    -------
    prediction : Dict[str,Any]
        enthält Vorhersage, ob Anfall vorhanden und wenn ja wo (Onset+Offset)
    '''

#------------------------------------------------------------------------------
# Euer Code ab hier  

    # Initialisiere Return (Ergebnisse)
    seizure_present = True # gibt an ob ein Anfall vorliegt
    seizure_confidence = 0.5 # gibt die Unsicherheit des Modells an (optional)
    onset = 4.2   # gibt den Beginn des Anfalls an (in Sekunden)
    onset_confidence = 0.99 # gibt die Unsicherheit bezüglich des Beginns an (optional)
    offset = 999999  # gibt das Ende des Anfalls an (optional)
    offset_confidence = 0   # gibt die Unsicherheit bezüglich des Endes an (optional)

    # Modell Aufsetzen
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    #Daten vorbereiten
    window_size = 4
    step_size = 1
    target_fs = 256
    original_fs = fs
 
    
    montage_names, montage_data, montage_missing,target_fs = preprocess_signal_with_montages(channels, data,target_fs,original_fs) 
    
    windows, timestamps = window_prediction(montage_data, target_fs, window_size, step_size)
    data_for_class = []
    # Feature extraction and brain map calculation
    for win in windows:
        features = feature_extraction_window(win, fs) # shape: (n_channels, n_features)
        assert not np.isnan(features).any(), "NaN in features!"
        x = torch.tensor(features, dtype = torch.float)
        data_for_class.append(x)
        

    # Klassifikation
    predictions_per_window =[]
    with torch.no_grad():
        probs = predictions_ensemble(data_for_class ,model_name, device)
        predictions_per_window = [int(p > 0.5) for p in probs]

    seizure_present = False
    seizure_present, onset_candidate = detect_onset(predictions_per_window, timestamps, min_consecutive=2)
    if seizure_present:
        onset = onset_candidate

        
#------------------------------------------------------------------------------  
    prediction = {"seizure_present":seizure_present,"seizure_confidence":seizure_confidence,
                   "onset":onset,"onset_confidence":onset_confidence,"offset":offset,
                   "offset_confidence":offset_confidence}
  
    return prediction # Dictionary mit prediction - Muss unverändert bleiben!
                               
                               
        
def predictions_ensemble(data_for_class: List[torch.Tensor], model_name: str, device: torch.device) -> List[float]:
    file_paths = sorted([os.path.join(model_name, f) for f in os.listdir(model_name) if f.endswith(".pth")])
    batch_tensor = torch.stack(data_for_class).to(device)
    probs = []

    with torch.no_grad():
        for path in file_paths:
            model = CNN_EEG(6, 1).to(device)
            model.load_state_dict(torch.load(path, map_location=device))
            model.eval()
            outputs = torch.sigmoid(model(batch_tensor).squeeze())
            probs.append(outputs.cpu().numpy())  # shape: (num_windows,)

    ensemble_probs = np.mean(probs, axis=0)  # Mittelwert pro Fenster
    return ensemble_probs.tolist()  # Gib Liste von Wahrscheinlichkeiten zurück


def detect_onset(predictions, timestamps, min_consecutive=2):
    predictions = torch.tensor(predictions)
    for i in range(len(predictions) - min_consecutive + 1):
        if torch.all(predictions[i:i+min_consecutive] == 1):
            return True, timestamps[i]
    return False, None



def notch_filter(signal, fs, freq=50.0, Q=30.0):
    w0 = freq / (fs / 2)
    b, a = iirnotch(w0, Q)
    sos = tf2sos(b, a)  # Transferfunktion → SOS
    return sosfiltfilt(sos, signal, axis=-1)


def bandpass_filter(signal, fs, lowcut=1.0, highcut=120.0, order=4):
    sos = sps.butter(order, [lowcut, highcut], btype='band', fs=fs, output='sos')
    return sosfiltfilt(sos, signal, axis=-1)

def resample_signal(signal, original_fs, target_fs=256):
    if original_fs == target_fs:
        return signal
    gcd = np.gcd(int(original_fs), int(target_fs))
    up = int(target_fs // gcd)
    down = int(original_fs // gcd)
    return resample_poly(signal, up, down, axis=-1)

In [10]:
from wettbewerb import load_references
train_folder = "../shared_data/training_mini" 
ids, channels, data, sampling_frequencies, reference_systems, eeg_labels = load_references(train_folder,90)
print(eeg_labels)
idx = ids[3]
channel = channels[3]
data_s = data[3]
fs = sampling_frequencies[3]
ref = reference_systems[3]
model_abgabe = "model_abgabe/"
prediction = predict_labels(channel, data_s, fs, ref, model_abgabe)
print(prediction)

10	 Dateien wurden geladen.
[(1, 26.08, 50.1025), (1, 2.9212, 32.3607), (0, 0.0, 0.0), (1, 34.8275, 63.0425), (1, 9.1971, 23.5505), (1, 5.285, 26.4575), (1, 19.7015, 28.5202), (0, 0.0, 0.0), (0, 0.0, 0.0), (0, 0.0, 0.0)]
{'seizure_present': True, 'seizure_confidence': 0.5, 'onset': 42.0, 'onset_confidence': 0.99, 'offset': 999999, 'offset_confidence': 0}


In [1]:
# -*- coding: utf-8 -*-
"""

Skript testet das vortrainierte Modell


@author:  Maurice Rohr, Dirk Schweickard
"""


import numpy as np
import json
import os
from typing import List, Tuple, Dict, Any
from wettbewerb import get_6montages

# Pakete aus dem Vorlesungsbeispiel
import mne
from scipy import signal as sps
import ruptures as rpt
import torch 
import torch.nn as nn
from CNN_model_copy import CNN_EEG
from new_preprocess import preprocess_signal_with_montages
from faster_features import window_prediction, feature_extraction_window
#from CNN_dataset import window_data_evaluate, create_fixed_grid_maps
from glob import glob
from scipy.signal import iirnotch, butter, sosfiltfilt, resample_poly, tf2sos


###Signatur der Methode (Parameter und Anzahl return-Werte) darf nicht verändert werden
def predict_labels(channels : List[str], data : np.ndarray, fs : float, reference_system: str, model_name : str='model.json') -> Dict[str,Any]:
    '''
    Parameters
    ----------
    channels : List[str]
        Namen der übergebenen Kanäle
    data : ndarray
        EEG-Signale der angegebenen Kanäle
    fs : float
        Sampling-Frequenz der Signale.
    reference_system :  str
        Welches Referenzsystem wurde benutzt, "Bezugselektrode", nicht garantiert korrekt!
    model_name : str
        Name eures Models,das ihr beispielsweise bei Abgabe genannt habt. 
        Kann verwendet werden um korrektes Model aus Ordner zu laden
    Returns
    -------
    prediction : Dict[str,Any]
        enthält Vorhersage, ob Anfall vorhanden und wenn ja wo (Onset+Offset)
    '''

#------------------------------------------------------------------------------
# Euer Code ab hier  

    # Initialisiere Return (Ergebnisse)
    seizure_present = True # gibt an ob ein Anfall vorliegt
    seizure_confidence = 0.5 # gibt die Unsicherheit des Modells an (optional)
    onset = 4.2   # gibt den Beginn des Anfalls an (in Sekunden)
    onset_confidence = 0.99 # gibt die Unsicherheit bezüglich des Beginns an (optional)
    offset = 999999  # gibt das Ende des Anfalls an (optional)
    offset_confidence = 0   # gibt die Unsicherheit bezüglich des Endes an (optional)

    # Modell Aufsetzen
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    #Daten vorbereiten
    window_size = 4
    step_size = 1
    target_fs = 256
    original_fs = fs
 
    
    montage_names, montage_data, montage_missing,target_fs = preprocess_signal_with_montages(channels, data,target_fs,original_fs) 
    
    windows, timestamps = window_prediction(montage_data, target_fs, window_size, step_size)
    data_for_class = []
    # Feature extraction and brain map calculation
    for win in windows:
        features = feature_extraction_window(win, fs) # shape: (n_channels, n_features)
        assert not np.isnan(features).any(), "NaN in features!"
        x = torch.tensor(features, dtype = torch.float)
        data_for_class.append(x)
        

    # Klassifikation
    predictions_per_window =[]
    with torch.no_grad():
        probs = predictions_ensemble(data_for_class ,model_name, device)
        predictions_per_window = [int(p > 0.5) for p in probs]

    seizure_present = False
    seizure_present, onset_candidate = detect_onset(predictions_per_window, timestamps, min_consecutive=2)
    if seizure_present:
        onset = onset_candidate

        
#------------------------------------------------------------------------------  
    prediction = {"seizure_present":seizure_present,"seizure_confidence":seizure_confidence,
                   "onset":onset,"onset_confidence":onset_confidence,"offset":offset,
                   "offset_confidence":offset_confidence}
  
    return prediction # Dictionary mit prediction - Muss unverändert bleiben!
                               
                               
        
def predictions_ensemble(data_for_class: List[torch.Tensor], model_name: str, device: torch.device) -> List[float]:
    file_paths = sorted([os.path.join(model_name, f) for f in os.listdir(model_name) if f.endswith(".pth")])
    batch_tensor = torch.stack(data_for_class).to(device)
    probs = []

    with torch.no_grad():
        for path in file_paths:
            model = CNN_EEG(6, 1).to(device)
            model.load_state_dict(torch.load(path, map_location=device))
            model.eval()
            outputs = torch.sigmoid(model(batch_tensor).squeeze())
            probs.append(outputs.cpu().numpy())  # shape: (num_windows,)

    ensemble_probs = np.mean(probs, axis=0)  # Mittelwert pro Fenster
    return ensemble_probs.tolist()  # Gib Liste von Wahrscheinlichkeiten zurück


def detect_onset(predictions, timestamps, min_consecutive=2):
    predictions = torch.tensor(predictions)
    for i in range(len(predictions) - min_consecutive + 1):
        if torch.all(predictions[i:i+min_consecutive] == 1):
            return True, timestamps[i]
    return False, None



def notch_filter(signal, fs, freq=50.0, Q=30.0):
    w0 = freq / (fs / 2)
    b, a = iirnotch(w0, Q)
    sos = tf2sos(b, a)  # Transferfunktion → SOS
    return sosfiltfilt(sos, signal, axis=-1)


def bandpass_filter(signal, fs, lowcut=1.0, highcut=120.0, order=4):
    sos = sps.butter(order, [lowcut, highcut], btype='band', fs=fs, output='sos')
    return sosfiltfilt(sos, signal, axis=-1)

def resample_signal(signal, original_fs, target_fs=256):
    if original_fs == target_fs:
        return signal
    gcd = np.gcd(int(original_fs), int(target_fs))
    up = int(target_fs // gcd)
    down = int(original_fs // gcd)
    return resample_poly(signal, up, down, axis=-1)

In [6]:
from wettbewerb import load_references
train_folder = "../shared_data/training_mini" 
ids, channels, data, sampling_frequencies, reference_systems, eeg_labels = load_references(train_folder,90)
print(eeg_labels)
idx = ids[3]
channel = channels[3]
data_s = data[3]
fs = sampling_frequencies[3]
ref = reference_systems[3]
model_abgabe = "model_abgabe/"
prediction = predict_labels(channel, data_s, fs, ref, model_abgabe)
print(prediction)

10	 Dateien wurden geladen.
[(1, 26.08, 50.1025), (1, 2.9212, 32.3607), (0, 0.0, 0.0), (1, 34.8275, 63.0425), (1, 9.1971, 23.5505), (1, 5.285, 26.4575), (1, 19.7015, 28.5202), (0, 0.0, 0.0), (0, 0.0, 0.0), (0, 0.0, 0.0)]
{'seizure_present': True, 'seizure_confidence': 0.5, 'onset': 41.0, 'onset_confidence': 0.99, 'offset': 999999, 'offset_confidence': 0}


In [None]:
# Code zum Zusammenführen von .pt Dateien -> reduziert die LAdezeit am Anfang des Traiings massiv
import os
import torch
from glob import glob

ordner = "/home/jupyter-wki_team_3/wki-sose25/montage_datasets/"
unterordner = [f for f in os.listdir(ordner) if os.path.isdir(os.path.join(ordner, f)) and not f.startswith('.')]
    
for config in unterordner:

    # === Einstellungen ===
    source_dir = "montage_datasets/" + config
    target_dir = "montage_datasets/combined/" + config
    batch_size = 1000  # Anzahl Dateien pro kombiniertes File

    os.makedirs(target_dir, exist_ok=True)

    # === Alle .pt-Dateien finden ===
    file_paths = sorted(glob(os.path.join(source_dir, "*.pt")))

    combined_samples = []
    file_counter = 0

    for i, file_path in enumerate(file_paths):
        try:
            sample = torch.load(file_path)
            combined_samples.append(sample)

            # Sobald batch_size erreicht ist, speichern
            if len(combined_samples) >= batch_size:
                save_path = os.path.join(target_dir, f"combined_{file_counter}.pt")
                torch.save(combined_samples, save_path)
                combined_samples = []
                file_counter += 1
        except Exception as e:
            print(f"Fehler bei {file_path}: {e}")

    # Rest speichern
    if combined_samples:
        save_path = os.path.join(target_dir, f"combined_{file_counter}.pt")
        torch.save(combined_samples, save_path)
        

    print(f"config {config} gespeichert.")


In [None]:
import os
import torch
from glob import glob

# === Einstellungen ===
input_root = "montage_datasets/combined/"
output_root_spectral = "montage_datasets/spectral_only/"
output_root_temporal = "montage_datasets/temporal_only/"

# Erstelle Zielverzeichnisse, wenn nicht vorhanden
os.makedirs(output_root_spectral, exist_ok=True)
os.makedirs(output_root_temporal, exist_ok=True)

# Alle Konfigurations-Unterordner finden
configs = [f for f in os.listdir(input_root) if os.path.isdir(os.path.join(input_root, f))]

for config in configs:
    input_dir = os.path.join(input_root, config)
    output_dir_spec = os.path.join(output_root_spectral, config)
    output_dir_temp = os.path.join(output_root_temporal, config)

    os.makedirs(output_dir_spec, exist_ok=True)
    os.makedirs(output_dir_temp, exist_ok=True)

    pt_files = sorted(glob(os.path.join(input_dir, "*.pt")))

    for file_path in pt_files:
        try:
            samples = torch.load(file_path)  # List of (channels x 15) matrices

            spectral_list = []
            temporal_list = []

            for sample in samples:
                feature_matrix = sample[0]  # (channels x 15)
                if isinstance(feature_matrix, np.ndarray):
                    feature_matrix = torch.tensor(feature_matrix, dtype=torch.float32)
                print(f"feature_matrix shape: {feature_matrix.shape}, dtype: {feature_matrix.dtype}")
                spectral = torch.cat([feature_matrix[:, :10], feature_matrix[:, 13:14]], dim=1)
                temporal = torch.cat([feature_matrix[:, 10:13], feature_matrix[:, 14:15]], dim=1)

                spectral_list.append(spectral)
                temporal_list.append(temporal)

            base_name = os.path.basename(file_path)
            torch.save(spectral_list, os.path.join(output_dir_spec, base_name))
            torch.save(temporal_list, os.path.join(output_dir_temp, base_name))

            print(f"{base_name} in {config} erfolgreich aufgeteilt.")

        except Exception as e:
            print(f"Fehler bei {file_path}: {e}")

In [3]:
import torch

torch.cuda.empty_cache()

In [4]:
import torch

file_path = "montage_datasets/combined/win4_step1/combined_0.pt"
data = torch.load(file_path)

print("Type of top-level object:", type(data))
print("Length of list:", len(data))

# Inspect first element
first = data[0]
print("\nFirst element type:", type(first))

if isinstance(first, torch.Tensor):
    print("  Shape:", first.shape)
    print("  Dtype:", first.dtype)
    print("  Sample values:", first.flatten()[:5].tolist())

elif isinstance(first, list) or isinstance(first, tuple):
    print("  Nested list/tuple with length:", len(first))
    print("  First nested element:", first[0])

else:
    print("  Value:", first)

Type of top-level object: <class 'list'>
Length of list: 1000

First element type: <class 'tuple'>
  Nested list/tuple with length: 4
  First nested element: [[ 0.39671343  0.31002456  1.20167539  0.61168035 -0.78912795  0.56392585
   0.78173435  1.22542846  0.65210768 -0.59066113 -0.6107416  -0.66900684
   1.06112907  0.45223035 -0.44306134]
 [ 2.07237451  2.0163187  -0.32710171  1.74845449  1.39890744  1.99441151
   1.55668983 -0.17410813  1.66426925  1.62063475  1.59590003  1.03053922
  -0.86959759 -0.62985493 -0.53864759]
 [-0.50184262 -0.04353399  1.41114218 -0.37794431 -0.2044818  -0.45186591
   0.36822387  1.20798724 -0.28762193 -0.04417692 -0.29354535  0.64624984
  -0.78630999 -0.60705251 -0.53864759]
 [-0.68282521 -0.50366493 -0.17940142 -0.33128269 -0.86625855 -0.74481138
  -0.39366044 -0.02297837 -0.21150563 -0.95220757 -0.90680798 -0.97890649
   0.59597551  1.33618937 -0.15656792]
 [-0.69180106 -0.88276054 -0.73129286 -0.13501362  1.35339536 -0.77654973
  -1.14960137 -0.614

In [5]:
import torch

data = torch.load("montage_datasets/combined/win4_step1/combined_0.pt")

# Print one sample
sample = data[0]
print("Tuple length:", len(sample))

for i, part in enumerate(sample):
    print(f"\nItem {i}:")
    print("  Type:", type(part))
    try:
        print("  Shape:", part.shape)
    except AttributeError:
        print("  Value:", part)

Tuple length: 4

Item 0:
  Type: <class 'numpy.ndarray'>
  Shape: (6, 15)

Item 1:
  Type: <class 'int'>
  Value: 0

Item 2:
  Type: <class 'str'>
  Value: aaaaaaac_s001_t000

Item 3:
  Type: <class 'float'>
  Value: 0.0


In [6]:
import torch
import numpy as np

file_path = "montage_datasets/combined/win4_step1"
data = torch.load(file_path)

X = [sample[0].flatten() for sample in data]   # Flatten [6,15] → [90]
y = [sample[1] for sample in data]             # Use the int label

X = np.array(X)
y = np.array(y)

print("X shape:", X.shape)  # (1000, 90)
print("y shape:", y.shape)  # (1000,)
print("Label distribution:", np.bincount(y))

IsADirectoryError: [Errno 21] Is a directory: 'montage_datasets/combined/win4_step1'

In [12]:
import pywt

def compute_scalogram_tensor(window, fs, wavelet='morl', scales=np.arange(1, 64)):
    """
    Compute continuous wavelet transform (scalogram) for each channel.
    """
    scalograms = []
    for ch in window:
        coeffs, _ = pywt.cwt(ch, scales=scales, wavelet=wavelet, sampling_period=1/fs)
        scalogram = np.abs(coeffs)
        scalograms.append(scalogram)

    scal_tensor = np.stack(scalograms, axis=0)  # (channels, scales, time)
    return torch.tensor(scal_tensor, dtype=torch.float32)

In [20]:
from wettbewerb import EEGDataset
from new_preprocess import preprocess_signal_with_montages
from new_features import window_eeg_data
import os, torch
import numpy as np
import pywt
import time
from multiprocessing import Pool, cpu_count

# === Set config ===
window_size = 4
step_size = 1
save_folder = f"scalogram_dataset/win{window_size}_step{step_size}"
os.makedirs(save_folder, exist_ok=True)

# === Parameters shared across processes ===
scales = np.linspace(1, 64, 32)
wavelet = 'morl'
dataset = EEGDataset("../shared_data/training")


def compute_single_scalogram(args):
    """Unpack input and compute scalogram."""
    idx, window, fs, lbl, eeg_id, ts = args
    coeffs_all = []
    for ch_data in window:
        coeffs, _ = pywt.cwt(ch_data, scales, wavelet, sampling_period=1/fs)
        coeffs_all.append(np.abs(coeffs).astype(np.float32))
    scalogram = np.stack(coeffs_all, axis=0)  # [channels, scales, time]

    save_path = os.path.join(save_folder, f"{eeg_id}_win{idx}_lbl{lbl}.pt")
    torch.save((scalogram, lbl, eeg_id, ts), save_path)
    return idx


# === Main processing loop ===
overall_start_time = time.time()
for i in range(len(dataset)):
    eeg_id, channels, raw_data, fs, _, label = dataset[i]
    seizure_label, seizure_onset, seizure_offset = label

    # 1. Preprocess
    montage_names, processed_signal, montage_missing, new_fs = preprocess_signal_with_montages(
        channels, raw_data, target_fs=256, original_fs=fs, ids=eeg_id
    )

    if montage_missing:
        print(f"Skipping {eeg_id} (montage missing)")
        continue

    # 2. Windowing + labeling
    windows, labels, timestamps = window_eeg_data(
        processed_signal, resampled_fs=new_fs,
        seizure_onset=seizure_onset,
        seizure_offset=seizure_offset,
        window_size=window_size,
        step_size=step_size
    )

    if not windows:
        print(f"No valid windows for {eeg_id}")
        continue

    # 3. Generate scalograms in parallel
    file_start = time.time()
    with Pool(processes=cpu_count()) as pool:
        args = [
            (idx, window, new_fs, lbl, eeg_id, ts)
            for idx, (window, lbl, ts) in enumerate(zip(windows, labels, timestamps))
        ]
        pool.map(compute_single_scalogram, args)

    file_time = time.time() - file_start
    print(f"[{i+1}/{len(dataset)}] ✅ Finished {eeg_id} with {len(windows)} windows in {file_time:.2f}s", end = '\r')

total_time = time.time() - overall_start_time
print(f"\n⏱️ Total time to generate scalograms: {total_time:.2f} seconds")
print(f"✅ All done. Saved to: {save_folder}")

[96/6213] ✅ Finished aaaaaqtu_s011_t000 with 297 windows in 1.55sss

Exception ignored in: <function _releaseLock at 0x7fc6883963a0>
Traceback (most recent call last):
  File "/home/jupyter-wki_team_3/.conda/envs/wki-sose25/lib/python3.8/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 
Process ForkPoolWorker-38337:
Traceback (most recent call last):
  File "/home/jupyter-wki_team_3/.conda/envs/wki-sose25/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/jupyter-wki_team_3/.conda/envs/wki-sose25/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/jupyter-wki_team_3/.conda/envs/wki-sose25/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/home/jupyter-wki_team_3/.conda/envs/wki-sose25/lib/python3.8/multiprocessing/queues.py", line 356, in get
    res = self._reader.recv_bytes()
  File "/home/jupyter-wki_team_3/.conda/envs/wki-sose25/lib/python3.8/multiprocessing/conn

KeyboardInterrupt: 

In [24]:
import os
import torch
import numpy as np
from wettbewerb import EEGDataset
from new_preprocess import preprocess_signal_with_montages
from new_features import window_eeg_data
import time

# === Config ===
window_size = 4  # seconds
step_size = 1    # seconds
target_fs = 256  # Hz
save_folder = f"raw_dataset/win{window_size}_step{step_size}"
print("Creating Folder")
os.makedirs(save_folder, exist_ok=True)

# === Dataset ===
dataset = EEGDataset("../shared_data/training")

# === Timing (optional) ===
overall_start = time.time()
for i in range(len(dataset)):
    eeg_id, channels, raw_data, fs, _, label = dataset[i]
    seizure_label, seizure_onset, seizure_offset = label

    # === Preprocess with Montages ===
    #print(f"Start Preprocessing: {eeg_id}", end ='\r')
    montage_names, processed_signal, montage_missing, new_fs = preprocess_signal_with_montages(
        channels, raw_data, target_fs=target_fs, original_fs=fs, ids=eeg_id
    )

    if montage_missing:
        print(f"Skipping {eeg_id} (montage missing)")
        continue

    # === Window and Label ===
    #print(f"Start Windowing: {eeg_id}", end ='\r')
    windows, labels, timestamps = window_eeg_data(
        processed_signal,
        resampled_fs=new_fs,
        seizure_onset=seizure_onset,
        seizure_offset=seizure_offset,
        window_size=window_size,
        step_size=step_size
    )

    file_start = time.time()
    for idx, (window, lbl, ts) in enumerate(zip(windows, labels, timestamps)):

        # Optional: Normalize each channel in the window (z-score)
        window = (window - window.mean(axis=1, keepdims=True)) / (window.std(axis=1, keepdims=True) + 1e-8)

        save_path = os.path.join(save_folder, f"{eeg_id}_win{idx}_lbl{lbl}.pt")
        torch.save((window, lbl, eeg_id, ts), save_path)

    print(f"[{i+1}/{len(dataset)}] ✅ {eeg_id} with {len(windows)} windows in {time.time() - file_start:.2f}s", end='\r')

# === Overall Time ===
overall_time = time.time() - overall_start
print(f"\n⏱️ Total time: {overall_time:.2f} seconds")
print(f"✅ Finished preprocessing to: {save_folder}")

Creating Folder
[6213/6213] ✅ aaaaatez_s006_t010 with 297 windows in 0.15s8ss
⏱️ Total time: 2501.54 seconds
✅ Finished preprocessing to: raw_dataset/win4_step1
