In [149]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d

In [150]:
# Load the tab-separated recording file. It has no header row, so the columns
# are labelled with integers 0..N-1 at this point.
df = pd.read_csv("MW.txt", sep="\t", header=None)
print(df.shape)
# Distinct values found in the column at 0-based index 4 (the fifth column).
df[4].unique()

(67635, 7)


array([ 0,  1,  4,  8,  5,  7,  2,  9,  3,  6, -1])

In [151]:
# Replace the default integer column labels with descriptive names.
# Later cells read the 'Signal', 'Count' and 'Stimulus' columns by these names.
df.columns = [
    'ID',
    'Timestamp',
    'Subject',
    'Channel',
    'Stimulus',
    'Count',
    'Signal',
]

In [152]:
# Parse each comma-separated 'Signal' string into a list of floats.
# The isinstance guard keeps this cell re-runnable: once the column already
# holds parsed lists/arrays (e.g. after an earlier run), values pass through
# unchanged instead of failing on a missing `.split`.
df['Signal'] = df['Signal'].apply(
    lambda s: [float(v) for v in s.split(',')] if isinstance(s, str) else s
)

# Common length every signal is resampled to: the median of the 'Count'
# column, truncated to an int.
median_length = int(np.median(df['Count']))

print(f"Median signal length: {median_length}")

def resample_signal(signal, target_length):
    """Resample a 1-D signal to ``target_length`` samples by linear interpolation.

    Parameters
    ----------
    signal : sequence of float
        Input samples (list or 1-D array). It is never modified.
    target_length : int
        Number of samples in the returned signal.

    Returns
    -------
    numpy.ndarray
        A new float array of shape ``(target_length,)``. The result is always
        an ndarray, including when the input already has the requested
        length, so callers can rely on array semantics and never receive an
        alias of their own input.

    Notes
    -----
    An empty input yields an array of zeros; a single-sample input yields that
    sample repeated ``target_length`` times.
    """
    # np.array copies, so the caller's data is never aliased or mutated.
    samples = np.array(signal, dtype=float)
    n_samples = len(samples)

    if n_samples == target_length:
        return samples  # Already the right length; nothing to interpolate.

    if n_samples < 2:
        # Too few points to interpolate between: pad with a constant.
        fill_value = samples[0] if n_samples == 1 else 0.0
        return np.full(target_length, fill_value)

    # Spread the target sample positions evenly over the original index range
    # (first and last samples are preserved) and interpolate linearly.
    original_indices = np.arange(n_samples)
    target_indices = np.linspace(0, n_samples - 1, target_length)
    return np.interp(target_indices, original_indices, samples)

# Bring every signal to the common length so the rows can later be stacked
# into a single array. (The former trailing `.T` was dropped: transposing a
# Series returns the Series itself, so it had no effect.)
df['Signal'] = df['Signal'].apply(lambda s: resample_signal(s, median_length))


Median signal length: 953


In [153]:
def interpolate_nans(signal_1d):
    """Fill NaN samples of a 1-D signal by linear interpolation over sample index.

    Parameters
    ----------
    signal_1d : sequence of float
        Input samples (list or 1-D array). It is never modified.

    Returns
    -------
    numpy.ndarray
        A new float array of the same length. Interior NaNs are linearly
        interpolated from their valid neighbours; leading/trailing NaNs take
        the nearest valid value (``np.interp`` clamps at the ends). If there
        are no NaNs, or every sample is NaN (nothing to interpolate from),
        the values are returned unchanged.
    """
    # Work on a float copy: accepts plain lists and leaves the caller's data intact.
    values = np.array(signal_1d, dtype=float)
    nan_mask = np.isnan(values)

    # Nothing to fill, or no valid samples to interpolate from.
    if not nan_mask.any() or nan_mask.all():
        return values

    positions = np.arange(len(values))
    values[nan_mask] = np.interp(
        positions[nan_mask], positions[~nan_mask], values[~nan_mask]
    )
    return values

# Turn every DataFrame row into one single-channel epoch and keep its
# stimulus code as the matching label.
epoch_rows = []
stimulus_codes = []

for raw_signal, stimulus in zip(df['Signal'], df['Stimulus']):
    # Resample to the common length, then fill any NaN samples.
    cleaned = interpolate_nans(resample_signal(raw_signal, median_length))
    # Add a leading channel axis -> shape (1, median_length).
    epoch_rows.append(np.array(cleaned).reshape(1, -1))
    stimulus_codes.append(stimulus)

# Stack along a new first axis -> shape (n_epochs, 1, median_length).
epochs_data = np.stack(epoch_rows, axis=0)
labels = np.array(stimulus_codes)

print(f"Final shape: {epochs_data.shape}")
print(f"Labels shape: {labels.shape}")


Final shape: (67635, 1, 953)
Labels shape: (67635,)


In [154]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale each channel of `epochs_data` in place, one scaler per channel.
# MinMaxScaler rescales every column of its 2-D input independently, so with a
# (n_epochs, n_times) slice each time index is scaled across all epochs.
# NOTE(review): confirm this per-time-index scaling is intended (as opposed to
# scaling each epoch on its own).
n_channels = epochs_data.shape[1]
for ch in range(n_channels):
    scaler = MinMaxScaler()
    epochs_data[:, ch, :] = scaler.fit_transform(epochs_data[:, ch, :])


print(f"Cleaned dataset shape: {epochs_data.shape} (epochs, channels, times)")
print(f"Stimulus labels (example): {labels[:5]}")


Cleaned dataset shape: (67635, 1, 953) (epochs, channels, times)
Stimulus labels (example): [0 1 4 1 8]
