In [7]:
import pickle
import numpy as np
import os
import pandas as pd

def read_data(filename):
    """Load one pickled recording file and return the unpickled object.

    Parameters
    ----------
    filename : str or path-like
        Path of the pickle file to open in binary mode.

    Returns
    -------
    object
        Whatever object the pickle contains. The callers in this notebook
        index the result with the keys 'data' and 'labels'.

    Notes
    -----
    ``encoding='latin1'`` makes 8-bit ``str`` objects stored in the pickle
    decode byte-for-byte instead of failing under the default ASCII decoding.
    """
    # SECURITY: unpickling can execute arbitrary code; only use this on files
    # from a trusted source.
    with open(filename, 'rb') as f:
        # Public API equivalent of building pickle._Unpickler by hand and
        # setting its .encoding attribute afterwards.
        return pickle.load(f, encoding='latin1')

# Zero-padded participant identifiers "01" .. "32", used to build the file names
files = [str(number).zfill(2) for number in range(1, 33)]

base_path = "/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/"

# One list entry per participant file, in the order of `files`
labels, data = [], []

for participant in files:
    file_path = os.path.join(base_path, f"s{participant}.dat")
    # `d` stays bound to the last file read once the loop finishes
    d = read_data(file_path)
    data.append(d['data'])
    labels.append(d['labels'])

In [2]:
# Sanity check: per-participant array shape and number of participants loaded
print(data[0].shape, len(data), sep='\n')
print('----------')
print(labels[0].shape, len(labels), sep='\n')

(40, 40, 8064)
32
----------
(40, 4)
32


In [3]:
# Channel selection: keep only the first 32 entries along the channel axis
# (axis 1) of every participant's array.
for idx, recording in enumerate(data):
    data[idx] = recording[:, :32, :]

In [4]:
# Same sanity check again, now that the channel axis has been cut down
print(data[0].shape, len(data), '----------', labels[0].shape, len(labels), sep='\n')

(40, 32, 8064)
32
----------
(40, 4)
32


In [5]:
# Names for the 32 retained channels, in column order; used later as the
# feature column names of the per-trial DataFrame.
# NOTE(review): presumably this matches the channel order of the recordings — confirm against the dataset documentation.
EEG_ch_names = ['Fp1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1',
                'P3', 'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8',
                'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2']

## Time-collapsed version (time dependency removed)

In [6]:
import numpy as np

notime_data = []
notime_labels = []

# Walk over every participant (32 in the run shown here) and each of their trials
for p_idx, person in enumerate(data):
    for t_idx, trial in enumerate(person):
        # trial is (channels, timepoints): averaging over axis 1 collapses the
        # time dimension and leaves one mean per channel -> shape (32,) once
        # the 32-channel selection has been applied
        notime_data.append(np.mean(trial, axis=1))

        # label row belonging to this participant/trial pair -> shape (4,)
        notime_labels.append(labels[p_idx][t_idx])

notime_data = np.array(notime_data)       # shape: (32*40, 32) = (1280, 32)
notime_labels = np.array(notime_labels)   # shape: (1280, 4)

print("Data shape:", notime_data.shape)
print("Labels shape:", notime_labels.shape)


Data shape: (1280, 32)
Labels shape: (1280, 4)


In [8]:
# Column names for the four label values stored with every trial
label_names = ['Valence', 'Arousal', 'Dominance', 'Liking']

# One row per trial: channel means on the left, label columns on the right
df_features = pd.DataFrame(data=notime_data, columns=EEG_ch_names)
df_labels = pd.DataFrame(data=notime_labels, columns=label_names)

# Column-wise concatenation; both frames share the same default row index
df = pd.concat([df_features, df_labels], axis='columns')

print(df.head())
print("Shape of full DataFrame:", df.shape)

        Fp1       AF3        F3        F7       FC5       FC1        C3  \
0 -0.028259 -0.015335 -0.003302 -0.056503 -0.028488  0.014347 -0.000252   
1 -0.082712 -0.031351 -0.020466 -0.113204 -0.083982 -0.026306 -0.034776   
2  0.060689  0.081124  0.059805  0.023111 -0.008049  0.045063  0.006847   
3 -0.027773 -0.011902  0.017439 -0.027303  0.049521  0.037495  0.014994   
4 -0.062537 -0.072949 -0.071271 -0.051193 -0.028972 -0.053433 -0.040939   

         T7       CP5       CP1  ...       CP6       CP2        P4        P8  \
0 -0.068311 -0.010738  0.040199  ...  0.036044  0.034916  0.024629  0.007397   
1 -0.051734 -0.027296  0.021282  ...  0.016032  0.061128  0.076130  0.068246   
2  0.019250 -0.032186 -0.007827  ... -0.034721 -0.016302 -0.075312 -0.027588   
3  0.003586  0.044774  0.024977  ... -0.066859 -0.021336  0.000258 -0.095102   
4  0.013520  0.015260 -0.038656  ...  0.048018 -0.013598  0.043253  0.064102   

        PO4        O2  Valence  Arousal  Dominance  Liking  
0  0.01

In [9]:
# Show the merged feature + label frame; the rendered preview elides the middle rows and columns.
df

Unnamed: 0,Fp1,AF3,F3,F7,FC5,FC1,C3,T7,CP5,CP1,...,CP6,CP2,P4,P8,PO4,O2,Valence,Arousal,Dominance,Liking
0,-0.028259,-0.015335,-0.003302,-0.056503,-0.028488,0.014347,-0.000252,-0.068311,-0.010738,0.040199,...,0.036044,0.034916,0.024629,0.007397,0.012641,-0.011155,1.29,1.40,6.90,7.83
1,-0.082712,-0.031351,-0.020466,-0.113204,-0.083982,-0.026306,-0.034776,-0.051734,-0.027296,0.021282,...,0.016032,0.061128,0.076130,0.068246,0.142353,0.156159,0.90,1.69,7.28,8.47
2,0.060689,0.081124,0.059805,0.023111,-0.008049,0.045063,0.006847,0.019250,-0.032186,-0.007827,...,-0.034721,-0.016302,-0.075312,-0.027588,-0.108040,-0.123599,0.42,1.46,9.00,7.08
3,-0.027773,-0.011902,0.017439,-0.027303,0.049521,0.037495,0.014994,0.003586,0.044774,0.024977,...,-0.066859,-0.021336,0.000258,-0.095102,-0.014530,-0.021971,4.94,6.01,6.12,8.06
4,-0.062537,-0.072949,-0.071271,-0.051193,-0.028972,-0.053433,-0.040939,0.013520,0.015260,-0.038656,...,0.048018,-0.013598,0.043253,0.064102,0.062947,0.073402,6.96,3.92,7.19,6.05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,-0.015068,-0.439097,-0.002601,-0.342746,0.305603,0.086212,0.312185,-0.432129,0.585818,-0.126779,...,0.908987,0.505454,0.326118,-0.246413,0.849521,0.219849,3.91,6.96,5.82,3.12
1276,-0.074093,-0.608723,0.074181,-0.402769,0.524283,0.201509,0.524640,-0.664969,0.913419,-0.261409,...,1.355936,0.826961,0.426723,-0.433220,1.360671,0.266681,2.81,6.13,6.06,1.04
1277,-0.012218,0.272855,-0.025265,0.163852,-0.209080,-0.064046,-0.162996,0.352787,-0.369132,0.180440,...,-0.702924,-0.375559,-0.208621,0.179822,-0.669554,-0.149759,3.05,7.01,5.10,1.10
1278,0.048134,0.047834,-0.016544,0.084249,0.001667,-0.034263,-0.048989,0.056246,-0.045314,-0.032114,...,-0.052007,-0.024993,-0.026647,0.023883,-0.003679,0.023943,3.99,7.17,4.85,1.00


In [10]:
# List all column names: the EEG channel columns first, then the four label columns.
df.columns

Index(['Fp1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3',
       'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6',
       'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2',
       'Valence', 'Arousal', 'Dominance', 'Liking'],
      dtype='object')

In [11]:
# Write the time-collapsed table to a CSV file in the working directory.
# NOTE(review): without index=False pandas also writes the row index as an
# unnamed first column, unlike the index=False export used for the
# time-dependent features later in this notebook — confirm this is intended.
df.to_csv("deap_eeg_data.csv")

## Preserved time dependency version

In [12]:
# Empty containers for the time-preserving features (data2) and their labels (labels2).
# NOTE(review): both names are assigned empty lists again a few cells further down, so this cell looks redundant.
data2=[]
labels2=[]

In [13]:
def getbatches(biglist, n=10):
    """Split a sequence into ``n`` contiguous parts of nearly equal length.

    Parameters
    ----------
    biglist : sequence
        Any object supporting ``len()`` and slicing (list, 1-D array, ...).
    n : int, optional
        Number of parts to produce. Defaults to 10.

    Returns
    -------
    list
        ``n`` slices of ``biglist`` in original order. When the length is not
        divisible by ``n``, the first ``len(biglist) % n`` parts are one
        element longer. When ``len(biglist) < n``, the trailing parts are empty.

    Raises
    ------
    ValueError
        If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError("n must be a positive integer")

    chunk_size, remainder = divmod(len(biglist), n)

    result = []
    start = 0
    for i in range(n):
        # the leading `remainder` parts absorb the leftover elements, one each
        end = start + chunk_size + (1 if i < remainder else 0)
        result.append(biglist[start:end])
        start = end
    return result

In [14]:
import numpy as np
from scipy.stats import skew, kurtosis
def get_stat_data(batches):
    """Compute nine summary statistics for every batch and concatenate them.

    For each batch, in order: mean, standard deviation, minimum, maximum,
    range (max - min), median, skewness, kurtosis and variance.

    Parameters
    ----------
    batches : iterable of array-like
        Each element is one batch of values.

    Returns
    -------
    numpy.ndarray
        1-D array holding 9 values per batch, batch after batch.
    """
    stats = []
    for chunk in batches:
        values = np.array(chunk)
        average = np.mean(values)
        spread = np.std(values)
        lowest = np.min(values)
        highest = np.max(values)
        stats += [
            average,
            spread,
            lowest,
            highest,
            highest - lowest,  # range
            np.median(values),
            skew(values),
            kurtosis(values),
            np.var(values),
        ]
    return np.array(stats)

In [15]:
# Start from empty lists: the feature-extraction loop below appends one entry
# per trial and finally converts both names to NumPy arrays.
data2 = []
labels2=[]

In [16]:
# Peek at the label row of the first trial in `d`, i.e. whichever participant
# file was read most recently (`d` is left over from a loading loop).
labels_data=d['labels'][0]
print(labels_data)


[8.13 4.83 9.   4.87]


In [17]:
# (Re)create the accumulators here so this cell can be re-run on its own:
# at the end both names are rebound to arrays, which have no .append().
data2 = []
labels2 = []

for person in files:
    file_path = os.path.join(base_path, f"s{person}.dat")
    print(file_path)
    d = read_data(file_path)

    # Use every trial stored in the file. The previous hard-coded range(30)
    # silently dropped the remaining trials of each participant, which
    # contradicted the "all trials" shape documented below.
    for trial_data, trial_labels in zip(d['data'], d['labels']):
        eeg = trial_data[:32, :]  # first 32 channels only -> (32, timepoints)

        # Per channel: split the signal into 10 consecutive segments and
        # compute 9 statistics on each -> 90 values per channel.
        person_features = np.array(
            [get_stat_data(getbatches(channel)) for channel in eeg]
        )  # shape (32, 90)

        data2.append(person_features)
        labels2.append(trial_labels)

data2 = np.array(data2)  # shape: (participants * trials, 32, 90), e.g. (1280, 32, 90) for 32 x 40
labels2 = np.array(labels2)
print(data2.shape)

/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s01.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s02.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s03.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s04.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s05.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s06.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s07.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s08.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s09.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s10.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s11.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s12.dat
/kaggle/input/deap-dataset/deap-dataset/data_preprocessed_python/s13.dat
/kaggle/input/deap-dataset/deap-dataset/data_prepro

In [18]:
import pandas as pd
import numpy as np

# Flatten EEG features: (trials, channels, features) -> (trials, channels * features)
num_samples, num_channels, num_features = data2.shape
data2_flat = data2.reshape(num_samples, -1)

# Create EEG feature column names from the actual array shape. The nesting
# (channel outer, feature inner) matches the row-major order of the reshape,
# so column 'chC_fF' holds feature F of channel C.
columns = [
    f'ch{ch+1}_f{f+1}'
    for ch in range(num_channels)
    for f in range(num_features)
]

# Create DataFrames
df_eeg = pd.DataFrame(data2_flat, columns=columns)
df_labels = pd.DataFrame(labels2, columns=['valence', 'arousal', 'dominance', 'liking'])

# Merge labels + features (label columns first)
df_full = pd.concat([df_labels, df_eeg], axis=1)

# Save to CSV without the row index
csv_filename = "initial_eeg_features_with_labels(time_dep).csv"
df_full.to_csv(csv_filename, index=False)

print(f"✅ Saved real EEG features with labels to {csv_filename}")


✅ Saved real EEG features with labels to initial_eeg_features_with_labels(time_dep).csv
