In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from scipy.stats import kurtosis, skew, entropy

In [26]:
# Root folder that is scanned for subject sub-folders (names starting with 'S').
data_path = 'Electroencephalogram (EEG) dataset with internet addiction and healthy teenagers under rest and executive function task/Data'
# The combined per-subject results table sits directly inside the data folder.
result_file_path = data_path + '/ResultCombine.csv'

In [27]:
def spectral_entropy(signal):
    """Return the Shannon entropy of a signal's normalised power spectrum.

    The power spectrum is the squared magnitude of the full FFT of ``signal``.
    It is scaled to sum to one and passed to ``scipy.stats.entropy``, which
    uses the natural logarithm by default.

    Parameters
    ----------
    signal : array-like
        Sequence of samples (e.g. one EEG band column).

    Returns
    -------
    float
        Spectral entropy in nats. ``0.0`` is returned when the signal is
        empty or has zero total power (for example, all zeros), because the
        normalised spectrum is undefined in those cases.
    """
    samples = np.asarray(signal)
    if samples.size == 0:
        # np.fft.fft refuses zero-length input, so short-circuit here.
        return 0.0

    power_spectrum = np.abs(np.fft.fft(samples)) ** 2
    total_power = np.sum(power_spectrum)
    if total_power == 0:
        # Dividing by zero power would yield NaN (and a RuntimeWarning).
        return 0.0

    return entropy(power_spectrum / total_power)

In [None]:
def extract_features_task(data):
    """Summarise one task recording as a flat list of numeric features.

    Layout of the returned list (always 60 values):

    * ``attention`` and ``meditation``: mean and standard deviation of the
      non-missing values (2 values each, 4 in total).
    * each of the 8 EEG band columns: mean, standard deviation, kurtosis,
      skewness, peak-to-peak range, sum and spectral entropy (7 values each,
      56 in total).

    A column that is absent from ``data`` contributes zeros of the same width
    as a present column, so the position of every feature is stable.

    Parameters
    ----------
    data : pandas.DataFrame
        Recording with (some of) the columns named above.

    Returns
    -------
    list
        Feature values in the order described above.
    """
    features = []

    for col in ['attention', 'meditation']:
        if col in data.columns:
            col_data = data[col].dropna().values
            features.append(np.mean(col_data))
            features.append(np.std(col_data))
        else:
            features.extend([0, 0])

    eeg_bands = ['LowAlpha', 'HighAlpha', 'LowBeta', 'HighBeta', 'Delta', 'Theta', 'LowGamma', 'MiddleGamma']
    # Number of statistics produced per band below. The zero padding for a
    # missing band must have exactly this width, otherwise later features shift.
    n_band_stats = 7

    for band in eeg_bands:
        if band in data.columns:
            signal = data[band].values
            features.extend([
                np.mean(signal),
                np.std(signal),
                kurtosis(signal),
                skew(signal),
                np.ptp(signal),
                np.sum(signal),
                spectral_entropy(signal),
            ])
        else:
            features.extend([0] * n_band_stats)

    return features

In [29]:
def extract_data(subject_data_path, result_file):
    """Build one feature row per subject from the per-task CSV recordings.

    Every directory inside ``subject_data_path`` whose name starts with ``'S'``
    is treated as a subject. Each ``.csv`` file in it (in sorted order) is read
    as a headerless task recording and turned into features with
    ``extract_features_task``. The subject's gender, memory score, internet
    addiction level and personality are appended to the end of the row.

    Parameters
    ----------
    subject_data_path : str
        Directory containing the subject folders.
    result_file : str
        CSV file with the columns ``Subject``, ``Internet Addiction Result``,
        ``Personality Result``, ``Gender`` and ``Memory Words Correct``.

    Returns
    -------
    pandas.DataFrame
        One row per subject that has metadata, with default integer column
        labels. Subjects missing from ``result_file`` are reported and skipped.
        A task file that cannot be processed is reported and replaced by zeros
        of the same width as a real task, so later columns stay aligned.
    """
    column_names = ['attention', 'meditation', 'LowAlpha', 'HighAlpha', 'LowBeta', 'HighBeta', 'Delta', 'Theta', 'LowGamma', 'MiddleGamma']
    # Width of one task's feature vector: mean/std for attention and meditation
    # plus 7 statistics for each EEG band (4 + 8 * 7 = 60).
    n_bands = len(column_names) - 2
    features_per_task = 2 * 2 + 7 * n_bands

    all_data = []
    result_df = pd.read_csv(result_file)

    # Only directories are subjects; a stray file starting with 'S' would
    # otherwise make os.listdir() fail further down.
    subject_folders = sorted(
        name for name in os.listdir(subject_data_path)
        if name.startswith('S') and os.path.isdir(os.path.join(subject_data_path, name))
    )

    for subject in subject_folders:
        subject_path = os.path.join(subject_data_path, subject)
        tasks = sorted([f for f in os.listdir(subject_path) if f.endswith('.csv')])

        subject_row = result_df[result_df['Subject'] == subject]
        if subject_row.empty:
            print(f"Metadata missing for subject: {subject}")
            continue

        addiction_level = subject_row['Internet Addiction Result'].values[0]
        personality = subject_row['Personality Result'].values[0]
        gender = subject_row['Gender'].values[0]
        memory_words_correct = subject_row['Memory Words Correct'].values[0]

        subject_features = []

        for task in tasks:
            task_file_path = os.path.join(subject_path, task)

            try:
                df = pd.read_csv(task_file_path, header=None, names=column_names)
                task_features = extract_features_task(df)
                subject_features.extend(task_features)
            except Exception as e:
                # Best effort: keep the subject, but pad with exactly one
                # task's worth of zeros so the remaining columns do not shift.
                print(f"Skipping {task_file_path}: {e}")
                subject_features.extend([0] * features_per_task)

        subject_features.extend([gender, memory_words_correct, addiction_level, personality])
        all_data.append(subject_features)

    return pd.DataFrame(all_data)

In [30]:
# Run the extraction over every subject folder; one row per subject.
final_df = extract_data(subject_data_path=data_path, result_file=result_file_path)

In [31]:
# Show the extracted feature table (columns still carry default integer labels here).
final_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,654,655,656,657,658,659,660,661,662,663
0,52.883333,17.364803,61.866667,9.947306,33254.633333,28935.250292,2.126606,1.465773,133649,1995278,...,10064.408364,5.744636,2.010266,57835,750619,2.123307,Male,9,Normal Level,Neuroticism
1,62.383333,15.006767,76.35,15.234637,22723.833333,18918.757704,2.232181,1.533889,89766,1363430,...,15316.947863,19.77311,3.926807,103878,784508,2.919513,Female,10,Mid Level,Extraversion
2,54.683333,10.032434,81.05,12.387393,28044.516667,29682.588778,7.810462,2.48717,168654,1682671,...,130722.98866,54.84711,7.533847,1026201,1375433,4.088796,Female,12,Moderate level,Agreeableness
3,51.416667,12.537001,81.966667,17.967532,40169.7,35957.017617,0.734286,1.254723,144794,2410182,...,15487.414905,3.799409,1.714898,77793,1238772,1.99374,Male,4,Mid Level,Openness
4,68.85,17.638807,65.8,26.314001,11817.333333,9137.372145,0.292201,1.056156,36179,709040,...,11721.574124,0.901027,1.07744,49218,922164,2.016672,Female,13,Mid Level,Neuroticism
5,65.0,22.015903,74.1,17.407565,23352.866667,13161.993026,-0.14854,0.524532,56090,1401172,...,4860.858069,2.508093,1.285763,24105,419896,1.825621,Male,7,Mid Level,Extraversion
6,48.766667,11.914091,83.75,13.9936,33277.266667,32852.54365,2.923985,1.826437,146381,1996636,...,11446.542204,5.610303,2.442465,54348,490738,3.131773,Male,7,Moderate level,Conscientiousness
7,51.666667,12.698644,80.8,17.97016,96771.333333,80455.416756,1.183275,1.11688,372246,5806280,...,505591.97896,-1.729283,-0.204059,1542952,53716334,1.378315,Male,10,Severe Level,Openness
8,28.966667,23.323784,68.35,19.101331,73833.733333,51581.96597,2.061505,1.356954,259187,4430024,...,18607.258988,15.206576,3.846065,107803,640372,3.438652,Male,7,Moderate level,Openness
9,73.766667,18.212969,50.533333,13.622857,26876.45,23962.77545,7.269148,2.239522,141475,1612587,...,4323.920655,0.020981,0.792164,19353,380296,1.790713,Female,8,Normal Level,Extraversion


In [32]:
# Names for the per-task features, in the same order extract_features_task emits them:
# mean/std for attention and meditation, then 7 statistics for each EEG band.
am_features = [
    f"{channel}_{stat}"
    for channel in ('attention', 'meditation')
    for stat in ('mean', 'std')
]
bands = [
    'LowAlpha', 'HighAlpha', 'LowBeta', 'HighBeta',
    'Delta', 'Theta', 'LowGamma', 'MiddleGamma',
]
eeg_stats = [
    'mean', 'std', 'kurtosis', 'skewness',
    'range', 'bandpower_sum', 'spectral_entropy',
]
eeg_features = [
    f"{band}_{stat}"
    for band in bands
    for stat in eeg_stats
]
features_per_task = [*am_features, *eeg_features]

# Repeat the per-task names for tasks 1..11, each prefixed with its task number.
all_feature_names = []
for task_idx in range(1, 12):
    all_feature_names += (f"task{task_idx}_{feat}" for feat in features_per_task)

# Subject-level metadata columns are appended once, after all task features.
all_feature_names.extend(['gender', 'memory_words_correct', 'internet_addiction_level', 'personality'])

In [33]:
# Replace the integer column labels with the descriptive feature names.
# pandas raises ValueError if the number of names differs from the number of columns.
final_df.columns = all_feature_names

In [None]:
# Sanity check: the frame's column count should equal the number of names assigned.
print(
    f"Final DataFrame shape: {final_df.shape}",
    f"Number of column names assigned: {len(all_feature_names)}",
    sep="\n",
)
# Transpose the first row so each feature name is listed next to its value.
print(final_df.iloc[:1].T)


Final DataFrame shape: (30, 664)
Number of column names assigned: 664
                                                0
task1_attention_mean                    52.883333
task1_attention_std                     17.364803
task1_meditation_mean                   61.866667
task1_meditation_std                     9.947306
task1_LowAlpha_mean                  33254.633333
...                                           ...
task11_MiddleGamma_spectral_entropy      2.123307
gender                                       Male
memory_words_correct                            9
internet_addiction_level             Normal Level
personality                           Neuroticism

[664 rows x 1 columns]


In [35]:
# Persist the named feature table; the row index is not written to the file.
final_df.to_csv(path_or_buf='preprocessed_eeg_features.csv', index=False)