### Raw Data Analysis

##### Import Libraries:

In [1]:
# Libraries Import:

import pyedflib
import numpy as np
import pandas as pd
import mne
import matplotlib.pyplot as plt
import os
from mne.preprocessing import ICA
import pywt
import logging
import warnings

warnings.filterwarnings('ignore')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
mne.set_log_level('ERROR')

##### Defining File Paths:

In [2]:
# Importing EDF Files:

file_participant_1 = 'Data/RawData/Participant_1.edf'
file_participant_2 = 'Data/RawData/Participant_2.edf'
file_participant_3 = 'Data/RawData/Participant_3.edf'
file_participant_4 = 'Data/RawData/Participant_4.edf'
file_participant_5 = 'Data/RawData/Participant_5.edf'
file_participant_6 = 'Data/RawData/Participant_6.edf'
file_participant_7 = 'Data/RawData/Participant_7.edf'
file_participant_8 = 'Data/RawData/Participant_8.edf'
file_participant_9 = 'Data/RawData/Participant_9.edf'
file_participant_10 = 'Data/RawData/Participant_10.edf'
file_participant_11 = 'Data/RawData/Participant_11.edf'
file_participant_12 = 'Data/RawData/Participant_12.edf'
file_participant_13 = 'Data/RawData/Participant_13.edf'

edf_data_files = [
    file_participant_1,
    file_participant_2,
    file_participant_3,
    file_participant_4,
    file_participant_5,
    file_participant_6,
    file_participant_7,
    file_participant_8,
    file_participant_9,
    file_participant_10,
    file_participant_11,
    file_participant_12,
    file_participant_13
]

##### Extracting Channels of Interest:

In [3]:
# COI:

channels_of_interest = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']

In [4]:
# Extracting the COI:

participant_1_COI_dataset = mne.io.read_raw_edf(file_participant_1, preload = True).pick_channels(channels_of_interest)
participant_2_COI_dataset = mne.io.read_raw_edf(file_participant_2, preload = True).pick_channels(channels_of_interest)
participant_3_COI_dataset = mne.io.read_raw_edf(file_participant_3, preload = True).pick_channels(channels_of_interest)
participant_4_COI_dataset = mne.io.read_raw_edf(file_participant_4, preload = True).pick_channels(channels_of_interest)
participant_5_COI_dataset = mne.io.read_raw_edf(file_participant_5, preload = True).pick_channels(channels_of_interest)
participant_6_COI_dataset = mne.io.read_raw_edf(file_participant_6, preload = True).pick_channels(channels_of_interest)
participant_7_COI_dataset = mne.io.read_raw_edf(file_participant_7, preload = True).pick_channels(channels_of_interest)
participant_8_COI_dataset = mne.io.read_raw_edf(file_participant_8, preload = True).pick_channels(channels_of_interest)
participant_9_COI_dataset = mne.io.read_raw_edf(file_participant_9, preload = True).pick_channels(channels_of_interest)
participant_10_COI_dataset = mne.io.read_raw_edf(file_participant_10, preload = True).pick_channels(channels_of_interest)
participant_11_COI_dataset = mne.io.read_raw_edf(file_participant_11, preload = True).pick_channels(channels_of_interest)
participant_12_COI_dataset = mne.io.read_raw_edf(file_participant_12, preload = True).pick_channels(channels_of_interest)
participant_13_COI_dataset = mne.io.read_raw_edf(file_participant_13, preload = True).pick_channels(channels_of_interest)

In [5]:
# Resulting Raw EDF Info:

print(participant_1_COI_dataset)
print(participant_2_COI_dataset)
print(participant_3_COI_dataset)
print(participant_4_COI_dataset)
print(participant_5_COI_dataset)
print(participant_6_COI_dataset)
print(participant_7_COI_dataset)
print(participant_8_COI_dataset)
print(participant_9_COI_dataset)
print(participant_10_COI_dataset)
print(participant_11_COI_dataset)
print(participant_12_COI_dataset)
print(participant_13_COI_dataset)

<RawEDF | Participant_1.edf, 14 x 117760 (920.0 s), ~12.6 MB, data loaded>
<RawEDF | Participant_2.edf, 14 x 116480 (910.0 s), ~12.5 MB, data loaded>
<RawEDF | Participant_3.edf, 14 x 116608 (911.0 s), ~12.5 MB, data loaded>
<RawEDF | Participant_4.edf, 14 x 116352 (909.0 s), ~12.4 MB, data loaded>
<RawEDF | Participant_5.edf, 14 x 116224 (908.0 s), ~12.4 MB, data loaded>
<RawEDF | Participant_6.edf, 14 x 116864 (913.0 s), ~12.5 MB, data loaded>
<RawEDF | Participant_7.edf, 14 x 117376 (917.0 s), ~12.6 MB, data loaded>
<RawEDF | Participant_8.edf, 14 x 116480 (910.0 s), ~12.5 MB, data loaded>
<RawEDF | Participant_9.edf, 14 x 115840 (905.0 s), ~12.4 MB, data loaded>
<RawEDF | Participant_10.edf, 14 x 116352 (909.0 s), ~12.4 MB, data loaded>
<RawEDF | Participant_11.edf, 14 x 115840 (905.0 s), ~12.4 MB, data loaded>
<RawEDF | Participant_12.edf, 14 x 116864 (913.0 s), ~12.5 MB, data loaded>
<RawEDF | Participant_13.edf, 14 x 115840 (905.0 s), ~12.4 MB, data loaded>


##### Preprocessing:

In [6]:
# Function to Preprocess Raw Data:

def preprocess_raw_data(raw):

    # Handling NaNs: Replace NaNs with the mean of the respective channel
    raw_data = raw.get_data()

    for i in range(raw_data.shape[0]):
        nan_indices = np.isnan(raw_data[i])

        if np.any(nan_indices):
            mean_value = np.nanmean(raw_data[i])
            raw_data[i, nan_indices] = mean_value

    raw._data = raw_data

    # Filtering: Bandpass filter between 0.5-30 Hz
    raw.filter(0.5, 30., fir_design='firwin')
    
    # Artifact Removal: ICA
    ica = mne.preprocessing.ICA(n_components = 14, random_state = 97, max_iter = 800)
    ica.fit(raw)
    raw = ica.apply(raw)
    
    # Spatial Filtering: Common Average Reference (CAR)
    raw.set_eeg_reference('average', projection = True)
    
    # Channel Interpolation: Interpolate bad channels
    raw.interpolate_bads()

    # Baseline Correction: Apply baseline correction using the mean of the segment
    raw.apply_function(lambda x: x - np.mean(x), picks = 'eeg')
    
    return raw

In [7]:
# Calling the Preprocessing Function:

participant_1_preprocessed_dataset = preprocess_raw_data(participant_1_COI_dataset)
participant_2_preprocessed_dataset = preprocess_raw_data(participant_2_COI_dataset)
participant_3_preprocessed_dataset = preprocess_raw_data(participant_3_COI_dataset)
participant_4_preprocessed_dataset = preprocess_raw_data(participant_4_COI_dataset)
participant_5_preprocessed_dataset = preprocess_raw_data(participant_5_COI_dataset)
participant_6_preprocessed_dataset = preprocess_raw_data(participant_6_COI_dataset)
participant_7_preprocessed_dataset = preprocess_raw_data(participant_7_COI_dataset)
participant_8_preprocessed_dataset = preprocess_raw_data(participant_8_COI_dataset)
participant_9_preprocessed_dataset = preprocess_raw_data(participant_9_COI_dataset)
participant_10_preprocessed_dataset = preprocess_raw_data(participant_10_COI_dataset)
participant_11_preprocessed_dataset = preprocess_raw_data(participant_11_COI_dataset)
participant_12_preprocessed_dataset = preprocess_raw_data(participant_12_COI_dataset)
participant_13_preprocessed_dataset = preprocess_raw_data(participant_13_COI_dataset)

##### Segmentation of Data:

In [8]:
# Segmentation occurs as following:

# First 30 secs -> removed since these serve as the adjustment period
# 60 secs -> "I"
# 30 secs -> Break
# 60 secs -> "Yes"
# 30 secs -> Break
# 60 secs -> "No"
# 30 secs -> Break
# 60 secs -> "Want"
# 30 secs -> Break
# 60 secs -> "Help"
# 30 secs -> Break
# 60 secs -> "More"
# 30 secs -> Break
# 60 secs -> "That"
# 30 secs -> Break
# 60 secs -> "Stop"
# 30 secs -> Break
# 60 secs -> "Open"
# 30 secs -> Break
# 60 secs -> "Close"
# Remaining time remove as it is redundant

In [9]:
# Function to Extract Segments:

def extract_segment(raw, start_sec, duration_sec, label, sfreq):

    start_sample = int(start_sec * sfreq)
    end_sample = start_sample + int(duration_sec * sfreq)
    segment = raw[:, start_sample:end_sample][0]
    
    return segment, label

In [10]:
# Segmentation Details:

segments = [
    (30, 60, "I"),
    (120, 60, "Yes"),
    (210, 60, "No"),
    (300, 60, "Want"),
    (390, 60, "Help"),
    (480, 60, "More"),
    (570, 60, "That"),
    (660, 60, "Stop"),
    (750, 60, "Open"),
    (840, 60, "Close")
]

In [11]:
# Extracting Segments for each Individual Participant Part 1:

# Extract segments for Participant 1:
sfreq_participant_1 = participant_1_preprocessed_dataset.info['sfreq']
segments_participant_1 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_1_preprocessed_dataset, start, duration, label, sfreq_participant_1)
    segments_participant_1.append((segment, label))

# Extract segments for Participant 2:
sfreq_participant_2 = participant_2_preprocessed_dataset.info['sfreq']
segments_participant_2 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_2_preprocessed_dataset, start, duration, label, sfreq_participant_2)
    segments_participant_2.append((segment, label))

# Extract segments for Participant 3:
sfreq_participant_3 = participant_3_preprocessed_dataset.info['sfreq']
segments_participant_3 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_3_preprocessed_dataset, start, duration, label, sfreq_participant_3)
    segments_participant_3.append((segment, label))

# Extract segments for Participant 4:
sfreq_participant_4 = participant_4_preprocessed_dataset.info['sfreq']
segments_participant_4 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_4_preprocessed_dataset, start, duration, label, sfreq_participant_4)
    segments_participant_4.append((segment, label))

# Extract segments for Participant 5:
sfreq_participant_5 = participant_5_preprocessed_dataset.info['sfreq']
segments_participant_5 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_5_preprocessed_dataset, start, duration, label, sfreq_participant_5)
    segments_participant_5.append((segment, label))

In [12]:
# Extracting Segments for each Individual Participant Part 2:

# Extract segments for Participant 6:
sfreq_participant_6 = participant_6_preprocessed_dataset.info['sfreq']
segments_participant_6 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_6_preprocessed_dataset, start, duration, label, sfreq_participant_6)
    segments_participant_6.append((segment, label))

# Extract segments for Participant 7:
sfreq_participant_7 = participant_7_preprocessed_dataset.info['sfreq']
segments_participant_7 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_7_preprocessed_dataset, start, duration, label, sfreq_participant_7)
    segments_participant_7.append((segment, label))

# Extract segments for Participant 8:
sfreq_participant_8 = participant_8_preprocessed_dataset.info['sfreq']
segments_participant_8 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_8_preprocessed_dataset, start, duration, label, sfreq_participant_8)
    segments_participant_8.append((segment, label))

# Extract segments for Participant 9:
sfreq_participant_9 = participant_9_preprocessed_dataset.info['sfreq']
segments_participant_9 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_9_preprocessed_dataset, start, duration, label, sfreq_participant_9)
    segments_participant_9.append((segment, label))

# Extract segments for Participant 10:
sfreq_participant_10 = participant_10_preprocessed_dataset.info['sfreq']
segments_participant_10 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_10_preprocessed_dataset, start, duration, label, sfreq_participant_10)
    segments_participant_10.append((segment, label))

In [13]:
# Extracting Segments for each Individual Participant Part 3:

# Extract segments for Participant 11:
sfreq_participant_11 = participant_11_preprocessed_dataset.info['sfreq']
segments_participant_11 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_11_preprocessed_dataset, start, duration, label, sfreq_participant_11)
    segments_participant_11.append((segment, label))

# Extract segments for Participant 12:
sfreq_participant_12 = participant_12_preprocessed_dataset.info['sfreq']
segments_participant_12 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_12_preprocessed_dataset, start, duration, label, sfreq_participant_12)
    segments_participant_12.append((segment, label))

# Extract segments for Participant 13:
sfreq_participant_13 = participant_13_preprocessed_dataset.info['sfreq']
segments_participant_13 = []

for start, duration, label in segments:
    segment, label = extract_segment(participant_13_preprocessed_dataset, start, duration, label, sfreq_participant_13)
    segments_participant_13.append((segment, label))

In [14]:
# Dispalying Segments for Participant 1:

segments_participant_1

[(array([[ 2.13238279e-04,  2.05378182e-04,  1.97904898e-04, ...,
          -8.24021941e-06, -1.83166731e-05, -2.90499871e-05],
         [ 1.72501481e-04,  1.69083200e-04,  1.65870594e-04, ...,
          -5.31618240e-07, -8.17158515e-06, -1.95097882e-05],
         [ 2.02836421e-04,  1.92313730e-04,  1.88531476e-04, ...,
           8.07180431e-06,  1.96599803e-06, -3.20881890e-06],
         ...,
         [ 3.01715790e-04,  2.85883755e-04,  2.72007228e-04, ...,
           4.12278394e-05,  3.37222051e-05,  2.58178307e-05],
         [ 2.60535811e-04,  2.50385791e-04,  2.45094255e-04, ...,
           5.66471968e-05,  4.43954440e-05,  3.10132867e-05],
         [ 2.50443676e-04,  2.37106637e-04,  2.27500428e-04, ...,
           4.29710885e-05,  3.23122293e-05,  2.26700920e-05]]),
  'I'),
 (array([[-1.29501473e-05, -1.64963172e-05, -1.24686881e-05, ...,
          -1.39144399e-06,  2.76310702e-07, -2.60156098e-06],
         [ 2.21400884e-05,  1.78141132e-05,  2.14257918e-05, ...,
           1.4

##### Creating Sub Epochs:

In [15]:
# Function to Create Sub-Epochs from Segments:

def create_sub_epochs(segment, epoch_duration, sfreq):

    n_samples_per_epoch = int(epoch_duration * sfreq)
    n_epochs = segment.shape[1] // n_samples_per_epoch
    sub_epochs = []
    
    for i in range(n_epochs):
        start_sample = i * n_samples_per_epoch
        end_sample = start_sample + n_samples_per_epoch
        sub_epoch = segment[:, start_sample:end_sample]
        sub_epochs.append(sub_epoch)
    
    return sub_epochs

In [16]:
# Defining Sub-Epoch Duration:

sub_epoch_duration = 2

In [17]:
# Creating Sub-Epochs for each Individual Participant Part 1:

# Create sub-epochs for Participant 1
sub_epochs_participant_1 = []
for segment, label in segments_participant_1:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_1)
    sub_epochs_participant_1.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 2
sub_epochs_participant_2 = []
for segment, label in segments_participant_2:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_2)
    sub_epochs_participant_2.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 3
sub_epochs_participant_3 = []
for segment, label in segments_participant_3:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_3)
    sub_epochs_participant_3.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 4
sub_epochs_participant_4 = []
for segment, label in segments_participant_4:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_4)
    sub_epochs_participant_4.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 5
sub_epochs_participant_5 = []
for segment, label in segments_participant_5:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_5)
    sub_epochs_participant_5.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

In [18]:
# Creating Sub-Epochs for each Individual Participant Part 2:

# Create sub-epochs for Participant 6
sub_epochs_participant_6 = []
for segment, label in segments_participant_6:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_6)
    sub_epochs_participant_6.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 7
sub_epochs_participant_7 = []
for segment, label in segments_participant_7:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_7)
    sub_epochs_participant_7.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 8
sub_epochs_participant_8 = []
for segment, label in segments_participant_8:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_8)
    sub_epochs_participant_8.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 9
sub_epochs_participant_9 = []
for segment, label in segments_participant_9:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_9)
    sub_epochs_participant_9.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 10
sub_epochs_participant_10 = []
for segment, label in segments_participant_10:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_10)
    sub_epochs_participant_10.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

In [19]:
# Creating Sub-Epochs for each Individual Participant Part 3:

# Create sub-epochs for Participant 11
sub_epochs_participant_11 = []
for segment, label in segments_participant_11:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_11)
    sub_epochs_participant_11.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 12
sub_epochs_participant_12 = []
for segment, label in segments_participant_12:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_12)
    sub_epochs_participant_12.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

# Create sub-epochs for Participant 13
sub_epochs_participant_13 = []
for segment, label in segments_participant_13:
    sub_epochs = create_sub_epochs(segment, sub_epoch_duration, sfreq_participant_13)
    sub_epochs_participant_13.extend([(sub_epoch, label) for sub_epoch in sub_epochs])

In [20]:
# Combine Sub-Epoch Data into DataFrames:

data_sub_epochs_participant_1 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_1], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_2 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_2], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_3 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_3], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_4 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_4], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_5 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_5], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_6 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_6], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_7 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_7], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_8 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_8], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_9 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_9], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_10 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_10], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_11 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_11], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_12 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_12], columns=['Label', 'Sub_Epoch'])
data_sub_epochs_participant_13 = pd.DataFrame([(label, sub_epoch) for sub_epoch, label in sub_epochs_participant_13], columns=['Label', 'Sub_Epoch'])


# Save Sub-Epoch Data to CSV files for Verification:

data_sub_epochs_participant_1.to_csv('Data/SubEpochData/Participant_1_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_2.to_csv('Data/SubEpochData/Participant_2_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_3.to_csv('Data/SubEpochData/Participant_3_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_4.to_csv('Data/SubEpochData/Participant_4_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_5.to_csv('Data/SubEpochData/Participant_5_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_6.to_csv('Data/SubEpochData/Participant_6_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_7.to_csv('Data/SubEpochData/Participant_7_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_8.to_csv('Data/SubEpochData/Participant_8_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_9.to_csv('Data/SubEpochData/Participant_9_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_10.to_csv('Data/SubEpochData/Participant_10_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_11.to_csv('Data/SubEpochData/Participant_11_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_12.to_csv('Data/SubEpochData/Participant_12_Sub_Epoch_Data.csv', index = False)
data_sub_epochs_participant_13.to_csv('Data/SubEpochData/Participant_13_Sub_Epoch_Data.csv', index = False)

In [21]:
# Display the Sub-Epoch Data for Verification:

print("Participant 1 Sub-Epoch Data:")
print(data_sub_epochs_participant_1.head())

Participant 1 Sub-Epoch Data:
  Label                                          Sub_Epoch
0     I  [[0.00021323827937150136, 0.000205378181898957...
1     I  [[6.528388254172846e-05, 6.793395112452262e-05...
2     I  [[-1.197846936741574e-05, -1.5899529935307284e...
3     I  [[1.62035004669331e-05, 1.534839092153962e-05,...
4     I  [[1.908828371455047e-06, -9.281953529624032e-0...


##### Constructing Final Dataset:

In [22]:
# Combine Sub-Epoch Data from all Participants into a Single Dataset:

concatenated_sub_epoch_dataset = pd.concat([data_sub_epochs_participant_1, 
                                            data_sub_epochs_participant_2, 
                                            data_sub_epochs_participant_3,
                                            data_sub_epochs_participant_4,
                                            data_sub_epochs_participant_5,
                                            data_sub_epochs_participant_6,
                                            data_sub_epochs_participant_7,
                                            data_sub_epochs_participant_8,
                                            data_sub_epochs_participant_9,
                                            data_sub_epochs_participant_10,
                                            data_sub_epochs_participant_11,
                                            data_sub_epochs_participant_12,
                                            data_sub_epochs_participant_13]).reset_index(drop = True)

In [23]:
# Check the First few rows to ensure the Concatenation was Successful:
print("Combined Sub-Epoch Data (first few rows):")
print(concatenated_sub_epoch_dataset.head())

Combined Sub-Epoch Data (first few rows):
  Label                                          Sub_Epoch
0     I  [[0.00021323827937150136, 0.000205378181898957...
1     I  [[6.528388254172846e-05, 6.793395112452262e-05...
2     I  [[-1.197846936741574e-05, -1.5899529935307284e...
3     I  [[1.62035004669331e-05, 1.534839092153962e-05,...
4     I  [[1.908828371455047e-06, -9.281953529624032e-0...


In [24]:
# Save the Combined Dataset to CSV:

concatenated_sub_epoch_dataset.to_csv('Data/FinalDataset/Combined_Sub_Epoch_Final_Dataset.csv', index = False)

##### Feature Extraction:

In [25]:
# Function to Extract Power Spectral Density (PSD) Features:

def extract_psd_features(epochs, bands, sfreq):

    psd_features = []
    logging.info(f"Extracting PSD Features for {len(epochs)} epochs.")

    for epoch, label in epochs:
        psd, freqs = mne.time_frequency.psd_array_multitaper(epoch, sfreq = sfreq, fmin = 0.5, fmax = 30)
        band_powers = {'Label': label}

        for band, (low, high) in bands.items():
            band_power = np.mean(psd[:, (freqs >= low) & (freqs <= high)], axis = 1)

            for i, power in enumerate(band_power):
                band_powers[f'{band}_ch{i}'] = power

        psd_features.append(band_powers)

    logging.info(f"\nPSD Features Extracted: {psd_features[0].keys()}")
    return pd.DataFrame(psd_features)

In [26]:
# Function to Extract Wavelet Transform Features:

def extract_wavelet_features(epochs, wavelet = 'db4', level = 5):

    wavelet_features = []
    logging.info(f"Extracting Wavelet Features for {len(epochs)} epochs.")

    for epoch, label in epochs:
        features = {'Label': label}

        for ch in range(epoch.shape[0]):
            coeffs = pywt.wavedec(epoch[ch], wavelet, level = level)

            for i, coeff in enumerate(coeffs):
                features[f'ch{ch}_coeff{i}_mean'] = np.mean(coeff)
                features[f'ch{ch}_coeff{i}_std'] = np.std(coeff)

        wavelet_features.append(features)

    logging.info(f"\nWavelet Features Extracted: {wavelet_features[0].keys()}")
    return pd.DataFrame(wavelet_features)

In [27]:
# Defining Frequency bands for PSD:

bands = {
    'delta': (0.5, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30),
    'gamma': (31, 50)
}

# Combine all participants' sub-epochs:
all_sub_epochs = (sub_epochs_participant_1 + sub_epochs_participant_2 + sub_epochs_participant_3 + sub_epochs_participant_4 + sub_epochs_participant_5 + sub_epochs_participant_6 + sub_epochs_participant_7 + sub_epochs_participant_8 + sub_epochs_participant_9 + sub_epochs_participant_10 + sub_epochs_participant_11 + sub_epochs_participant_12 + sub_epochs_participant_13)

# Extract PSD Features:
psd_features = extract_psd_features(all_sub_epochs, bands, sfreq_participant_1)
logging.info(f"\nPSD features DataFrame shape: {psd_features.shape}")

# Extract Wavelet Features:
wavelet_features = extract_wavelet_features(all_sub_epochs)
logging.info(f"\nWavelet features DataFrame Shape: {wavelet_features.shape}")

# Combine Features:
combined_features = pd.merge(psd_features, wavelet_features, on = 'Label')
logging.info(f"\nCombined features DataFrame shape: {combined_features.shape}")

# Drop Duplicate Features:
combined_features = combined_features.loc[:,~combined_features.columns.duplicated()]
logging.info(f"\nCombined features DataFrame shape after dropping duplicates: {combined_features.shape}")

# Save combined features to CSV
combined_features.to_csv('Data/FinalDataset/Full_Final_Dataset.csv', index = False)
logging.info(f"\nFinal combined features saved. Number of features: {combined_features.shape[1]}")

2024-07-06 15:32:54,654 - INFO - Extracting PSD Features for 3900 epochs.
2024-07-06 15:32:58,347 - INFO - 
PSD Features Extracted: dict_keys(['Label', 'delta_ch0', 'delta_ch1', 'delta_ch2', 'delta_ch3', 'delta_ch4', 'delta_ch5', 'delta_ch6', 'delta_ch7', 'delta_ch8', 'delta_ch9', 'delta_ch10', 'delta_ch11', 'delta_ch12', 'delta_ch13', 'theta_ch0', 'theta_ch1', 'theta_ch2', 'theta_ch3', 'theta_ch4', 'theta_ch5', 'theta_ch6', 'theta_ch7', 'theta_ch8', 'theta_ch9', 'theta_ch10', 'theta_ch11', 'theta_ch12', 'theta_ch13', 'alpha_ch0', 'alpha_ch1', 'alpha_ch2', 'alpha_ch3', 'alpha_ch4', 'alpha_ch5', 'alpha_ch6', 'alpha_ch7', 'alpha_ch8', 'alpha_ch9', 'alpha_ch10', 'alpha_ch11', 'alpha_ch12', 'alpha_ch13', 'beta_ch0', 'beta_ch1', 'beta_ch2', 'beta_ch3', 'beta_ch4', 'beta_ch5', 'beta_ch6', 'beta_ch7', 'beta_ch8', 'beta_ch9', 'beta_ch10', 'beta_ch11', 'beta_ch12', 'beta_ch13', 'gamma_ch0', 'gamma_ch1', 'gamma_ch2', 'gamma_ch3', 'gamma_ch4', 'gamma_ch5', 'gamma_ch6', 'gamma_ch7', 'gamma_ch8', '

In [28]:
# Display the Extracted Features:

print("Extracted Features:")
print(combined_features.head())

Extracted Features:
  Label     delta_ch0     delta_ch1     delta_ch2     delta_ch3     delta_ch4  \
0     I  2.342114e-07  1.001417e-07  1.727662e-07  1.656557e-07  1.231673e-07   
1     I  2.342114e-07  1.001417e-07  1.727662e-07  1.656557e-07  1.231673e-07   
2     I  2.342114e-07  1.001417e-07  1.727662e-07  1.656557e-07  1.231673e-07   
3     I  2.342114e-07  1.001417e-07  1.727662e-07  1.656557e-07  1.231673e-07   
4     I  2.342114e-07  1.001417e-07  1.727662e-07  1.656557e-07  1.231673e-07   

      delta_ch5     delta_ch6     delta_ch7     delta_ch8  ...  \
0  1.318333e-07  1.307850e-07  1.548563e-07  1.369585e-07  ...   
1  1.318333e-07  1.307850e-07  1.548563e-07  1.369585e-07  ...   
2  1.318333e-07  1.307850e-07  1.548563e-07  1.369585e-07  ...   
3  1.318333e-07  1.307850e-07  1.548563e-07  1.369585e-07  ...   
4  1.318333e-07  1.307850e-07  1.548563e-07  1.369585e-07  ...   

   ch13_coeff1_mean  ch13_coeff1_std  ch13_coeff2_mean  ch13_coeff2_std  \
0          0.000001  