In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyedflib
from scipy import stats
from scipy.signal import butter, filtfilt
from scipy.signal import welch
from tqdm import tqdm

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# function for reading .edf data
def edf_to_dataframe(edf_file_path):
    """Read every signal of an EDF file into a DataFrame.

    Parameters
    ----------
    edf_file_path : str
        Path handed to ``pyedflib.EdfReader``.

    Returns
    -------
    pandas.DataFrame
        One column per signal label, in the order reported by
        ``getSignalLabels()``; each column holds the samples returned by
        ``readSignal`` for that channel index.

    Notes
    -----
    The reader is closed in a ``finally`` block so the file handle is
    released even when reading a signal fails part-way through.
    """
    reader = pyedflib.EdfReader(edf_file_path)
    try:
        # label -> samples; a repeated label overwrites the earlier channel
        signals_dict = {
            label: reader.readSignal(channel)
            for channel, label in enumerate(reader.getSignalLabels())
        }
    finally:
        reader.close()

    return pd.DataFrame(signals_dict)

In [3]:
# reading all the data files
fs = 600  # sampling rate used to convert times/durations to sample indices

names_data = []   # file name of every recording that loaded successfully
dfs_data = []     # one DataFrame per recording, parallel to names_data
labels_data = []  # 0 = control 1 = knockout

folder_path = 'All_KA'
# os.listdir returns entries in arbitrary order; sorting keeps the order of
# the lists (and therefore every first-match lookup by name) reproducible.
files = sorted(os.listdir(folder_path))

for file_name in files:
    file_path = os.path.join(folder_path, file_name)
    # Only regular files whose name ends in .edf are recordings. The file is
    # opened by pyedflib inside edf_to_dataframe, so no handle is opened here.
    if not (os.path.isfile(file_path) and file_name.lower().endswith('.edf')):
        continue
    try:
        recording = edf_to_dataframe(file_path)
    except Exception as e:
        # best effort: report the unreadable file and carry on with the rest
        print(f"Error reading the file: {e}")
        continue
    dfs_data.append(recording)
    names_data.append(file_name)
    labels_data.append(1 if 'KO' in file_name else 0)

Error reading the file: All_KA/2_CON3.edf: the file is not EDF(+) or BDF(+) compliant (it contains format errors)


In [4]:
# reading the bins for the handpicked time bins

dfs_bin_ictal = []       # 'ictal' sheet of every bins workbook
dfs_bin_interictal = []  # 'interictal' sheet of every bins workbook
names_bin = []           # workbook file name without its extension
labels_bin = []          # 1 when the file name contains 'ko', else 0

folder_path = 'bins'
# sorted so the mouse order (and the group indices derived from it) is stable
files = sorted(os.listdir(folder_path))

for file_name in files:
    file_path = os.path.join(folder_path, file_name)
    if not (os.path.isfile(file_path) and file_name.lower().endswith('.xlsx')):
        continue
    try:
        # Read BOTH sheets before touching any list: if the second read fails
        # nothing has been appended, so the four lists stay the same length.
        ictal_bins = pd.read_excel(file_path, sheet_name='ictal', header=0)
        interictal_bins = pd.read_excel(file_path, sheet_name='interictal', header=0)
    except Exception as e:
        print(f"Error reading the file: {e}")
        continue
    dfs_bin_ictal.append(ictal_bins)
    dfs_bin_interictal.append(interictal_bins)
    names_bin.append(os.path.splitext(file_name)[0])
    labels_bin.append(1 if 'ko' in file_name else 0)

In [5]:
# turning the time to index
def time_to_index(t, sampling_rate = 600):
    """Convert a time-of-day value into a sample index.

    Parameters
    ----------
    t : object with ``hour``, ``minute``, ``second`` and ``microsecond``
        attributes (for example ``datetime.time``).
    sampling_rate : int or float, default 600
        Number of samples per second.

    Returns
    -------
    int
        Elapsed time since 00:00:00 multiplied by ``sampling_rate``,
        rounded down to a whole sample.

    Notes
    -----
    The elapsed time is accumulated in integer microseconds. Going through
    float seconds first can land just below a whole sample (0.57 s * 600
    evaluates to 341.99999999999994) and truncate to the wrong index.
    """
    whole_seconds = (t.hour * 3600) + (t.minute * 60) + t.second
    total_microseconds = whole_seconds * 1_000_000 + t.microsecond
    return int((total_microseconds * sampling_rate) // 1_000_000)

In [6]:
# getting the data frame from name
def get_data(name, dfs_data, names_data):
    """Return the entry of ``dfs_data`` whose file name contains ``name``.

    The comparison is a case-insensitive substring test against each element
    of ``names_data``; the first matching position wins and the element at
    that position in ``dfs_data`` is returned. ``None`` is returned when no
    file name matches.
    """
    hit = next(
        (
            position
            for position, file_name in enumerate(names_data)
            if name.lower() in file_name.lower()
        ),
        None,
    )
    return None if hit is None else dfs_data[hit]
# getting the label from name 1 = ko 0 = con
def get_label(name, names_data, labels_data):
    """Return the label stored alongside the first file name containing ``name``.

    ``name`` is matched as a case-insensitive substring of each element of
    ``names_data``. The label at the same position in ``labels_data`` is
    returned, or ``None`` when nothing matches.
    """
    for position, file_name in enumerate(names_data):
        if name.lower() in file_name.lower():
            return labels_data[position]
    return None

In [7]:
# holders for ictal
trials_ictal = []         # signal excerpt (first column of the recording) per bin
names_trial_ictal = []    # "<bins name>_ictal_<row>"
labels_trial_ictal = []   # label returned by get_label for the mouse
groups_trial_ictal = []   # position of the mouse in names_bin

# holders for interictal
trials_interictal = []
names_trial_interictal = []
labels_trial_interictal = []
groups_trial_interictal = []

for i, bin_name in enumerate(names_bin):
    ictal_bins = dfs_bin_ictal[i]
    interictal_bins = dfs_bin_interictal[i]

    # The recording and its label depend only on the mouse, so look them up
    # once per mouse instead of once per trial.
    recording = get_data(bin_name, dfs_data, names_data)
    label = get_label(bin_name, names_data, labels_data)
    if recording is None:
        if len(ictal_bins) or len(interictal_bins):
            # e.g. the matching .edf failed to load; fail with the reason
            # instead of an AttributeError on None further down
            raise ValueError(
                f"No loaded recording matches the bins file '{bin_name}'"
            )
        continue
    signal = recording.values[:, 0]  # only the first column is analysed

    #ictal: window given by a start time and a duration
    for j in range(ictal_bins.shape[0]):
        sidx = time_to_index(ictal_bins['Sel Start'].values[j], fs)
        duridx = int(ictal_bins['Sel Duration'].values[j] * fs)
        eidx = sidx + duridx
        trials_ictal.append(signal[sidx:eidx])
        names_trial_ictal.append(bin_name + '_ictal_' + str(j))
        labels_trial_ictal.append(label)
        groups_trial_ictal.append(i)

    #interictal: window given by a start time and an end time
    for j in range(interictal_bins.shape[0]):
        sidx = time_to_index(interictal_bins['Sel Start'].values[j], fs)
        eidx = time_to_index(interictal_bins['Sel End'].values[j], fs)
        trials_interictal.append(signal[sidx:eidx])
        names_trial_interictal.append(bin_name + '_interictal_' + str(j))
        labels_trial_interictal.append(label)
        groups_trial_interictal.append(i)
        


In [8]:
# function for computing the mean integrated power in the frequency band
def compute_mean_integrated_power(data, lowcut, highcut, fs= 600):
    """Average the Welch power estimate over a frequency band.

    The spectrum is estimated with ``welch(data, fs, nperseg=2048)``. The
    power values of all frequency bins inside the closed interval
    ``[lowcut, highcut]`` are summed and divided by the number of such bins.
    """
    # spectrum of the trial
    freqs, psd = welch(data, fs, nperseg=2048)

    # bins that belong to the requested band (both edges included)
    in_band = np.logical_and(freqs >= lowcut, freqs <= highcut)
    n_bins = np.sum(in_band)

    # summed power divided by the bin count
    band_total = np.sum(psd[in_band])
    return band_total / n_bins

In [9]:
# function for finding the peak frequency in a chosen band from a trial
def find_peak_frequency(data, lowcut, highcut, fs= 600):
    """Return the frequency with the largest Welch power inside a band.

    The spectrum is estimated with ``welch(data, fs, nperseg=2048)`` and only
    bins inside the closed interval ``[lowcut, highcut]`` are considered.
    """
    # spectrum of the trial
    freqs, psd = welch(data, fs, nperseg=2048)

    # keep only the bins of the requested band (both edges included)
    in_band = (freqs >= lowcut) & (freqs <= highcut)
    band_freqs = freqs[in_band]
    band_psd = psd[in_band]

    # frequency of the strongest bin
    return band_freqs[np.argmax(band_psd)]

In [10]:
# getting all the features for all the trials and mice
# column order of both tables: Theta mean - Gamma mean - Theta peak - Gamma peak
features_ictal = np.zeros((len(trials_ictal),4))
features_interictal = np.zeros((len(trials_interictal),4))

# one row per trial; the ictal table is filled first, then the interictal one
for feature_table, trial_list in (
    (features_ictal, trials_ictal),
    (features_interictal, trials_interictal),
):
    for row, trial in enumerate(trial_list):
        # Theta mean
        feature_table[row,0] = compute_mean_integrated_power(trial, 5, 10)
        # Gamma mean
        feature_table[row,1] = compute_mean_integrated_power(trial, 30, 80)
        # Theta peak
        feature_table[row,2] = find_peak_frequency(trial, 5, 10)
        # Gamma peak
        feature_table[row,3] = find_peak_frequency(trial, 30, 80)

In [11]:
# finding the mean for each mice
# One row per bins file, so the rows always line up with names_bin and
# labels_bin. Sizing the tables from max(group)+1 drops trailing mice that
# contributed no trials and fails outright when there are no trials at all.
n_mice = len(names_bin)
features_mean_ictal = np.full((n_mice, 4), np.nan)
features_mean_interictal = np.full((n_mice, 4), np.nan)

for mean_table, trial_features, trial_groups in (
    (features_mean_ictal, features_ictal, groups_trial_ictal),           # ictal
    (features_mean_interictal, features_interictal, groups_trial_interictal),  # interictal
):
    group_ids = np.asarray(trial_groups)
    for i in range(n_mice):
        group_mask = group_ids == i
        # a mouse without trials keeps its NaN row (no mean of an empty slice)
        if group_mask.any():
            mean_table[i,:] = np.mean(trial_features[group_mask,:], axis=0)

In [13]:
# making csv files
column_names = ['Theta Mean', 'Gamma Mean', 'Theta Peak', 'Gamma Peak']

# each trial is a seizure event in a mouse
results_all_ictal = pd.DataFrame(features_ictal, columns=column_names, index=names_trial_ictal)
results_all_interictal = pd.DataFrame(features_interictal, columns=column_names, index=names_trial_interictal)

# each trial is the mean of all seizure events for a mice
results_mean_ictal = pd.DataFrame(features_mean_ictal, columns=column_names, index=names_bin)
results_mean_interictal = pd.DataFrame(features_mean_interictal, columns=column_names, index=names_bin)

# separating ko and con (label 1 = ko, label 0 = con)
# all
results_all_ictal_ko = results_all_ictal[np.array(labels_trial_ictal) == 1] # all - ictal - ko
results_all_ictal_con = results_all_ictal[np.array(labels_trial_ictal) == 0] # all - ictal - con

results_all_interictal_ko = results_all_interictal[np.array(labels_trial_interictal) == 1] # all - interictal - ko
results_all_interictal_con = results_all_interictal[np.array(labels_trial_interictal) == 0] # all - interictal - con

# mean
results_mean_ictal_ko = results_mean_ictal[np.array(labels_bin) == 1] # mean - ictal - ko
results_mean_ictal_con = results_mean_ictal[np.array(labels_bin) == 0] # mean - ictal - con

results_mean_interictal_ko = results_mean_interictal[np.array(labels_bin) == 1] # mean - interictal - ko
results_mean_interictal_con = results_mean_interictal[np.array(labels_bin) == 0] # mean - interictal - con

# saving
# to_csv does not create missing directories, so make sure the target exists
os.makedirs('./final', exist_ok=True)
results_all_ictal_ko.to_csv('./final/results_all_ictal_ko.csv')
results_all_ictal_con.to_csv('./final/results_all_ictal_con.csv')
results_all_interictal_ko.to_csv('./final/results_all_interictal_ko.csv')
results_all_interictal_con.to_csv('./final/results_all_interictal_con.csv')
results_mean_ictal_ko.to_csv('./final/results_mean_ictal_ko.csv')
results_mean_ictal_con.to_csv('./final/results_mean_ictal_con.csv')
results_mean_interictal_ko.to_csv('./final/results_mean_interictal_ko.csv')
results_mean_interictal_con.to_csv('./final/results_mean_interictal_con.csv')