Script Name: szDur

Author: Fatemeh Delavari  
Original Version: (02/07/2025)
Version: 2.0 (02/10/2025)  
Description: Calculates duration of seizures

In [176]:
import pandas as pd
import glob
import os

# Path to the folder containing CSV files
folder_path = 'C:/Users/Atena/Documents/csv_files'

# Get all CSV file paths in the folder
csv_files = glob.glob(os.path.join(folder_path, "*.csv"))

# Define the target labels to check against
target_labels = ['bckg', 'seiz', 'fnsz', 'gnsz', 'spsz', 'cpsz', 'absz', 
                 'tnsz', 'cnsz', 'tcsz', 'atsz', 'mysz', 'nesz']

# Initialize dictionaries for each label
long_sz = {f'{label}': [] for label in target_labels}

# Loop over each CSV file and process it
for file_path in csv_files:
    # Load the CSV file, skipping the commented lines
    df = pd.read_csv(file_path, comment='#')
    
    # Convert label column to lowercase for case-insensitive matching
    df['label'] = df['label'].str.lower()
    
    # Filter the rows that match the target labels
    matching_df = df[df['label'].isin(target_labels)]
    
    # Calculate the time differences (stop_time - start_time)
    matching_df['time_diff'] = matching_df['stop_time'] - matching_df['start_time']
    
    # Process each label and add the unique sorted values to the corresponding dictionaries
    for label in target_labels:
        label_data = matching_df[matching_df['label'] == label]
        
        # Extract and sort unique duration values
        unique_dur = sorted(set(label_data['time_diff']))

        if any(dur>1800 for dur in unique_dur):
        # if sum(unique_dur)>3000:
            file_name = "".join(file_path)[-22:]
            long_sz[f'{label}'].append(file_name)

In [209]:
import pandas as pd
import glob
import os

# Path to the folder containing CSV files
folder_path = 'C:/Users/Atena/Documents/csv_files'

# Get all CSV file paths in the folder
csv_files = glob.glob(os.path.join(folder_path, "*.csv"))

# Define the target labels to check against
target_labels = ['bckg', 'seiz', 'fnsz', 'gnsz', 'spsz', 'cpsz', 'absz', 
                 'tnsz', 'cnsz', 'tcsz', 'atsz', 'mysz', 'nesz']
# target_labels = ['bckg']

# Initialize dictionaries for each label
long_sz = {f'{label}': [] for label in target_labels}

# Loop over each CSV file and process it
for file_path in csv_files:
    # Load the CSV file, skipping the commented lines
    df = pd.read_csv(file_path, comment='#')
    
    # Convert label column to lowercase for case-insensitive matching
    df['label'] = df['label'].str.lower()
    
    # Filter the rows that match the target labels
    matching_df = df[df['label'].isin(target_labels)]
    
    # Calculate the time differences (stop_time - start_time)
    matching_df['time_diff'] = matching_df['stop_time'] - matching_df['start_time']
    
    # Process each label and add the unique sorted values to the corresponding dictionaries
    for label in target_labels:
        label_data = matching_df[matching_df['label'] == label]
        
        # Extract and sort unique duration values
        unique_dur = sorted(set(label_data['time_diff']))

        # Additional code to merge distinct marked start and stop times related to when the seizure starts and ends in each channel for each seizure in one file
        durdur = unique_dur

        unique_start = sorted(set(label_data['start_time']))
        unique_stop = sorted(set(label_data['stop_time']))

        if len(unique_start)>0:

            merged_start_times = [unique_start[0]]
            current_group = unique_start[0]
            for i in range(1, len(unique_start)):
                if unique_start[i] - current_group <= 3600:
                    current_group = unique_start[i]
                else:
                    merged_start_times.append(current_group)
                    current_group = unique_start[i]

            merged_stop_times = [unique_stop[-1]]
            current_group = unique_stop[-1]
            for i in range(len(unique_stop)-1, 0, -1):
                if unique_stop[i] - current_group <= 3600:
                    current_group = unique_stop[i]
                else:
                    merged_stop_times.append(current_group)
                    current_group = unique_stop[i]

            merged_dur = [merged_stop_times[i] - merged_start_times[i] for i in range(len(merged_start_times))]
            durdur = merged_dur
        #

        # if any(dur>300 for dur in unique_dur):
        # if sum(unique_dur)>3000:
        if any(dur>3000 for dur in durdur):
            file_name = "".join(file_path)[-22:]
            long_sz[f'{label}'].append(file_name)

In [173]:
print(long_sz)

{'bckg': ['aaaaaatf_s002_t002.csv', 'aaaaabbc_s001_t000.csv', 'aaaaabgs_s001_t001.csv', 'aaaaabpp_s001_t000.csv', 'aaaaacfa_s004_t001.csv', 'aaaaacna_s001_t000.csv', 'aaaaacut_s001_t000.csv', 'aaaaaddu_s001_t000.csv', 'aaaaadhe_s001_t000.csv', 'aaaaadhp_s002_t001.csv', 'aaaaaedo_s001_t000.csv', 'aaaaaent_s001_t002.csv', 'aaaaagro_s003_t000.csv', 'aaaaagxr_s017_t001.csv', 'aaaaaiad_s002_t000.csv', 'aaaaajud_s001_t000.csv', 'aaaaakks_s001_t000.csv', 'aaaaakro_s009_t000.csv', 'aaaaakvb_s001_t000.csv', 'aaaaaliv_s003_t001.csv', 'aaaaamca_s004_t003.csv', 'aaaaamoa_s007_t001.csv', 'aaaaamoe_s002_t004.csv', 'aaaaamoe_s004_t001.csv', 'aaaaamoe_s005_t000.csv', 'aaaaamqy_s007_t004.csv', 'aaaaamrt_s001_t000.csv', 'aaaaaolm_s001_t000.csv', 'aaaaaosa_s008_t007.csv', 'aaaaaota_s002_t000.csv', 'aaaaapay_s002_t000.csv', 'aaaaapcr_s008_t000.csv', 'aaaaapgf_s001_t000.csv', 'aaaaapks_s005_t000.csv', 'aaaaappt_s004_t000.csv', 'aaaaaprc_s003_t000.csv', 'aaaaaqfx_s008_t001.csv', 'aaaaaqjn_s002_t001.csv', 'a

In [204]:
for key in long_sz:
    print(len(long_sz[key]))

568
0
27
18
1
8
1
0
0
2
0
0
0


In [208]:
for key in long_sz:
    print(len(long_sz[key]))

84
0
9
10
1
1
0
0
0
0
0
0
0


In [210]:
for key in long_sz:
    print(len(long_sz[key]))

17
0
3
3
1
1
0
0
0
0
0
0
0
