# TODO: Build file paths with pathlib and f-strings instead of string concatenation.

In [5]:
import plotly.express as px
from pandas import DataFrame

def plot_violin(title: str, df: DataFrame) -> None:
    """Show an interactive violin plot of the 'Duration (seconds)' column.

    Args:
        title: Title displayed above the figure.
        df: Table with a 'Duration (seconds)' column, plotted on the y axis,
            and a 'Description' column, added to each point's hover tooltip.
    """
    fig = px.violin(
        df,
        title=title,
        y='Duration (seconds)',
        box=True,
        points="all",
        # Pass the column as a list: that is the form documented for
        # hover_data across plotly.express releases.
        hover_data=['Description'],
    )
    fig.show()

In [None]:
import os
import json
import datetime
import pyedflib
import pandas as pd

# For every patient, measure the time gap between consecutive EDF recordings
# (in the order the files are listed in the patient's summary file), plot the
# per-patient distribution, and keep each patient's table in df_lst.
data_dir = "./physionet.org/files/chbmit/1.0.0/"
df_lst = []  # one gap DataFrame per patient, in patient order

for indexPatient in range(1, 24 + 1):
    patient_id = f"chb{indexPatient:02d}"
    patient_dir = os.path.join(data_dir, patient_id)

    last_end_dt = None  # end datetime of the previously listed recording
    last_edfname = ''
    rows = []
    with open(os.path.join(patient_dir, f"{patient_id}-summary.txt"), 'r') as f:
        for line in f:
            data = line.split(':')
            if data[0] != "File Name":
                continue
            edfName = data[1].strip()

            # The context manager closes the EDF file even if a header read raises.
            with pyedflib.EdfReader(os.path.join(patient_dir, edfName)) as pedf:
                startTime = pedf.getStartdatetime()
                duration_sec = pedf.getFileDuration()

            if last_end_dt is not None:
                # Gap = start of this recording minus end of the previous one.
                sec = (startTime - last_end_dt).total_seconds()
                rows.append({
                    'Duration (seconds)': sec,
                    'Description': f"{last_edfname}-->{edfName}:\n{sec}s",
                })
            last_end_dt = startTime + datetime.timedelta(seconds=duration_sec)
            last_edfname = edfName

    # Build the frame once instead of growing it row by row.
    df = pd.DataFrame(rows, columns=['Duration (seconds)', 'Description'])
    plot_violin(f"chb{indexPatient:02d}-len(gap)-dist", df)
    df_lst.append(df)


In [7]:
# Pool the per-patient gap tables and plot them as a single distribution.
plot_violin(
    title="All 24 Patients Gap Length",
    df=pd.concat(df_lst),
)

In [10]:
# Pool the per-patient gap tables, then display only the gaps longer than 38 s.
df_gap = pd.concat(df_lst)
is_long_gap = df_gap['Duration (seconds)'] > 38.0
df_gap[is_long_gap]

Unnamed: 0,Duration (seconds),Description
19,243.0,chb01_20.edf-->chb01_21.edf:\n243.0s
26,47.0,chb01_27.edf-->chb01_29.edf:\n47.0s
32,10197.0,chb01_34.edf-->chb01_36.edf:\n10197.0s
40,7222.0,chb01_43.edf-->chb01_46.edf:\n7222.0s
0,3610.0,chb02_01.edf-->chb02_02.edf:\n3610.0s
...,...,...
28,7221.0,chb22_51.edf-->chb22_54.edf:\n7221.0s
29,73794.0,chb22_54.edf-->chb22_77.edf:\n73794.0s
1,129.0,chb23_07.edf-->chb23_08.edf:\n129.0s
4,54292.0,chb23_10.edf-->chb23_16.edf:\n54292.0s


In [None]:
# Collect the length of every annotated seizure from each patient's summary
# file and plot the pooled distribution.
data_dir = "./physionet.org/files/chbmit/1.0.0/"
df_lst = []  # one seizure-length DataFrame per patient, in patient order

SEIZURE_COUNT_PREFIX = 'Number of Seizures in File:'


def _seizure_second(line):
    """Return the integer that follows the first ':' on a seizure start/end line."""
    return int(line.partition(':')[2].split()[0])


for indexPatient in range(1, 24 + 1):
    patient_id = f"chb{indexPatient:02d}"
    summary_path = os.path.join(data_dir, patient_id, f"{patient_id}-summary.txt")

    rows = []
    with open(summary_path, 'r') as f:
        for line in f:
            data = line.split(':')
            if data[0] != "File Name":
                continue
            edfName = data[1].strip()

            # Skip forward within this file's entry until the seizure count
            # line; a blank line (or end of file) means the entry has none.
            nextLine = f.readline().strip()
            while nextLine and not nextLine.startswith(SEIZURE_COUNT_PREFIX):
                nextLine = f.readline().strip()
            if not nextLine.startswith(SEIZURE_COUNT_PREFIX):
                continue

            # Each seizure is described by a start line followed by an end line.
            for _ in range(int(nextLine.split(':')[1])):
                szStartSec = _seizure_second(f.readline())
                szEndSec = _seizure_second(f.readline())
                rows.append({
                    'Duration (seconds)': szEndSec - szStartSec,
                    'Description': f"{edfName}: {szEndSec-szStartSec}s",
                })

    # Build the frame once instead of growing it row by row.
    df = pd.DataFrame(rows, columns=['Duration (seconds)', 'Description'])
    df_lst.append(df)
plot_violin("All 24 Patients Onset Length", pd.concat(df_lst))

In [3]:
import json

# For every patient, write the unrecorded intervals between consecutive EDF
# recordings to <output_dir>/chbNN/unrecorded_gap.json.
data_dir = "./physionet.org/files/chbmit/1.0.0/"
output_dir = "unrecorded_chbmit"
continuous_threshold = 38 # Unit (second)
# Gap threshold. When the gap between two adjacent recordings is greater than
# or equal to this value, a seizure onset may have gone unrecorded within the gap,
# so the data around the gap cannot be labelled as inter-ictal, pre-ictal or post-ictal.
# Such contaminated non-ictal data will not be used.

dt_fmt = '%Y-%m-%d %H:%M:%S'
for indexPatient in range(1, 24 + 1):
    patient_id = f"chb{indexPatient:02d}"
    patient_dir = os.path.join(data_dir, patient_id)

    last_end_dt = None  # end datetime of the previously listed recording
    pat_noncontinuous_lst = []  # [gap start, gap end] pairs formatted with dt_fmt
    with open(os.path.join(patient_dir, f"{patient_id}-summary.txt"), 'r') as f:
        for line in f:
            data = line.split(':')
            if data[0] != "File Name":
                continue
            edfName = data[1].strip()

            # The context manager closes the EDF file even if a header read raises.
            with pyedflib.EdfReader(os.path.join(patient_dir, edfName)) as pedf:
                startTime = pedf.getStartdatetime()
                duration_sec = pedf.getFileDuration()

            if last_end_dt is not None:
                sec = (startTime - last_end_dt).total_seconds()
                if sec >= continuous_threshold:
                    pat_noncontinuous_lst.append(
                        [last_end_dt.strftime(dt_fmt), startTime.strftime(dt_fmt)]
                    )
            last_end_dt = startTime + datetime.timedelta(seconds=duration_sec)

    # Create the per-patient output folder on first run so the write cannot
    # fail on a missing directory.
    patient_out_dir = os.path.join(output_dir, patient_id)
    os.makedirs(patient_out_dir, exist_ok=True)
    with open(os.path.join(patient_out_dir, "unrecorded_gap.json"), "wt") as f:
        json.dump(pat_noncontinuous_lst, f, indent=2)