In [2]:
from pathlib import Path
import pandas as pd
from jsonschema.benchmarks.const_vs_enum import valid, value


def convert_csv_datetime(csv_path: Path, save_path: Path):
    """Rewrite a CSV with its timestamp columns parsed as datetimes.

    The columns 'start', 'end' and 'single_marker' are converted with
    ``pd.to_datetime`` when they exist in the file; values that cannot be
    parsed become NaT (``errors='coerce'``). All other columns are left
    untouched. The result is written to ``save_path`` without the index.

    :param csv_path: CSV file to read.
    :param save_path: destination of the converted CSV.
    """
    frame = pd.read_csv(csv_path)
    # Only touch the timestamp columns that are actually present in this file.
    present = [name for name in ('start', 'end', 'single_marker') if name in frame.columns]
    for name in present:
        frame[name] = pd.to_datetime(frame[name], errors='coerce')
    frame.to_csv(save_path, index=False)


# One-off conversion of the combined-annotations test CSV; the converted copy
# is written next to the input with a "_datetime" suffix.
convert_csv_datetime(
    csv_path=Path('/Users/julian/Developer/SeizurePredictionThesis/test_files/annotations_to_combine/start_marker_end.csv'),
    save_path=Path('/Users/julian/Developer/SeizurePredictionThesis/test_files/annotations_to_combine/start_marker_end_datetime.csv'),
)

In [None]:
from datetime import datetime
import itertools
import pyedflib


def remove_additional_duplicates(for_mayo: Path) -> list[Path]:
    """Remove duplicates that were not caught by fdupes.

    For patient M39K4B3C, these are files from visit V5b that were also copied
    into V5d or V5e. A file in V5d/V5e is treated as a duplicate when a file
    with the same name exists in V5b. Before it is deleted, its EDF start
    datetime is checked against the V5b recording window.

    :param for_mayo: directory that contains the ``M39K4B3C`` patient folder.
    :return: a list of removed files.
    :raises ValueError: if a same-named file starts outside the V5b bounds.
        Files removed before the offending one stay removed.
    """
    patient_folder = for_mayo / 'M39K4B3C'
    removed_files = []

    # The bounds of visit 5b:
    time_format = "%Y-%m-%d %H:%M:%S.%f"
    v5a_end = datetime.strptime('2021-06-15 11:00:00.015300', time_format)
    v5c_start = datetime.strptime('2021-07-15 10:39:44.0', time_format)

    # Compare by file name: a plain string is never equal to a Path object, so
    # membership has to be tested against names rather than the globbed paths.
    v5b_names = {path.name for path in patient_folder.glob('V5b/*.edf')}
    v5d_files = patient_folder.glob('V5d/*.edf')
    v5e_files = patient_folder.glob('V5e/*.edf')
    potential_duplicates = itertools.chain(v5d_files, v5e_files)

    # loop through all potential duplicates and see if they are duplicates
    for edf_path in potential_duplicates:
        # Check if it's contained in V5b
        if edf_path.name not in v5b_names:
            continue

        # Read the start datetime and release the file handle again before the
        # file is deleted (an open handle can block deletion on some platforms).
        with pyedflib.EdfReader(str(edf_path)) as edf:
            start_dt = edf.getStartdatetime()

        # Assert that the datetime corresponds to V5b
        if v5a_end < start_dt < v5c_start:
            print(f"Removing {edf_path}")
            removed_files.append(edf_path)
            edf_path.unlink()
        else:
            raise ValueError(f"File {edf_path}'s start datetime is outside of V5b bounds. Thus it should be removed somewhere else.")

    return removed_files


In [1]:
from pathlib import Path
# Small scratch example: turn a list of Path objects into plain strings.
lst = [Path('a/b/c'), Path('d/e')]
str_lst = [str(path) for path in lst]


def save_list_as_txt(list_: list, save_path: Path):
    """Write every element of ``list_`` to ``save_path``, one per line.

    Each element is formatted with its ``str()`` representation and followed
    by a newline. An existing file at ``save_path`` is overwritten.

    :param list_: items to write.
    :param save_path: text file to create or overwrite.
    """
    with open(save_path, 'w') as handle:
        handle.writelines(f'{entry}\n' for entry in list_)


# Bare trailing expression: shows the converted list as the cell's output.
str_lst

['a/b/c', 'd/e']