In [2]:
import csv
from patient_info import PatientInfo, TimepointInfo

In [3]:
def parse_csv(filepath: str) -> list[dict]:
    """Read a comma-delimited CSV file into a list of per-row dictionaries.

    The first line of the file supplies the dictionary keys (standard
    ``csv.DictReader`` behaviour). The file is decoded as ``utf-8-sig`` so a
    leading byte-order mark, if present, does not end up in the first
    column name.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        One dictionary per data row, in file order.

    Raises:
        ValueError: If the file yields no data rows (for example an empty
            file or a file that only contains a header line).
    """
    with open(filepath, newline="", encoding="utf-8-sig") as csv_file:
        # Materialise the reader while the file handle is still open.
        records = list(csv.DictReader(csv_file, delimiter=","))

    if not records:
        raise ValueError("The CSV file is empty or improperly formatted.")
    return records

In [4]:
def list_patients(rows: list[dict]) -> list[PatientInfo]:
    """Convert parsed CSV rows into one PatientInfo object per patient.

    Rows are grouped by the value of their "Patient" column. Grouping is done
    by ID rather than by comparing against the previous row, so rows that
    belong to the same patient are merged even when they are not adjacent in
    the input. Patients are returned in order of first appearance, which for
    already-grouped input matches the row order.

    For every row a TimepointInfo is built from the "Timepoint", "Age",
    "MS Type", "EDSS", "Lesion Number" and "Lesion Volume" columns. Missing or
    empty cells become None; non-empty numeric cells are converted with
    float()/int().

    Args:
        rows: Row dictionaries keyed by column name, e.g. from csv.DictReader.

    Returns:
        The PatientInfo objects with id, sex, timepoint_counter and
        timepoint_infos filled in. Sex is taken from the patient's first row.
        timepoint_infos maps the timepoint label to its TimepointInfo.

    Raises:
        ValueError: If a non-empty numeric cell cannot be converted.
    """

    def _optional(value, cast):
        # Missing (None) and empty-string cells both mean "no value".
        return cast(value) if value else None

    # Plain dicts preserve insertion order, so first-seen patient order is kept.
    patients_by_id = {}
    timepoints_by_id = {}

    for row in rows:
        patient_id = row.get("Patient")

        if patient_id not in patients_by_id:
            patient = PatientInfo()
            patient.id = patient_id
            patient.sex = row.get("Sex")
            patients_by_id[patient_id] = patient
            timepoints_by_id[patient_id] = {}
        patient = patients_by_id[patient_id]

        timepoint_info = TimepointInfo(
            timepoint=row.get("Timepoint"),
            age=_optional(row.get("Age"), float),
            ms_type=row.get("MS Type") or None,
            edss=_optional(row.get("EDSS"), float),
            lesion_number=_optional(row.get("Lesion Number"), int),
            lesion_volume=_optional(row.get("Lesion Volume"), float),
        )

        # Every row is counted, including rows without a timepoint label.
        # NOTE(review): assumes PatientInfo() starts timepoint_counter at a
        # numeric value (presumably 0) - confirm in patient_info.
        patient.timepoint_counter += 1

        # Rows without a timepoint label cannot be keyed, so they are not stored.
        if timepoint_info.timepoint:
            timepoints_by_id[patient_id][timepoint_info.timepoint] = timepoint_info

    # Attach the collected timepoints once each patient's rows are all seen.
    for patient_id, patient in patients_by_id.items():
        patient.timepoint_infos = timepoints_by_id[patient_id]

    return list(patients_by_id.values())


In [5]:
if __name__ == "__main__":
    # Functionality test: parse the clinical CSV, build the patient objects,
    # then print a short preview of the result.
    rows = parse_csv("clinical_data.csv")
    patients = list_patients(rows)

    # Slice rather than index over range(3) so a dataset with fewer than
    # three patients prints what is available instead of raising IndexError.
    for patient in patients[:3]:
        print(patient)

    # Spot-check individual timepoints. The lookups are guarded so a smaller
    # dataset reports what is missing instead of raising IndexError/KeyError.
    for patient_index, timepoint_label in ((0, "T1"), (1, "T2")):
        if patient_index >= len(patients):
            print(f"No patient at index {patient_index}; skipping timepoint {timepoint_label}.")
            continue
        timepoint_info = patients[patient_index].timepoint_infos.get(timepoint_label)
        if timepoint_info is None:
            print(f"Patient at index {patient_index} has no timepoint {timepoint_label}.")
        else:
            print(timepoint_info)


id = P1,
sex = F,
timepoint_number = 3,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001FB03168610>, 'T2': <patient_info.TimepointInfo object at 0x000001FB0316ADD0>, 'T3': <patient_info.TimepointInfo object at 0x000001FB0316B590>},


id = P2,
sex = M,
timepoint_number = 4,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001FB02DBBA10>, 'T2': <patient_info.TimepointInfo object at 0x000001FB03160D10>, 'T3': <patient_info.TimepointInfo object at 0x000001FB03191CD0>, 'T4': <patient_info.TimepointInfo object at 0x000001FB03191D90>},


id = P3,
sex = F,
timepoint_number = 4,
timepoint_infos = {'T1': <patient_info.TimepointInfo object at 0x000001FB03191E50>, 'T2': <patient_info.TimepointInfo object at 0x000001FB03191E90>, 'T3': <patient_info.TimepointInfo object at 0x000001FB03191ED0>, 'T4': <patient_info.TimepointInfo object at 0x000001FB03191F10>},


timepoint = T1,
age = 28.09,
ms_type = SMRR,
edss = 3.5,
lesion_number = 18,
lesion_volume = 20674