In [61]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates
from matplotlib.backends.backend_pdf import PdfPages

In [62]:
def print_top_10_raw(filepath):
    """Print the first ten raw lines of a text file.

    Each line is printed with surrounding whitespace stripped. Bytes that
    cannot be decoded are dropped while reading. Exactly ten lines are always
    printed: once the file is exhausted, the remaining ones come out empty.
    """
    print("\nOpening:", filepath)

    with open(filepath, "r", errors="ignore") as handle:
        remaining = 10
        while remaining:
            print(handle.readline().strip())
            remaining -= 1

def headers(participant_dir):
    """Print the first ten raw lines of each recording file of one participant.

    The file names are hard-coded to the "30-05-2024" recording, so this only
    works for a folder that uses exactly those names.

    Parameters
    ----------
    participant_dir : str
        Path to the participant folder; a trailing path separator is allowed.

    Raises
    ------
    OSError
        If one of the expected files cannot be opened.
    """
    # normpath removes trailing separators of either kind, so a path ending in
    # "\" on Windows still yields the folder name instead of an empty string.
    participant_name = os.path.basename(os.path.normpath(participant_dir))
    print(f"\nreading {participant_name}")

    # file paths
    airflow_path = os.path.join(participant_dir, "Flow - 30-05-2024.txt")
    thoracic_path = os.path.join(participant_dir, "Thorac - 30-05-2024.txt")
    spo2_path = os.path.join(participant_dir, "SPO2 - 30-05-2024.txt")
    events_path = os.path.join(participant_dir, "Flow Events - 30-05-2024.txt")
    sleep_profile = os.path.join(participant_dir, "Sleep profile - 30-05-2024.txt")

    # print raw top 10 lines of every file, in a fixed order
    for path in (airflow_path, thoracic_path, spo2_path, events_path, sleep_profile):
        print_top_10_raw(path)


# Preview the first raw lines of every recording file of participant AP01.
headers("../Data/AP01")


reading AP01

Opening: ../Data/AP01\Flow - 30-05-2024.txt
Signal Type: Flow_TH_Type
Start Time: 5/30/2024 8:59:00 PM
Sample Rate: 32
Length: 875184
Unit:

Data:
30.05.2024 20:59:00,000; 120
30.05.2024 20:59:00,031; 120
30.05.2024 20:59:00,062; 84

Opening: ../Data/AP01\Thorac - 30-05-2024.txt
Signal Type: Sum RIPs-Reference
Start Time: 5/30/2024 8:59:00 PM
Sample Rate: 32
Length: 875184
Unit:

Data:
30.05.2024 20:59:00,000; 13
30.05.2024 20:59:00,031; 13
30.05.2024 20:59:00,062; 10

Opening: ../Data/AP01\SPO2 - 30-05-2024.txt
Signal Type: SPO2_Type
Start Time: 5/30/2024 8:59:00 PM
Sample Rate: 4
Length: 109398
Unit: %

Data:
30.05.2024 20:59:00,000; 93
30.05.2024 20:59:00,250; 94
30.05.2024 20:59:00,500; 94

Opening: ../Data/AP01\Flow Events - 30-05-2024.txt
Signal ID: FlowD\flow
Start Time: 5/30/2024 8:59:00 PM
Unit: s
Signal Type: Impuls

30.05.2024 23:48:45,119-23:49:01,408; 16;Hypopnea; N1
30.05.2024 23:50:16,578-23:50:33,546; 17;Hypopnea; N1
30.05.2024 23:52:13,626-23:52:27,268; 

In [63]:
def parse_signal_txt(file_path):
    """Parse a signal export file into a time-indexed pandas DataFrame.

    The file is expected to hold free-form header lines, a line containing
    only ``Data:``, and then one sample per line in the form
    ``DD.MM.YYYY HH:MM:SS,fff; value``. Blank lines and lines that do not
    match that form are skipped.

    Parameters
    ----------
    file_path : str
        Path to the signal text file.

    Returns
    -------
    pandas.DataFrame
        A single float column ``"value"`` indexed by the sample timestamps.

    Raises
    ------
    ValueError
        If the file has no ``Data:`` marker line.
    """
    timestamps = []
    values = []
    in_data = False

    # errors="ignore" matches print_top_10_raw: a stray undecodable byte in a
    # header line must not abort parsing of the numeric samples.
    with open(file_path, "r", errors="ignore") as f:
        # Stream the file line by line instead of loading it all at once.
        for line in f:
            line = line.strip()

            if not in_data:
                # Everything up to and including the marker is header.
                in_data = line == "Data:"
                continue

            if not line:
                continue

            try:
                ts_str, val = line.split(";")
                ts = datetime.strptime(ts_str.strip(), "%d.%m.%Y %H:%M:%S,%f")
                val = float(val.strip())
            except ValueError:
                # Wrong field count, bad timestamp or non-numeric value.
                continue

            timestamps.append(ts)
            values.append(val)

    if not in_data:
        raise ValueError(f"'Data:' not found in {file_path}")

    return pd.DataFrame({"value": values}, index=pd.to_datetime(timestamps))

In [64]:
def parse_flow_events(file_path):
    """Parse a "Flow Events" export file into a list of events.

    Event lines have four ``;``-separated fields, the first being
    ``DD.MM.YYYY HH:MM:SS,fff-HH:MM:SS,fff`` and the third the event label,
    e.g. ``30.05.2024 23:48:45,119-23:49:01,408; 16;Hypopnea; N1``. The second
    and fourth fields are ignored. Header lines and lines that do not match
    this form are skipped.

    Parameters
    ----------
    file_path : str
        Path to the events text file.

    Returns
    -------
    list of (datetime, datetime, str)
        ``(start, end, label)`` tuples in file order.
    """
    # Imported here because the notebook only imports `datetime` itself.
    from datetime import timedelta

    events = []

    # errors="ignore" matches print_top_10_raw: tolerate stray bytes in headers.
    with open(file_path, "r", errors="ignore") as f:
        for line in f:
            line = line.strip()

            # Cheap pre-filter: event lines contain both separators.
            if "-" not in line or ";" not in line:
                continue

            try:
                time_part, _, label, _ = line.split(";")
                start_str, end_time_str = time_part.split("-")

                start_dt = datetime.strptime(
                    start_str.strip(), "%d.%m.%Y %H:%M:%S,%f"
                )

                # The end carries only a time of day; start from the same date.
                end_dt = datetime.strptime(
                    start_dt.strftime("%d.%m.%Y") + " " + end_time_str.strip(),
                    "%d.%m.%Y %H:%M:%S,%f",
                )
            except ValueError:
                # Wrong field count or unparseable timestamp.
                continue

            # An event that crosses midnight ends on the following day;
            # without this its end would fall before its start.
            if end_dt < start_dt:
                end_dt += timedelta(days=1)

            events.append((start_dt, end_dt, label.strip()))

    return events

In [65]:
def check_all_event_labels(data_root="../Data", n_participants=5):
    """Print the distinct event labels found in each participant's Flow Events file.

    Looks in ``<data_root>/AP01`` .. ``<data_root>/AP<n_participants>`` for the
    first file whose name contains "flow events" (case-insensitive), parses it
    and prints its first event and its sorted unique labels. A summary of all
    labels seen across participants is printed at the end. Missing folders,
    missing files, unreadable files and files without events are reported and
    skipped.

    Parameters
    ----------
    data_root : str
        Folder that contains the participant folders.
    n_participants : int
        Number of participant folders to check, counting from 1.
    """
    all_labels = set()

    print("checking event labels")

    for number in range(1, n_participants + 1):
        # Zero-pad to two digits so participant 10 becomes "AP10", not "AP010".
        pid = f"AP{number:02d}"
        folder = os.path.join(data_root, pid)

        print(f"{pid}")

        # isdir rather than exists: a plain file of that name cannot be listed.
        if not os.path.isdir(folder):
            print("  Folder not found\n")
            continue

        # find Flow Events file
        events_path = None
        for fname in os.listdir(folder):
            if "flow events" in fname.lower():
                events_path = os.path.join(folder, fname)
                print(f" flow events Matched to {fname}")
                break

        if events_path is None:
            print("  Flow Events file not found\n")
            continue

        # Only the parse itself is guarded, so unrelated bugs are not
        # reported as parse errors.
        try:
            events = parse_flow_events(events_path)
        except (OSError, ValueError) as e:
            print("  Error parsing:", e)
            continue

        # An empty file is not a parse error; report it as what it is.
        if not events:
            print("  No events found")
            continue

        # print the first tuple from the list
        print("1st event", events[0])

        labels = sorted({label for _, _, label in events})
        for label in labels:
            print(" ", label)
        all_labels.update(labels)

    # summary
    print("ALL UNIQUE EVENT TYPES:")
    for lab in sorted(all_labels):
        print(" ", lab)

    print(f"\nTotal unique event types: {len(all_labels)}")

# Check the default participant folders and list the event labels they contain.
check_all_event_labels()

checking event labels
AP01
 flow events Matched to Flow Events - 30-05-2024.txt
1st event (datetime.datetime(2024, 5, 30, 23, 48, 45, 119000), datetime.datetime(2024, 5, 30, 23, 49, 1, 408000), 'Hypopnea')
  Hypopnea
  Obstructive Apnea
AP02
 flow events Matched to Flow Events  - 30.05.2024.txt
1st event (datetime.datetime(2024, 5, 30, 22, 29, 21, 193000), datetime.datetime(2024, 5, 30, 22, 29, 33, 818000), 'Hypopnea')
  Hypopnea
  Obstructive Apnea
AP03
 flow events Matched to Flow Events - 29_05_2024.txt
1st event (datetime.datetime(2024, 5, 29, 23, 30, 3, 246000), datetime.datetime(2024, 5, 29, 23, 30, 12, 871000), 'Hypopnea')
  Hypopnea
  Obstructive Apnea
AP04
 flow events Matched to Flow Events - 29.05.2024.txt
1st event (datetime.datetime(2024, 5, 29, 21, 33, 57, 246000), datetime.datetime(2024, 5, 29, 21, 34, 33, 496000), 'Body event')
  Body event
  Hypopnea
  Obstructive Apnea
AP05
 flow events Matched to Flow Events - 28.05.2024.txt
1st event (datetime.datetime(2024, 5, 28, 

In [66]:
def plot_signal(airflow, thoracic, spo2, events, participant_name,
                       window_minutes=5, output_dir="../Visualizations"):
    """Write a multi-page PDF report of three signals with events overlaid.

    The time range of ``airflow`` is cut into consecutive windows of
    ``window_minutes`` minutes. Each window with airflow samples becomes one
    PDF page with three stacked panels (nasal flow, thoracic movement, SpO2)
    that share the time axis. Every event overlapping the window is shaded on
    all panels and labelled on the top one.

    Parameters
    ----------
    airflow, thoracic, spo2 : pandas.DataFrame
        Signals with a ``"value"`` column and a datetime-like index
        (assumed sorted in time, since windows are cut with ``.loc`` slices).
        ``airflow`` defines the overall time range.
    events : iterable of (start, end, label)
        Event intervals; ``start``/``end`` must be comparable with the index.
    participant_name : str
        Used in the page titles and in the output file name.
    window_minutes : int or float
        Length of each page's time window in minutes; must be positive.
    output_dir : str
        Folder for the PDF; created if missing.

    Raises
    ------
    ValueError
        If ``airflow`` is empty or ``window_minutes`` is not positive.
    """
    if len(airflow) == 0:
        raise ValueError(f"No airflow samples to plot for {participant_name}")
    if window_minutes <= 0:
        # A zero or negative window would never advance the paging loop.
        raise ValueError("window_minutes must be positive")

    os.makedirs(output_dir, exist_ok=True)
    save_path = f"{output_dir}/{participant_name}_report.pdf"

    start_time = airflow.index[0]
    end_time = airflow.index[-1]
    window = pd.Timedelta(minutes=window_minutes)

    print("Generating PDF")

    # event color map; labels not listed here fall back to red
    EVENT_COLORS = {
        "Hypopnea": "orange",
        "Obstructive Apnea": "red",
        "Body event": "purple",
        "Mixed Apnea": "brown",
    }

    with PdfPages(save_path) as pdf:

        current_start = start_time
        page = 1

        while current_start < end_time:
            current_end = current_start + window

            airflow_w = airflow.loc[current_start:current_end]
            thoracic_w = thoracic.loc[current_start:current_end]
            spo2_w = spo2.loc[current_start:current_end]

            # Skip gaps without airflow samples; no page is produced for them.
            if len(airflow_w) == 0:
                current_start = current_end
                continue

            # PLOT
            fig, axes = plt.subplots(3, 1, figsize=(14, 6), sharex=True)

            axes[0].plot(airflow_w.index, airflow_w["value"], linewidth=1)
            axes[0].set_ylabel("Nasal Flow (L/min)")
            axes[0].set_title("Nasal Flow")

            axes[1].plot(thoracic_w.index, thoracic_w["value"], linewidth=1)
            axes[1].set_ylabel("Resp. Amplitude")
            axes[1].set_title("Thoracic/Abdominal Resp.")

            axes[2].plot(spo2_w.index, spo2_w["value"], linewidth=1)
            axes[2].set_ylabel("SpO2 (%)")
            axes[2].set_title("SpO2")
            axes[2].set_xlabel("Time")

            # FORMAT TIME AXIS (shared, so configuring the bottom panel is enough)
            locator = mdates.SecondLocator(interval=5)
            formatter = mdates.DateFormatter('%d %H:%M:%S')

            axes[2].xaxis.set_major_locator(locator)
            axes[2].xaxis.set_major_formatter(formatter)

            plt.setp(axes[2].get_xticklabels(), rotation=90)

            # OVERLAY EVENTS
            for start, end, label in events:
                if end < current_start or start > current_end:
                    continue

                color = EVENT_COLORS.get(label, "red")

                # Clip the event to the window: drawing its full extent would
                # stretch the shared x-axis past the window and could put the
                # label off the page.
                span_start = max(start, current_start)
                span_end = min(end, current_end)
                mid_time = span_start + (span_end - span_start) / 2

                for ax in axes:
                    ax.axvspan(span_start, span_end, alpha=0.25, color=color)

                # x in data units, y as a fraction of the panel height, so the
                # label stays inside the panel whatever the signal's range is.
                axes[0].text(
                    mid_time,
                    0.8,
                    label,
                    transform=axes[0].get_xaxis_transform(),
                    color=color,
                    fontsize=8,
                    ha="center",
                    bbox=dict(facecolor='white', alpha=0.6, edgecolor='none')
                )

            # GRID
            for ax in axes:
                ax.grid(True, alpha=0.3)

            fig.suptitle(
                f"{participant_name} | Page {page} | "
                f"{current_start:%Y-%m-%d %H:%M:%S} → {current_end:%H:%M:%S}",
                fontsize=11
            )

            fig.tight_layout()
            pdf.savefig(fig)
            # Close each figure so memory does not grow with the page count.
            plt.close(fig)

            current_start = current_end
            page += 1

    print("Saved", save_path)

In [67]:
# Print the file names found in each participant folder (AP01..AP05).
for i in range(1, 6):
    print(os.listdir(f"../Data/AP0{i}"))

['Flow - 30-05-2024.txt', 'Flow Events - 30-05-2024.txt', 'Sleep profile - 30-05-2024.txt', 'SPO2 - 30-05-2024.txt', 'Thorac - 30-05-2024.txt']
['Flow  - 30.05.2024.txt', 'Flow Events  - 30.05.2024.txt', 'Sleep profile  - 30.05.2024.txt', 'SPO2  - 30.05.2024.txt', 'Thorac  - 30.05.2024.txt']
['Flow - 29_05_2024.txt', 'Flow Events - 29_05_2024.txt', 'Sleep profile - 29_05_2024.txt', 'SPO2 - 29_05_2024.txt', 'Thorac - 29_05_2024.txt']
['Flow Events - 29.05.2024.txt', 'Flow Signal - 29.05.2024.txt', 'Sleep profile - 29.05.2024.txt', 'SPO2 Signal - 29.05.2024.txt', 'Thorac Signal - 29.05.2024.txt']
['Flow Events - 28.05.2024.txt', 'Flow Nasal - 28.05.2024.txt', 'Sleep profile - 28.05.2024.txt', 'SPO2 - 28.05.2024.txt', 'Thorac Movement - 28.05.2024.txt']


In [68]:
def find_file(folder, keyword, exclude=None):
    """Return the path of the single file in ``folder`` whose name contains ``keyword``.

    Matching is case-insensitive. Names that also contain ``exclude`` (when
    given) are ignored. This exists because the recording files are named
    differently from one participant folder to the next.

    Raises
    ------
    FileNotFoundError
        If no file matches; the folder contents are printed first.
    ValueError
        If more than one file matches.
    """
    def _qualifies(fname):
        lowered = fname.lower()
        if keyword.lower() not in lowered:
            return False
        return not (exclude and exclude.lower() in lowered)

    matches = [fname for fname in os.listdir(folder) if _qualifies(fname)]

    if not matches:
        # Show what is actually there to make the failure easy to diagnose.
        print(f"\n Files in {folder}:")
        for entry in os.listdir(folder):
            print("   ", entry)
        raise FileNotFoundError(f" '{keyword}' not found in {folder}")

    # An ambiguous keyword is an error rather than a silent first pick.
    if len(matches) > 1:
        raise ValueError(f"Multiple '{keyword}' files found in {folder}")

    (chosen,) = matches
    print(f" Matched [{keyword}] to {chosen}")

    return os.path.join(folder, chosen)
    
# Visualization
def visualize_participant(participant_dir):
    """Load one participant's signals and events and write the PDF report.

    Locates the flow, thoracic, SpO2 and flow-event files in the folder by
    keyword, parses them, prints the first row of each signal plus the sample
    and event counts, and passes everything to ``plot_signal``.

    Parameters
    ----------
    participant_dir : str
        Path to the participant folder; a trailing path separator is allowed.
        The folder name is used as the participant name.
    """
    # normpath removes trailing separators of either kind, so a path ending in
    # "\" on Windows does not produce an empty name (and a "_report.pdf" file).
    participant_name = os.path.basename(os.path.normpath(participant_dir))

    print(f"Processing {participant_name}")

    # Locate the files; "Flow" also matches "Flow Events", hence the exclude.
    airflow_path = find_file(participant_dir, "Flow", exclude="Event")
    thoracic_path = find_file(participant_dir, "Thorac")
    spo2_path = find_file(participant_dir, "SPO2")
    events_path = find_file(participant_dir, "Flow Events")

    # Load
    airflow = parse_signal_txt(airflow_path)
    print("1st row of airflow ", airflow[:1])
    thoracic = parse_signal_txt(thoracic_path)
    print("1st row of thoracic ", thoracic[:1])
    spo2 = parse_signal_txt(spo2_path)
    print("1st row of spo2", spo2[:1])
    events = parse_flow_events(events_path)

    print("Loaded:", len(airflow), len(thoracic), len(spo2), "samples and", len(events), "events")

    # plot
    plot_signal(airflow, thoracic, spo2, events, participant_name)


In [69]:
# visualize_participant("../Data/AP01")

In [70]:
# this iterate to al folders and file files 
for i in range(1, 6):
    visualize_participant(f"../Data/AP0{i}")

Processing AP01
 Matched [Flow] to Flow - 30-05-2024.txt
 Matched [Thorac] to Thorac - 30-05-2024.txt
 Matched [SPO2] to SPO2 - 30-05-2024.txt
 Matched [Flow Events] to Flow Events - 30-05-2024.txt
1st row of airflow                       value
2024-05-30 20:59:00  120.0
1st row of thoracic                       value
2024-05-30 20:59:00   13.0
1st row of spo2                      value
2024-05-30 20:59:00   93.0
Loaded: 875184 875184 109398 samples and 161 events
Generating PDF
Saved ../Visualizations/AP01_report.pdf
Processing AP02
 Matched [Flow] to Flow  - 30.05.2024.txt
 Matched [Thorac] to Thorac  - 30.05.2024.txt
 Matched [SPO2] to SPO2  - 30.05.2024.txt
 Matched [Flow Events] to Flow Events  - 30.05.2024.txt
1st row of airflow                       value
2024-05-30 21:22:45    0.0
1st row of thoracic                       value
2024-05-30 21:22:45    0.0
1st row of spo2                      value
2024-05-30 21:22:45    0.0
Loaded: 849672 849672 106209 samples and 186 events
Gen