In [None]:
# Convert a semicolon-delimited .txt file (timestamp; hr; stage) into a cleaned CSV (hr, sleepstage)
import pandas as pd
import numpy as np

def map_sleep_stage(stage_str):
    """Translate a sleep-stage label into its numeric code.

    The label is converted to a lower-case string and searched for known
    keywords (substring match). Keyword groups are tried in the order listed
    below and the first group that matches decides the result:

        "wake"          -> 0
        "n1" or "n2"    -> 1
        "n3" or "sws"   -> 2
        "rem"           -> 3

    Returns ``np.nan`` when no keyword is found in the label.
    """
    label = str(stage_str).lower()

    # Order matters: earlier groups take priority when several keywords occur.
    keyword_groups = (
        (("wake",), 0),
        (("n1", "n2"), 1),
        (("n3", "sws"), 2),
        (("rem",), 3),
    )
    for keywords, code in keyword_groups:
        if any(keyword in label for keyword in keywords):
            return code
    return np.nan

def clean_and_fill_hr_column(df):
    """Add an ``hr_clean`` column holding a numeric, gap-filled copy of ``hr``.

    Entries of ``df["hr"]`` that cannot be parsed as numbers become NaN; gaps
    are then filled from the previous valid value, and any gap remaining at
    the start is filled from the next valid value. If no entry is numeric the
    column stays all-NaN.

    The frame is modified in place and the same object is returned.
    """
    numeric_hr = pd.to_numeric(df["hr"], errors="coerce")
    df["hr_clean"] = numeric_hr.ffill().bfill()
    return df

def clean_hr_and_stage(input_csv_path, output_csv_path):
    """Clean one raw recording and save it as a two-column CSV.

    The input is read as a header-less, semicolon-separated file whose three
    columns are named ``timestamp``, ``hr`` and ``stage``. Heart-rate values
    are made numeric and gap-filled, stage labels are mapped to numeric
    codes, and rows whose stage could not be mapped are dropped.

    The output CSV has the columns ``hr`` and ``sleepstage`` and no index
    column. ``sleepstage`` is always written as an integer.

    Args:
        input_csv_path: Path of the raw semicolon-separated file.
        output_csv_path: Path the cleaned CSV is written to.

    Raises:
        Whatever ``pandas.read_csv`` / ``DataFrame.to_csv`` raise for
        unreadable input or unwritable output.
    """
    # Load raw data
    raw = pd.read_csv(input_csv_path, sep=";", names=["timestamp", "hr", "stage"])

    # Clean and fill hr (adds the "hr_clean" column)
    raw = clean_and_fill_hr_column(raw)

    # Map sleep stage strings to numbers; unmapped labels become NaN
    raw["sleepstage"] = raw["stage"].apply(map_sleep_stage)

    # Drop unmapped rows, keep only the needed columns, and force an integer
    # dtype. Without the cast the column is float64 whenever at least one row
    # was unmapped (NaN upcasts the column), so some files would contain
    # "0.0" and others "0" for the same stage.
    df_cleaned = (
        raw.dropna(subset=["sleepstage"])[["hr_clean", "sleepstage"]]
        .rename(columns={"hr_clean": "hr"})
        .astype({"sleepstage": "int64"})
    )

    # Save cleaned data to CSV
    df_cleaned.to_csv(output_csv_path, index=False)

    print(f"Cleaned data saved to {output_csv_path}")


In [None]:
# Clean every participant's .txt file in a folder
import os
import glob

def process_all_participants(input_folder, output_folder):
    """Clean every ``.txt`` file in *input_folder* and write CSVs to *output_folder*.

    For each ``<name>.txt`` directly inside *input_folder*, the cleaned data
    is written to ``<output_folder>/<name>_cleaned.csv``. The output folder is
    created if it does not exist. Files are processed in sorted path order.

    A failure on one file is reported on stdout and does not stop the
    remaining files from being processed. If no ``.txt`` file is found, a
    warning is printed and nothing else happens.

    Args:
        input_folder: Directory containing the raw ``.txt`` files.
        output_folder: Directory that receives the cleaned CSV files.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Escape the folder part so characters such as "[" or "*" in the path are
    # matched literally rather than being interpreted as glob wildcards.
    pattern = os.path.join(glob.escape(input_folder), "*.txt")
    # Sort for a reproducible processing (and log) order; glob's is arbitrary.
    files = sorted(glob.glob(pattern))

    if not files:
        print("⚠️ No .txt files found in input folder.")
        return

    for file_path in files:
        filename = os.path.basename(file_path)
        participant_id = os.path.splitext(filename)[0]
        output_path = os.path.join(output_folder, f"{participant_id}_cleaned.csv")

        # Best-effort batch: report the failing file and carry on with the rest.
        try:
            clean_hr_and_stage(file_path, output_path)
        except Exception as e:
            print(f"❌ Failed to process {filename}: {e}")


# Run the batch clean-up; edit the two folder paths below to match your setup.
process_all_participants(
    input_folder="HRV_Model/Heartrate Data",
    output_folder="HR_CSV",
)