# In [None]:
import json
import numpy as np
from tqdm import tqdm
import os

# Path of the JSON file that main() loads and cleans.
# TODO: set this to your input file before running.
INPUT_PATH = "path/to/input.json"
# Path of the JSON file that main() writes the cleaned data to.
# TODO: set this to your output file before running.
OUTPUT_PATH = "path/to/output.json"

def is_valid_ppg(ppg, expected_len=100):
    """Return True if *ppg* is a list of finite real numbers of the right length.

    Args:
        ppg: Candidate signal; anything that is not a ``list`` is rejected.
        expected_len: Required number of samples (defaults to 100).

    Returns:
        ``True`` when every sample is an ``int`` or ``float`` (but not a
        ``bool``) that converts to a finite float, ``False`` otherwise.
    """
    if not isinstance(ppg, list) or len(ppg) != expected_len:
        return False
    for sample in ppg:
        # bool is a subclass of int, so JSON true/false would otherwise
        # slip through as numeric samples.
        if isinstance(sample, bool) or not isinstance(sample, (int, float)):
            return False
        try:
            # Python ints are unbounded; one that does not fit in a float
            # cannot be a usable sample and must not crash the check.
            as_float = float(sample)
        except OverflowError:
            return False
        if not np.isfinite(as_float):
            return False
    return True

def clean_ppg_only(data):
    """Filter a nested case/uid/segment mapping down to valid PPG signals.

    Walks ``data`` as ``{case_id: {uid: [segment, ...]}}``. Entries whose
    value is not a dict (case level) or not a list (uid level) are skipped.
    Each segment that is a dict whose ``'data'`` value passes
    ``is_valid_ppg`` is kept as ``{"data": [float, ...]}``; every other
    segment is counted as removed. Uids and cases left with no valid
    segments are omitted, and all keys are converted to ``str``.

    Prints before/after/removed segment counts and returns the cleaned
    mapping.
    """
    print("Cleaning PPG segments - keeping only valid 100-sample signals...")
    result = {}
    n_seen = 0
    n_kept = 0
    n_dropped = 0

    for case_key, case_entry in tqdm(data.items(), desc="Cases"):
        if isinstance(case_entry, dict):
            kept_by_uid = {}
            for uid_key, seg_list in case_entry.items():
                if isinstance(seg_list, list):
                    kept = [
                        {"data": [float(value) for value in item.get('data')]}
                        for item in seg_list
                        if isinstance(item, dict) and is_valid_ppg(item.get('data'))
                    ]
                    # Every segment is either kept or dropped, so the
                    # dropped count is simply the remainder.
                    n_seen += len(seg_list)
                    n_kept += len(kept)
                    n_dropped += len(seg_list) - len(kept)
                    if kept:
                        kept_by_uid[str(uid_key)] = kept
            if kept_by_uid:
                result[str(case_key)] = kept_by_uid

    print(f"Segments before: {n_seen}")
    print(f"Segments after : {n_kept}")
    print(f"Removed        : {n_dropped}")
    return result

def main():
    """Load the input JSON, drop invalid PPG segments, and save the result.

    Reads ``INPUT_PATH``, passes the parsed data through ``clean_ppg_only``
    and writes the cleaned structure to ``OUTPUT_PATH`` as indented JSON.
    Prints a message and returns early when the input file does not exist.
    """
    if not os.path.exists(INPUT_PATH):
        print(f"Input file not found: {INPUT_PATH}")
        return

    print(f"Loading {INPUT_PATH}...")
    with open(INPUT_PATH, 'r', encoding="utf-8") as f:
        data = json.load(f)

    print(f"Loaded {len(data)} cases.")
    cleaned = clean_ppg_only(data)

    # dirname() is "" for a bare file name and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    out_dir = os.path.dirname(OUTPUT_PATH)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(OUTPUT_PATH, 'w', encoding="utf-8") as f:
        json.dump(cleaned, f, indent=2)

    print("Removed Invalid data")

# Run the cleaning pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()