In [None]:
# segment_30_fixed_1sec.py
import json
import os
import time
import numpy as np

# --- Configuration -----------------------------------------------------------
# NOTE(review): the two directory assignments below are template placeholders
# with no right-hand side; replace each with a path string before running,
# otherwise the module does not parse.
# Directory that main() scans for *.json input files.
INPUT_DIR = ## Set input directory ##
# Directory that receives the "seg30_<name>" output files.
OUTPUT_DIR =## Set output directory ##
# Samples per second of the input signals; used to convert sample indices
# into seconds.
ORIGINAL_FS = 100
# Length of one segment in samples; equal to ORIGINAL_FS, i.e. one second
# of samples.
SEGMENT_SAMPLES_100HZ = ORIGINAL_FS
# Number of segments extracted for each UID.
SEGMENTS_PER_UID = 30
# When True, each segment also stores its mean as a one-element "data_1hz" list.
INCLUDE_1HZ = True

# Executed at import time so the output directory exists before any file is written.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def longest_valid_block(arr):
    """Locate the longest contiguous run of finite values in a 1-D signal.

    Args:
        arr: 1-D array-like of numbers. NaN and +/-inf count as invalid.

    Returns:
        ``(start, end)`` as Python ints, both inclusive, delimiting the
        longest run of finite samples. When several runs share the maximum
        length the earliest one is returned. If there is no finite sample
        at all (including empty input) the sentinel ``(0, -1)`` is returned,
        so callers can detect it with ``end < start``.
    """
    valid = np.isfinite(arr)
    if not np.any(valid):
        return 0, -1

    # Cast to a signed integer type BEFORE differencing: np.diff on a boolean
    # array yields booleans (XOR), which loses the sign that distinguishes a
    # rising edge (+1, run start) from a falling edge (-1, run end).
    padded = np.concatenate(([0], valid.astype(np.int8), [0]))
    edges = np.diff(padded)

    starts = np.flatnonzero(edges == 1)
    # A falling edge sits one position past the last valid sample of a run.
    ends = np.flatnonzero(edges == -1) - 1

    lengths = ends - starts + 1
    best = np.argmax(lengths)  # first maximum wins on ties
    return int(starts[best]), int(ends[best])

def make_30_segments(signal_100hz, uid, case_id):
    """Cut evenly spaced fixed-length segments out of the longest finite run.

    The longest contiguous block of finite samples is located with
    ``longest_valid_block``. ``SEGMENTS_PER_UID`` windows of
    ``SEGMENT_SAMPLES_100HZ`` samples each are then taken from that block at
    a constant stride, the first window starting at the block start and the
    last one ending at or just before the block end.

    Args:
        signal_100hz: 1-D numeric array (or array-like) of samples; sample
            indices are converted to seconds by dividing by ``ORIGINAL_FS``.
        uid: Identifier stored (as a string) in every record and used in
            the progress messages.
        case_id: Identifier stored (as a string) in every record.

    Returns:
        A list of ``SEGMENTS_PER_UID`` dicts with the keys ``case_id``,
        ``uid``, ``segment_idx``, ``start_sample``, ``end_sample``
        (inclusive, relative to the full input signal), ``start_time_sec``,
        ``center_time_sec``, ``duration_sec``, ``data_100hz`` and
        ``data_1hz`` (a one-element list holding the segment mean, or
        ``None`` when ``INCLUDE_1HZ`` is false). An empty list is returned
        when the signal has no finite data or the finite block is too short.
    """
    signal_100hz = np.asarray(signal_100hz)
    start_idx, end_idx = longest_valid_block(signal_100hz)
    if end_idx < start_idx:
        print(f"UID {uid}: no valid data")
        return []

    valid = signal_100hz[start_idx:end_idx + 1]
    total_samples = len(valid)

    # Demand room for every segment plus one segment-length gap between
    # neighbours: n segments + (n - 1) gaps.
    min_needed = (2 * SEGMENTS_PER_UID - 1) * SEGMENT_SAMPLES_100HZ
    if total_samples < min_needed:
        print(f"UID {uid}: only {total_samples} samples, "
              f"not enough for {SEGMENTS_PER_UID} segments")
        return []

    # Stride that spreads the windows across the whole block; a single
    # segment needs no stride (and would otherwise divide by zero).
    if SEGMENTS_PER_UID > 1:
        step_samples = (total_samples - SEGMENT_SAMPLES_100HZ) // (SEGMENTS_PER_UID - 1)
    else:
        step_samples = 0

    # Derived from the configuration instead of hard-coding 1.0 s / 0.5 s.
    duration_sec = SEGMENT_SAMPLES_100HZ / ORIGINAL_FS
    half_duration_sec = duration_sec / 2

    segments = []
    for i in range(SEGMENTS_PER_UID):
        s = i * step_samples
        e = s + SEGMENT_SAMPLES_100HZ
        seg_100hz = valid[s:e]
        seg_1hz = [float(np.mean(seg_100hz))] if INCLUDE_1HZ else None

        # Offsets are relative to the valid block; add start_idx to express
        # them in the coordinates of the full input signal.
        start_sec = (start_idx + s) / ORIGINAL_FS
        center_sec = start_sec + half_duration_sec
        segments.append({
            "case_id": str(case_id),
            "uid": str(uid),
            "segment_idx": i,
            "start_sample": int(start_idx + s),
            "end_sample": int(start_idx + e - 1),
            "start_time_sec": round(start_sec, 3),
            "center_time_sec": round(center_sec, 3),
            "duration_sec": duration_sec,
            "data_100hz": seg_100hz.tolist(),
            "data_1hz": seg_1hz,
        })

    print(f"UID {uid}: {len(segments)} segments, step={step_samples} samples "
          f"({step_samples/ORIGINAL_FS:.2f}s)")
    return segments

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialise NumPy arrays and scalars."""

    # (type-or-types, converter) pairs, checked in order.
    _CONVERTERS = (
        (np.ndarray, lambda array: array.tolist()),
        ((np.floating, np.float64), float),
        ((np.integer, np.int64), int),
    )

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Arrays become nested lists, NumPy floats become ``float`` and NumPy
        integers become ``int``. Anything else is delegated to the base
        class, which raises ``TypeError``.
        """
        for numpy_types, convert in self._CONVERTERS:
            if isinstance(obj, numpy_types):
                return convert(obj)
        return super().default(obj)

def process_file(json_path):
    """Segment every UID signal of one JSON file and write the result.

    The input file is expected to hold a JSON object mapping UID strings to
    lists of samples. Each signal is passed to ``make_30_segments``; the
    segments of all UIDs that produced any are written to
    ``OUTPUT_DIR/seg30_<input file name>`` as ``{case_id: {uid: [segments]}}``.
    Nothing is written when no UID yields segments.

    Args:
        json_path: Path of the input JSON file.

    Returns:
        None. Progress is reported on stdout.
    """
    base_name = os.path.basename(json_path)
    destination = os.path.join(OUTPUT_DIR, f"seg30_{base_name}")
    print(f"\n=== {base_name} ===")
    started = time.time()

    with open(json_path, "r") as source:
        payload = json.load(source)

    if not isinstance(payload, dict):
        print("Not a dict of UID: list")
        return

    by_case = {}
    segment_count = 0
    for key, samples in payload.items():
        uid = int(key)
        case_id = uid  # the UID doubles as the case identifier
        signal = np.array(samples, dtype=np.float64)
        uid_segments = make_30_segments(signal, uid, case_id)
        if not uid_segments:
            continue
        by_case.setdefault(str(case_id), {})[str(uid)] = uid_segments
        segment_count += len(uid_segments)

    if not segment_count:
        print("No segments produced.")
        return

    with open(destination, "w") as sink:
        json.dump(by_case, sink, indent=2, cls=NumpyEncoder)
    print(f"Saved {segment_count} segments to {destination} ({time.time()-started:.2f}s)")

def main():
    """Run ``process_file`` on every ``.json`` file found in ``INPUT_DIR``.

    The extension match is case-insensitive and files are handled in the
    order ``os.listdir`` reports them.
    """
    json_files = []
    for entry in os.listdir(INPUT_DIR):
        if entry.lower().endswith('.json'):
            json_files.append(os.path.join(INPUT_DIR, entry))

    print(f"Found {len(json_files)} JSON files")
    for path in json_files:
        process_file(path)
    print("done")

# Start the batch run only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()