In [2]:
import csv
from io import StringIO

def smart_split(line):
    """Split one CSV-formatted line into a list of field strings.

    Parsing is delegated to ``csv.reader`` so that quoted fields may contain
    commas; whitespace immediately following a delimiter is discarded.

    Returns the fields of the first record found in ``line``.
    Raises ``StopIteration`` when ``line`` contains no record at all
    (for example an empty string).
    """
    parser = csv.reader(StringIO(line), skipinitialspace=True)
    first_record = next(parser)
    return first_record

def _is_missing(value):
    """Return True if a CSV field is blank or the literal "nan" (any case).

    Surrounding whitespace is ignored for both checks, so "NaN " counts as
    missing just like "nan".
    """
    return value.strip().lower() in ("", "nan")


def _format_row(fields):
    """Serialize ``fields`` as one comma-separated line ending in a newline.

    ``csv.writer`` quotes any field that itself contains a comma or a quote,
    so a row parsed by a quote-aware reader keeps its column count when it is
    written back out. Plain fields are emitted exactly as ``",".join`` would.
    """
    buffer = StringIO()
    csv.writer(buffer, lineterminator="\n").writerow(fields)
    return buffer.getvalue()


def process_astronomy_data(input_file, output_file):
    """Filter and re-column a sectioned CSV file and write the result.

    The input is read line by line:

    * A line starting with ``##`` opens a section. It is echoed (stripped) to
      the output; for the two known sections (``## XRAY_DATA`` and
      ``## RADIO_DATA``) a fixed ``#`` column-header line is written after it.
    * Blank lines are written as a bare newline.
    * Lines that ``smart_split`` cannot parse are dropped.
    * Lines outside a known section are copied through unchanged.
    * Inside a known section each row is
        1. patched: in XRAY_DATA a missing field at index 8 takes the value
           of index 7;
        2. dropped if the section's ``filter_col`` field is absent or missing
           (blank / "nan");
        3. reduced to the section's ``keep`` indices (rows too short for that
           are dropped);
        4. for RADIO_DATA, has its first two kept fields swapped;
        5. written as one comma-separated line.

    Parameters
    ----------
    input_file : path of the CSV file to read.
    output_file : path of the text file to (over)write.
    """
    sections = {
        "## XRAY_DATA": {
            "header": "# obsID, t_xray, dt_xray, Xphase, Xstate, Fx, Fx_unc_l, Fx_unc_u, Fx_uplim, model, fit_stat",
            "keep": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
            "filter_col": 2,
            # (target, source): a missing target field is filled from source.
            "fill_from": (8, 7),
        },
        "## RADIO_DATA": {
            "header": "# blockID, band, t_radio, dt_radio, Rphase, Rstate, Fr, Fr_unc, Fr_uplim, local_rms, alpha, alpha_unc",
            "keep": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
            "filter_col": 4,
            "swap_1_2": True,
        },
    }

    with open(input_file, 'r') as f:
        lines = f.readlines()

    current_section = None
    result = []

    for line in lines:
        stripped = line.strip()

        # Section marker: echo it and, for known sections, emit the header.
        if stripped.startswith("##"):
            current_section = stripped
            result.append(stripped + "\n")
            config = sections.get(current_section)
            if config is not None:
                result.append(config["header"] + "\n")
            continue

        if stripped == "":
            result.append("\n")
            continue

        try:
            row = smart_split(line)
        except Exception:
            # Deliberate best-effort: a line the CSV parser rejects is skipped.
            continue

        config = sections.get(current_section)
        if config is None:
            # Not inside a known section: pass the line through untouched.
            result.append(line)
            continue

        # Fill a missing field from its fallback column. The length check must
        # be strictly greater than the target index, otherwise a row with
        # exactly `target` fields would raise IndexError on row[target].
        fill = config.get("fill_from")
        if fill is not None:
            target, source = fill
            if len(row) > target and _is_missing(row[target]):
                row[target] = row[source]

        # Drop rows whose filter column is absent or holds no value.
        idx = config.get("filter_col")
        if idx is not None and (idx >= len(row) or _is_missing(row[idx])):
            continue

        # Extract the required columns; rows that are too short are dropped.
        try:
            selected = [row[i] for i in config["keep"]]
        except IndexError:
            continue

        # Swap the first two kept columns if the section asks for it.
        if config.get("swap_1_2"):
            selected[0], selected[1] = selected[1], selected[0]

        result.append(_format_row(selected))

    with open(output_file, "w") as f:
        f.writelines(result)


In [3]:
# Object names; this list is not referenced by the other visible cells.
# NOTE(review): presumably the sources to be processed — confirm intended use.
names = [
    "Swift J1727.8-163",
    "Swift J1728.9-3613",
    "Swift J1842.5-1124",
    "Vela X-1",
    "Swift J1858.6-0814",
    "XTE J1701-462",
]

In [4]:
# Convert the original Cir X-1 CSV into the filtered text file.
process_astronomy_data(
    input_file="./ORIGINAL/Cir X-1_orig.csv",
    output_file="Cir X-1.txt",
)