In [1]:
# -*- coding: utf-8 -*-
"""
Rewrite all existing CSVs under output_root, replacing the WKT polygon
column with eight flat corner-coordinate columns:
roi_polygon_wkt -> c1_lon,c1_lat,...,c4_lon,c4_lat
"""

import os
import re
import math
import pandas as pd
from tqdm import tqdm

# ====== CONFIG ======
# Directory tree that main() walks recursively looking for files whose name
# ends in ".csv" (case-insensitive).
output_root = "/mnt/cephfs-mount/chenchen/CygnssDataCsv"  # root of CSVs
# True: each CSV is rewritten at its own path. False: the result is written
# next to the source file as <name>_compact<ext> instead.
OVERWRITE = True   # set False if you want *_compact.csv copies

# One WKT coordinate value: optional sign, digits with an optional fraction
# (or a bare fraction such as ".5"), and an optional exponent such as "1e-05".
# Without the exponent part, "1e-05 2" would be mis-read as the pair (-05, 2).
_NUMBER = r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"

# Two whitespace-separated numbers, i.e. one "x y" vertex of the polygon.
PAIR_RE = re.compile("(" + _NUMBER + r")\s+(" + _NUMBER + ")")


def parse_wkt_corners(wkt):
    """Return the first four vertices of a WKT polygon as 8 flat numbers.

    Every "<number> <number>" pair found in *wkt* is taken as one vertex, in
    order of appearance. When there are at least five vertices and the last
    one equals the first (an explicitly closed ring), the repeated closing
    vertex is dropped before the first four are selected.

    Args:
        wkt: Cell value of the ``roi_polygon_wkt`` column. Anything that is
            not a non-blank string (e.g. NaN for a missing cell) is treated
            as invalid.

    Returns:
        A tuple of 8 floats ``(x1, y1, x2, y2, x3, y3, x4, y4)``; the caller
        stores them as ``c1_lon, c1_lat, ..., c4_lon, c4_lat``. All eight are
        NaN when the input is invalid or has fewer than four vertices.
    """
    nan_corners = (math.nan,) * 8
    if not isinstance(wkt, str) or not wkt.strip():
        return nan_corners

    pairs = [(float(x), float(y)) for x, y in PAIR_RE.findall(wkt)]
    # Drop the duplicated closing vertex of a closed ring.
    if len(pairs) >= 5 and pairs[0] == pairs[-1]:
        pairs = pairs[:-1]
    if len(pairs) < 4:
        return nan_corners

    # Flatten [(x1, y1), ..., (x4, y4)] into (x1, y1, ..., x4, y4).
    return tuple(value for corner in pairs[:4] for value in corner)

def process_csv(path):
    """Replace the ``roi_polygon_wkt`` column of one CSV with corner columns.

    The WKT column is dropped and eight columns ``c1_lon, c1_lat, ...,
    c4_lon, c4_lat`` (produced by ``parse_wkt_corners``) are appended after
    the remaining columns. Files without a ``roi_polygon_wkt`` column are
    left untouched.

    The result is written to *path* itself when ``OVERWRITE`` is true,
    otherwise to ``<base>_compact<ext>`` next to it.

    Args:
        path: Path of the CSV file to convert.

    Raises:
        Whatever ``pandas.read_csv`` / ``DataFrame.to_csv`` / ``os.replace``
        raise (unreadable or empty file, write failure, ...).
    """
    # Read only the header first so files without the column (for example
    # ones already converted by an earlier run) are skipped without loading
    # and parsing all of their rows.
    header = pd.read_csv(path, nrows=0)
    if "roi_polygon_wkt" not in header.columns:
        return

    df = pd.read_csv(path)
    corner_columns = [
        "c1_lon", "c1_lat", "c2_lon", "c2_lat",
        "c3_lon", "c3_lat", "c4_lon", "c4_lat",
    ]
    corners = df["roi_polygon_wkt"].apply(parse_wkt_corners)
    corners_df = pd.DataFrame(corners.tolist(), columns=corner_columns)
    df = pd.concat([df.drop(columns=["roi_polygon_wkt"]), corners_df], axis=1)

    if OVERWRITE:
        out_path = path
    else:
        base, ext = os.path.splitext(path)
        out_path = base + "_compact" + ext

    # Write to a sibling temp file and swap it into place, so an interruption
    # mid-write cannot leave a truncated file where the source data used to
    # be. The ".tmp" suffix keeps it out of the "*.csv" scan.
    tmp_path = out_path + ".tmp"
    try:
        df.to_csv(tmp_path, index=False)
        os.replace(tmp_path, out_path)
    finally:
        # After a successful replace the temp file no longer exists; this
        # only removes a leftover from a failed or interrupted write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def main():
    """Convert every CSV found under ``output_root``.

    Walks the tree recursively, collects each file whose name ends in
    ".csv" (case-insensitive) and passes it to ``process_csv`` under a
    progress bar. A failure on one file is reported as a "[WARN]" line and
    does not stop the remaining files from being processed.
    """
    found = [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(output_root)
        for name in names
        if name.lower().endswith(".csv")
    ]
    if len(found) == 0:
        print("No CSVs found under", output_root)
        return

    for csv_path in tqdm(found, desc="Rewriting CSVs"):
        try:
            process_csv(csv_path)
        except Exception as err:
            # Best-effort batch job: report the file and keep going.
            print("[WARN]", csv_path, err)


# Run the conversion only when executed as a script (or notebook cell).
if __name__ == "__main__":
    main()

Rewriting CSVs: 100%|███████████████████████| 928/928 [11:56:39<00:00, 46.34s/it]
