In [14]:
from pathlib import Path
import pandas as pd
from config.settings import SAVE_PATH 
import glob
def check_pairwise_files(cache_dir: Path, verbose: bool = True):
    """
    Scan every *_pairwise.csv in `cache_dir` and report the files whose
    `i`, `j` or `count` column does not hold clean integers.

    A file is reported when it cannot be read, lacks a required column, or
    has a required column containing a non-numeric value, a value that
    cannot be cast to int (NaN/inf), or a non-integral number such as 1.5.

    Parameters
    ----------
    cache_dir : path-like
        Directory to scan (not recursive).
    verbose : bool, default True
        Print each path as it is checked.

    Returns
    -------
    list of (Path, str)
        One (file_path, problem_message) tuple per offending file, in
        sorted path order; empty when every file is clean.
    """
    # A tuple (not a set) so columns are always checked in the same order and
    # the first reported problem is reproducible between runs.
    required = ("i", "j", "count")
    bad_files = []

    # sorted(): glob order is not guaranteed, so sort for a stable report.
    for csv_path in sorted(Path(cache_dir).glob("*_pairwise.csv")):
        if verbose:
            print('paths ' , csv_path)
        try:
            df = pd.read_csv(csv_path)

            # make sure the expected columns are there
            missing = sorted(set(required).difference(df.columns))
            if missing:
                raise ValueError(f"missing columns {missing}")

            for col in required:
                # errors='raise' makes pandas throw on non-numeric text
                numeric = pd.to_numeric(df[col], errors="raise")
                # astype(int) throws on NaN/inf ...
                as_int = numeric.astype(int)
                # ... but silently truncates 1.5 -> 1, so compare explicitly.
                if not (numeric == as_int).all():
                    raise ValueError(
                        f"column {col!r} contains non-integer values"
                    )

        except Exception as exc:          # anything goes wrong → mark as bad
            bad_files.append((csv_path, str(exc)))

    return bad_files


if __name__ == "__main__":
    # Validate every pairwise CSV under SAVE_PATH and summarise the outcome.
    offenders = check_pairwise_files(SAVE_PATH)

    if offenders:
        # At least one file failed: list each file name with its error text.
        print("⚠ Problematic files:")
        for path, msg in offenders:
            print(f"  {path.name}: {msg}")
    else:
        print("✓ All *_pairwise.csv files are clean.")

paths  /home/asj53/BOScheduling/results/sp25/(206, 4, 21)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(275, 1, 20)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(251, 2, 23)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(268, 2, 21)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(265, 0, 20)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(259, 6, 20)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(203, 3, 20)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(259, 3, 22)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(286, 4, 23)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(221, 1, 23)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(218, 2, 23)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(273, 2, 20)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(250, 2, 21)_pairwise.csv
paths  /home/asj53/BOScheduling/results/sp25/(222, 2, 20)_pairwise.csv
⚠ Prob

In [None]:
from pathlib import Path
import pandas as pd, re

def bad_rows(path, expected_cols):
    """
    Return the rows of the CSV at `path` in which any of `expected_cols`
    is not a plain base-10 integer literal.

    Every cell is read as a string and must match ``-?[0-9]+`` in full.
    Missing cells, decimals such as "2.5", stray whitespace, a lone "-"
    and forms like "1-2" are therefore all reported.

    Parameters
    ----------
    path : path-like
        CSV file to inspect.
    expected_cols : iterable of str
        Names of the columns that must contain integers.

    Returns
    -------
    pandas.DataFrame
        The offending rows (all columns, as strings, original row index);
        empty when every checked cell is a valid integer.

    Raises
    ------
    KeyError
        If a name in `expected_cols` is not a column of the file.
    """
    df = pd.read_csv(path, dtype=str)           # keep as strings
    invalid = pd.Series(False, index=df.index)
    for col in expected_cols:
        # fullmatch validates the whole cell, unlike a "no foreign characters"
        # search which lets "-" or "1-2" through.
        # na=False: a missing cell counts as "not an integer".
        is_int = df[col].str.fullmatch(r"-?[0-9]+", na=False)
        invalid = invalid | ~is_int
    return df[invalid]

# Check every pairwise / triple / quadruple CSV under SAVE_PATH and print the
# first offending rows of each file that contains non-integer values.
base = Path(SAVE_PATH)
for suffix, cols in [("_pairwise.csv", ['i','j','count']),
                     ("_triple.csv",   ['i','j','k','count']),
                     ("_quadruple.csv",['i','j','k','l','count'])]:
    print('sug' , suffix)
    # sorted(): glob order is not guaranteed; sort for a reproducible report.
    for p in sorted(base.glob(f"*{suffix}")):
        try:
            bad = bad_rows(p, cols)
        except (OSError, KeyError, ValueError) as exc:
            # Unreadable/empty file or a missing column: report it and keep
            # scanning instead of aborting the whole run on one file.
            print(f"\n⚠ could not check {p.name}: {exc}")
            continue
        if not bad.empty:
            print(f"\n⚠ bad values in {p.name}")
            print(bad.head())
            

sug _pairwise.csv
sug _triple.csv
sug _quadruple.csv
