# TSP求解をAmplifyAEではなくortoolsに変えた正しく実装できるかのでテスト

In [None]:
import json, zipfile, pandas as pd
from pathlib import Path

# ZIPを展開
zip_path = "/mnt/data/Leuven2_before_data.zip"
extract_dir = Path(".")
with zipfile.ZipFile(zip_path, "r") as z:
    z.extractall(extract_dir)

# 初期状態 (centroid_init ではなく iteration_1 から)
cluster_distance_map = {}
records = []

for fp in sorted(extract_dir.glob("**/iteration_*.json")):
    it = int(fp.stem.split("_")[1])
    with open(fp) as f:
        data = json.load(f)

    # そのイテレーションで更新されたクラスタだけ置き換える
    for cluster in data:
        cid = cluster["cluster_id"]
        cluster_distance_map[cid] = float(cluster.get("total_distance", 0.0))

    # 全クラスタの合計を計算
    total = sum(cluster_distance_map.values())
    records.append({"iteration": it, "total_distance": total})

df = pd.DataFrame(records).sort_values("iteration")
display(df)


In [3]:
#!/usr/bin/env python3
import json, re, math
from pathlib import Path
import pandas as pd
# ←必要に応じて変更
BASE = Path("/home/toshiya1048/dev/QA_knap/out/ortools_test")
def count_clusters_from_before_data_dir(dirpath: Path) -> int:
   cand = list(dirpath.glob("*before_data.json")) + list(dirpath.glob("before_data.json"))
   if not cand:
       return 0
   with open(cand[0], "r") as f:
       j = json.load(f)
   # "cluster_数字" を数える
   return sum(1 for k in j.keys() if isinstance(k, str) and re.match(r"cluster_\d+$", k))
def load_iteration_records(path: Path):
   """
   iteration_X.json を読み、{cluster_id:int, total_distance:float} の
   正常レコードのみ返す。異常は reasons リストに積む。
   """
   ok = []
   skipped = []
   with open(path, "r") as fp:
       data = json.load(fp)
   # 形式ゆらぎに対応：listでなければ無視
   if not isinstance(data, list):
       skipped.append((path.name, "top_not_list", str(type(data))))
       return ok, skipped
   for i, rec in enumerate(data):
       if not isinstance(rec, dict):
           skipped.append((path.name, f"item_{i}_not_dict", str(type(rec))))
           continue
       keys = set(rec.keys())
       if "cluster_id" not in rec or "total_distance" not in rec:
           skipped.append((path.name, f"item_{i}_missing_keys", f"keys={sorted(keys)}"))
           continue
       try:
           cid = int(rec["cluster_id"])
           td  = rec["total_distance"]
           if td is None or (isinstance(td, float) and (math.isnan(td) or math.isinf(td))):
               skipped.append((path.name, f"item_{i}_bad_total_distance", f"value={td}"))
               continue
           td = float(td)
           ok.append({"cluster_id": cid, "total_distance": td})
       except Exception as e:
           skipped.append((path.name, f"item_{i}_coerce_error", repr(e)))
   return ok, skipped
def analyze_instance(inst_dir: Path):
   # 例: /.../20251112_110829/Leuven2_before_data
   instance = inst_dir.name.replace("_before_data", "")
   n_clusters = count_clusters_from_before_data_dir(inst_dir)
   it_files = sorted(
       [p for p in inst_dir.glob("iteration_*.json") if p.is_file()],
       key=lambda p: int(p.stem.split("_")[1]) if "_" in p.stem and p.stem.split("_")[1].isdigit() else 0
   )
   last_dist = {}         # cluster_id -> distance（持ち回り）
   rows = []
   all_skipped = []
   for f in it_files:
       it = int(f.stem.split("_")[1]) if "_" in f.stem and f.stem.split("_")[1].isdigit() else 0
       ok, skipped = load_iteration_records(f)
       all_skipped.extend(skipped)
       touched = set()
       for rec in ok:
           cid = rec["cluster_id"]
           td  = rec["total_distance"]
           last_dist[cid] = td
           touched.add(cid)
       total_known = sum(last_dist.values())
       rows.append({
           "instance": instance,
           "iteration": it,
           "total_route_distance_all_clusters(carry_forward)": total_known,
           "num_touched_in_this_iter": len(touched),
           "num_clusters_known": len(last_dist),
           "num_clusters_total(if_known)": n_clusters if n_clusters else None,
       })
   df = pd.DataFrame(rows)
   return df, all_skipped
def main():
   all_rows = []
   skipped_log = []
   # .../<timestamp>/*_before_data を横断
   for ts_dir in sorted([d for d in BASE.iterdir() if d.is_dir()]):
       for inst_dir in sorted(ts_dir.glob("*_before_data")):
           df, sk = analyze_instance(inst_dir)
           if not df.empty:
               all_rows.append(df)
           skipped_log.extend([(ts_dir.name, inst_dir.name, *x) for x in sk])
   if not all_rows:
       print("No iteration_X.json with usable records was found.")
       return
   out = pd.concat(all_rows, ignore_index=True)
   out = out.sort_values(["instance", "iteration"])
   out["delta_from_prev"] = out.groupby("instance")["total_route_distance_all_clusters(carry_forward)"].diff()
   out["improvement_rate_%"] = (
       -100.0 * out["delta_from_prev"] /
       out.groupby("instance")["total_route_distance_all_clusters(carry_forward)"].shift(1)
   )
   csv_path = BASE / "iteration_total_route_summary.csv"
   out.to_csv(csv_path, index=False)
   print(f"✅ saved: {csv_path}")
   last = out.sort_values(["instance","iteration"]).groupby("instance").tail(1)
   print("\n=== Final totals per instance ===")
   print(last[["instance","total_route_distance_all_clusters(carry_forward)",
               "num_clusters_known","num_clusters_total(if_known)"]].to_string(index=False))
   if skipped_log:
       print("\n--- Skipped records (for diagnostics) ---")
       for ts, inst, fname, reason, detail in skipped_log[:50]:  # 多い時は先頭50件だけ表示
           print(f"[{ts}/{inst}] {fname}: {reason} -> {detail}")
       if len(skipped_log) > 50:
           print(f"... ({len(skipped_log)-50} more)")
if __name__ == "__main__":
   main()

✅ saved: /home/toshiya1048/dev/QA_knap/out/ortools_test/iteration_total_route_summary.csv

=== Final totals per instance ===
  instance  total_route_distance_all_clusters(carry_forward)  num_clusters_known num_clusters_total(if_known)
E-n101-k14                                             899.0                  10                         None
  E-n51-k5                                             606.0                   6                         None
   Leuven2                                          105444.0                  29                         None
       out                                           98034.0                  41                         None

--- Skipped records (for diagnostics) ---
[20251112_110829/Leuven2_before_data] iteration_1_swap_timings.json: item_0_missing_keys -> keys=['block_ms', 'from_cluster', 'iteration', 'move_ms', 'moved_indices', 'n_city', 'qa_ms', 'skipped', 'sum_dist_current_after', 'sum_dist_current_before', 'swap_index', 'to_cluster']
[202