In [1]:
from __future__ import annotations

from pathlib import Path
import json, sys, io, hashlib, urllib.request, urllib.error, ssl
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from PIL import Image, ImageTk

# ================== Default configuration ==================
# Path of the captions JSON that main() opens at start-up.
DEFAULT_CAPTIONS_JSON = r"json_to_check/5000_image_caption_dict.json"
# Window title; also passed as the title of every message box in this file.
APP_TITLE = "Kiểm tra Caption"

# Cache directory for images downloaded from URLs.
CACHE_DIR = Path.home() / ".caption_reviewer_cache"
# Created at import time so that later cache writes find the directory in place.
CACHE_DIR.mkdir(parents=True, exist_ok=True)


# ================== Tiện ích chung ==================
def pad12(n:int)->str:
    """Return ``n`` (coerced with ``int``) as a decimal string zero-padded to width 12."""
    number = int(n)
    return format(number, "012d")

def filename_from_url_or_id(url:str|None, image_id:int)->str:
    if url:
        name = Path(url).name
        if name:
            return name
    return f"{pad12(image_id)}.jpg"

def _as_caption_list(value) -> list:
    """Coerce a captions field to a list; a bare string becomes a single caption."""
    if isinstance(value, list):
        return value
    if isinstance(value, str):
        return [value]
    return []


def _build_caption_item(payload: dict, url, filename=None) -> dict:
    """Normalise one raw JSON entry into the item dict returned by the loader."""
    # ``or 0`` also covers an explicit ``"image_id": null`` in the JSON.
    image_id = int(payload.get("image_id") or 0)
    return {
        "image_id": image_id,
        "filename": filename or filename_from_url_or_id(url, image_id),
        "english": _as_caption_list(payload.get("english")),
        "vietnamese": _as_caption_list(payload.get("vietnamese")),
        "url": url,
    }


def load_captions_json(json_path:Path):
    """Load a captions file and return a list of normalised item dicts.

    Two layouts are supported:

    1) ``{ url: {image_id, english[], vietnamese[]}, ... }``
    2) ``{"items": [ {image_id, filename?, english[], vietnamese[], url?}, ... ]}``

    Args:
        json_path: Path of the UTF-8 encoded JSON file.

    Returns:
        A list of dicts with the keys ``image_id`` (int), ``filename`` (str),
        ``english`` (list), ``vietnamese`` (list) and ``url`` (str or None).
        Entries that are not JSON objects are skipped.

    Raises:
        ValueError: if the top-level JSON value is not an object (this also
            covers malformed JSON, since ``json.JSONDecodeError`` derives
            from ``ValueError``).
        OSError: if the file cannot be read.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, dict):
        raise ValueError(
            "Unsupported captions JSON: expected an object at the top level, "
            f"got {type(data).__name__}"
        )

    raw_items = data.get("items")
    if isinstance(raw_items, list):
        # Layout 2: an explicit file name wins; otherwise derive it from the
        # entry's own URL (same rule as layout 1), then from the image id.
        return [
            _build_caption_item(entry, entry.get("url"), entry.get("filename"))
            for entry in raw_items
            if isinstance(entry, dict)
        ]

    # Layout 1: the mapping key is the image URL.
    return [
        _build_caption_item(payload, url)
        for url, payload in data.items()
        if isinstance(payload, dict)
    ]

def _safe_user_agent_request(url: str, timeout: float = 20.0):
    req = urllib.request.Request(
        url,
        headers={"User-Agent": "CaptionReviewer/1.0 (+https://example.local)", "Accept": "*/*"},
        method="GET",
    )
    ctx = ssl.create_default_context()
    return urllib.request.urlopen(req, timeout=timeout, context=ctx)

def cache_key_for_url(url: str)->str:
    """Return the first 12 hex characters of the SHA-1 of the UTF-8 encoded URL."""
    digest = hashlib.sha1(url.encode("utf-8"))
    return digest.hexdigest()[:12]

def get_cached_file_for(url: str, fallback_name: str)->Path:
    """Return the cache path for ``url``: ``CACHE_DIR/<url key>_<fallback_name>``.

    Forward and back slashes in ``fallback_name`` are replaced by underscores
    so the name stays a single path component.
    """
    url_key = cache_key_for_url(url)
    slashes_to_underscore = str.maketrans({"/": "_", "\\": "_"})
    flat_name = fallback_name.translate(slashes_to_underscore)
    return CACHE_DIR / "_".join((url_key, flat_name))

def ensure_image_cached(url: str, image_id: int)->Path:
    """Return the local cache file for ``url``, downloading it when missing.

    The image is downloaded, decoded with Pillow, converted to RGB and saved
    in the cache directory. A non-empty file already present under the final
    cache name is returned without touching the network.

    Args:
        url: Image URL to fetch.
        image_id: Id used to build a fallback file name.

    Returns:
        Path of the cached image file.

    Raises:
        RuntimeError: if the image cannot be downloaded, decoded or saved;
            the underlying exception is chained as ``__cause__``.
    """
    filename = filename_from_url_or_id(url, image_id)
    cache_file = get_cached_file_for(url, filename)
    try:
        # Settle the final on-disk name BEFORE the lookup. Checking the
        # un-normalised name while saving under the normalised one meant URLs
        # without a known image extension were re-downloaded on every call.
        if cache_file.suffix.lower() not in (".jpg", ".jpeg", ".png", ".webp", ".bmp"):
            cache_file = cache_file.with_suffix(".jpg")
        if cache_file.exists() and cache_file.stat().st_size > 0:
            return cache_file
        with _safe_user_agent_request(url, timeout=25) as resp:
            data = resp.read()
        im = Image.open(io.BytesIO(data)).convert("RGB")
        im.save(cache_file)
        return cache_file
    except Exception as e:
        raise RuntimeError(f"Không tải được ảnh từ URL: {url}\n{e}") from e


# ======= Export callback ở mức module =======
def export_edits_cmd(app: "ReviewerApp"):
    """Ask for a destination and write the app's edits to it as JSON.

    Does nothing beyond an info dialog when ``app.edits`` is empty, and
    returns silently when the user cancels the save dialog. The file holds
    ``updated_at`` (local ISO timestamp), ``total`` and ``edits``.

    Args:
        app: The running reviewer window; only its ``edits`` mapping is read.
    """
    # Function-scope import instead of the ``__import__("datetime")`` hack.
    from datetime import datetime

    if not app.edits:
        messagebox.showinfo(APP_TITLE, "Chưa có chỉnh sửa để export.")
        return
    out = filedialog.asksaveasfilename(defaultextension=".json", initialfile="edits.json",
                                       filetypes=[("JSON files","*.json")])
    if not out:
        return
    data = {
        "updated_at": datetime.now().isoformat(),
        "total": len(app.edits),
        "edits": app.edits,
    }
    try:
        with open(out, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
    except Exception as e:
        # UI boundary: report any failure to the user instead of raising.
        messagebox.showerror(APP_TITLE, f"Lỗi lưu:\n{e}")
    else:
        messagebox.showinfo(APP_TITLE, f"Đã lưu: {out}")


# ================== Ứng dụng chính ==================
class ReviewerApp(tk.Tk):
    """Main window: shows one image at a time with its captions for review.

    The window displays the image of the current item, its English and
    Vietnamese captions, and a text box for editing a Vietnamese caption.
    Edits are kept in ``self.edits`` (keyed by ``str(image_id)``) and written
    to a sidecar file ``<json_path>.edits.json`` after every change.
    """

    def __init__(self, json_path:Path):
        """Build the UI and load ``json_path`` if it exists."""
        super().__init__()
        self.title(APP_TITLE)
        self.geometry("1200x820")
        self.minsize(980, 700)

        self.json_path = Path(json_path)

        self.items: list[dict] = []          # every entry parsed from the JSON
        self.items_ready: list[dict] = []    # entries that have a URL (displayable)
        self.skipped: list[dict] = []        # entries without a URL
        self.index = 0                       # position in items_ready
        # Reference kept on the instance so the displayed image stays alive.
        self.photo: ImageTk.PhotoImage|None = None
        self.edits: dict[str, dict] = {}     # str(image_id) -> edit record

        # Set after a successful image load; None means "nothing to redraw".
        self._current_image_source: tuple[str, Path|None, str|None] | None = None

        self._build_menu()
        self._build_ui()
        self._bind_keys()
        self.canvas.bind("<Configure>", lambda e: self._redraw_current())

        if not self.json_path.exists():
            self.after(100, lambda: messagebox.showerror(APP_TITLE, f"Không tìm thấy file JSON:\n{self.json_path}"))
        else:
            self.load_dataset(self.json_path)

    # ---------- Menu ----------
    def _build_menu(self):
        """Create the File menu (reload, export, exit)."""
        m = tk.Menu(self)
        filem = tk.Menu(m, tearoff=0)
        filem.add_command(label="Reload JSON", command=lambda: self.load_dataset(self.json_path))
        filem.add_separator()
        filem.add_command(label="Export edits.json…", command=lambda: export_edits_cmd(self))
        filem.add_separator()
        filem.add_command(label="Exit", command=self.destroy)
        m.add_cascade(label="File", menu=filem)
        self.config(menu=m)

    def _build_ui(self):
        """Create the toolbar, the image canvas, the caption lists and the editor."""
        top = ttk.Frame(self)
        top.pack(side="top", fill="x", padx=10, pady=8)
        self.idx_var = tk.StringVar(value="0/0")
        self.id_var  = tk.StringVar(value="image_id: -")
        ttk.Label(top, textvariable=self.idx_var).pack(side="left")
        ttk.Label(top, textvariable=self.id_var ).pack(side="left", padx=(10,0))
        # Packed right-to-left, so the visual order is Prev, Next, Save.
        ttk.Button(top, text="Save", command=self.save_current).pack(side="right", padx=(0,8))
        ttk.Button(top, text="Next ►", command=self.next_item).pack(side="right", padx=(0,8))
        ttk.Button(top, text="◄ Prev", command=self.prev_item).pack(side="right")

        main = ttk.PanedWindow(self, orient="vertical")
        main.pack(side="top", fill="both", expand=True, padx=10, pady=10)

        top_pane = ttk.Frame(main)
        main.add(top_pane, weight=3)
        self.canvas = tk.Canvas(top_pane, bg="#0b0c10", highlightthickness=0)
        self.canvas.pack(fill="both", expand=True)

        bot_pane = ttk.Frame(main)
        main.add(bot_pane, weight=2)

        cols = ttk.PanedWindow(bot_pane, orient="horizontal")
        cols.pack(fill="both", expand=True)

        left = ttk.Frame(cols)
        cols.add(left, weight=1)
        ttk.Label(left, text="English captions (click to choose)", foreground="#4f46e5").pack(anchor="w")
        # exportselection=False: with the Tk default, selecting in the other
        # listbox or in the text editor clears this listbox's selection, and
        # the edit would then be applied to caption 0 instead of the chosen one.
        self.en_list = tk.Listbox(left, height=6, selectmode="browse", exportselection=False)
        self.en_list.pack(fill="x", padx=2, pady=(0,8))
        self.en_list.bind("<<ListboxSelect>>", self._on_select_en)

        ttk.Label(left, text="Vietnamese captions (click to select for editing)", foreground="#4f46e5").pack(anchor="w")
        self.vi_list = tk.Listbox(left, height=6, selectmode="browse", exportselection=False)
        self.vi_list.pack(fill="x", padx=2, pady=(0,8))
        self.vi_list.bind("<<ListboxSelect>>", self._on_select_vi)

        right = ttk.Frame(cols)
        cols.add(right, weight=1)
        ttk.Label(right, text="Edit Vietnamese caption").pack(anchor="w")
        self.vi_text = tk.Text(right, height=10, wrap="word")
        self.vi_text.pack(fill="both", expand=True, padx=2, pady=(0,8))
        ttk.Label(
            right,
            text="Enter: thay thế mục đang chọn & sang ảnh tiếp • Shift+Enter: xuống dòng • Ctrl+S: lưu"
        ).pack(anchor="w")

    def _bind_keys(self):
        """Install keyboard shortcuts for saving and navigating."""
        self.vi_text.bind("<Return>", self._on_return)
        self.vi_text.bind("<Shift-Return>", self._on_shift_return)
        # NOTE(review): these are bound on the toplevel, so they presumably
        # also fire while the caret is being moved inside the caption editor;
        # confirm that navigating on every arrow key press is intended.
        self.bind("<Left>",  lambda e: self.prev_item())
        self.bind("<Right>", lambda e: self.next_item())
        self.bind("<Control-s>", self._on_ctrl_s)

    # ---------- Dataset ----------
    def load_dataset(self, json_path:Path):
        """Load the captions JSON, keep the items that have a URL and show the first.

        Errors while reading the file are reported in a dialog and leave the
        current state untouched.
        """
        try:
            self.items = load_captions_json(json_path)
        except Exception as e:
            messagebox.showerror(APP_TITLE, f"Không đọc được JSON:\n{e}")
            return

        self.items_ready.clear()
        self.skipped.clear()
        for it in self.items:
            if it.get("url"):
                if not isinstance(it.get("vietnamese"), list):
                    it["vietnamese"] = []
                self.items_ready.append(it)
            else:
                self.skipped.append(it)

        # Reset the position before any early return so an index from a
        # previously loaded dataset can never point past the new list.
        self.index = 0
        self._current_image_source = None

        if not self.items_ready:
            messagebox.showwarning(APP_TITLE, "Không có ảnh nào có thể hiển thị (thiếu URL).")
            return

        self._load_sidecar_edits()
        self.render()

        if self.skipped:
            messagebox.showinfo(APP_TITLE, f"Loaded {len(self.items_ready)} ảnh. Bỏ qua {len(self.skipped)} mục không có URL.")

    # ---------- Sidecar edits ----------
    def _sidecar_path(self)->Path:
        """Return the path of the sidecar file: ``<json_path>.edits.json``."""
        return Path(str(self.json_path) + ".edits.json")

    def _load_sidecar_edits(self):
        """Replace ``self.edits`` with the content of the sidecar file, if usable.

        A missing, unreadable or malformed sidecar yields an empty mapping;
        entries whose value is not a dict are dropped because the rest of the
        class calls ``.get`` on them.
        """
        self.edits = {}
        p = self._sidecar_path()
        if not p.exists():
            return
        try:
            with open(p,"r",encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both invalid JSON and invalid UTF-8.
            print("Sidecar load error:", e, file=sys.stderr)
            return
        stored = data.get("edits") if isinstance(data, dict) else None
        if isinstance(stored, dict):
            self.edits = {str(k): v for k, v in stored.items() if isinstance(v, dict)}

    def _persist_sidecar(self):
        """Write ``self.edits`` to the sidecar file; failures are logged to stderr."""
        p = self._sidecar_path()
        payload = {"source_json": str(self.json_path) if self.json_path else None, "edits": self.edits}
        try:
            with open(p,"w",encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as e:
            print("Persist error:", e, file=sys.stderr)

    # ---------- Helpers ----------
    def _refresh_vi_listbox(self, item: dict, select_index: int|None = None):
        """Refill the Vietnamese listbox from ``item['vietnamese']``.

        When ``select_index`` is a valid row it is selected and scrolled into view.
        """
        self.vi_list.delete(0, "end")
        for s in (item.get("vietnamese") or []):
            self.vi_list.insert("end", (s or "").strip())
        if select_index is not None and 0 <= select_index < self.vi_list.size():
            self.vi_list.select_set(select_index)
            self.vi_list.see(select_index)

    # ---------- Render ----------
    def render(self):
        """Show the current item: counters, image, caption lists and editor text."""
        if not self.items_ready:
            return
        item = self.items_ready[self.index]
        saved = self.edits.get(str(item["image_id"]), {})

        self.idx_var.set(f"{self.index+1}/{len(self.items_ready)}")
        self.id_var.set(f"image_id: {item['image_id']}")
        self._show_image_for_item(item)

        # English list; the same normalised strings are used for the lookup
        # below so a null caption cannot break the comparison.
        english = [(s or "").strip() for s in (item.get("english") or [])]
        self.en_list.delete(0,"end")
        for caption in english:
            self.en_list.insert("end", caption)
        chosen_en = saved.get("chosen_en")
        if chosen_en and chosen_en in english:
            idx = english.index(chosen_en)
            self.en_list.select_set(idx)
            self.en_list.see(idx)

        # Vietnamese list
        self._refresh_vi_listbox(item)

        # Editor: a previously saved edit wins over the first original caption.
        edited = (saved.get("caption_vi") or "").strip()
        self.vi_text.delete("1.0","end")
        if edited:
            self.vi_text.insert("1.0", edited)
        else:
            first_vi = (item.get("vietnamese") or [""])[0]
            self.vi_text.insert("1.0", (first_vi or "").strip())
        self.vi_text.focus_set()
        self.vi_text.mark_set("insert", "end-1c")

    def _redraw_current(self):
        """Redraw the current image after a canvas resize (best effort)."""
        if not self.items_ready or self._current_image_source is None:
            return
        try:
            self._show_image_for_item(self.items_ready[self.index], reuse_source=True)
        except Exception as e:
            # A failed redraw must not disturb the event loop, but leave a trace.
            print("Redraw error:", e, file=sys.stderr)

    def _show_image_for_item(self, item: dict, reuse_source: bool = False):
        """Load the item's image (via the URL cache) and draw it scaled to the canvas.

        Args:
            item: Item dict; its ``url`` and ``image_id`` are used.
            reuse_source: True when redrawing the image that is already
                shown; the current-source marker is then left as it is.

        On failure an error dialog is shown and the source marker is cleared,
        so resize events do not retry the load and reopen the dialog.
        """
        cw = max(200, self.canvas.winfo_width())
        ch = max(200, self.canvas.winfo_height())
        self.canvas.delete("all")
        loading_text = self.canvas.create_text(cw//2, ch//2, text="Loading image...", fill="#ffffff", font=("Segoe UI", 12))
        self.update_idletasks()

        try:
            url = item.get("url")
            if not url:
                raise RuntimeError("Item không có 'url' để hiển thị.")
            cache_file = ensure_image_cached(url, item["image_id"])
            # convert() returns an independent image, so the file can be closed.
            with Image.open(cache_file) as src:
                im = src.convert("RGB")
        except Exception as e:
            self._current_image_source = None
            self.canvas.delete(loading_text)
            messagebox.showerror(APP_TITLE, f"Lỗi mở/tải ảnh:\n{e}")
            return

        if not reuse_source:
            self._current_image_source = ("url", None, url)

        im.thumbnail((cw-20, ch-20))
        self.photo = ImageTk.PhotoImage(im)
        self.canvas.delete("all")
        self.canvas.create_image(cw//2, ch//2, image=self.photo)

    # ---------- Events ----------
    def _on_select_en(self, _):
        """Record the clicked English caption as ``chosen_en`` and persist it."""
        sel = self.en_list.curselection()
        if not sel or not self.items_ready:
            return
        txt = self.en_list.get(sel[0])
        key = str(self.items_ready[self.index]["image_id"])
        self.edits.setdefault(key, {})["chosen_en"] = txt
        self._persist_sidecar()

    def _on_select_vi(self, _):
        """Copy the clicked Vietnamese caption into the editor."""
        sel = self.vi_list.curselection()
        if not sel:
            return
        txt = self.vi_list.get(sel[0])
        self.vi_text.delete("1.0","end")
        self.vi_text.insert("1.0", txt)
        self.vi_text.focus_set()
        self.vi_text.mark_set("insert","end-1c")

    def _on_return(self, _):
        """Enter: save the edit, then move to the next item (even if nothing was saved)."""
        self.save_current()
        self.next_item()
        return "break"  # suppress the newline the Text widget would insert

    def _on_shift_return(self, _):
        """Shift+Enter: insert a line break in the editor."""
        self.vi_text.insert("insert","\n")
        return "break"

    def _on_ctrl_s(self, _):
        """Ctrl+S: save the current edit and stop further handling of the key."""
        self.save_current()
        return "break"

    def _apply_vi_edit_to_item(self, item: dict, new_text: str) -> int:
        """Replace the selected Vietnamese caption of ``item`` with ``new_text``.

        - With no selection and a non-empty list, the first caption (index 0)
          is replaced.
        - With an empty list, the text is appended as the first caption.

        Returns:
            The index that was replaced or added, or ``-1`` when ``new_text``
            is empty after stripping (nothing is changed in that case).
        """
        new_text = (new_text or "").strip()
        if not isinstance(item.get("vietnamese"), list):
            item["vietnamese"] = []
        if not new_text:
            return -1

        vi_list = item["vietnamese"]
        sel = self.vi_list.curselection()
        idx = sel[0] if sel else 0
        if idx < len(vi_list):
            vi_list[idx] = new_text
            return idx
        # Empty list (or a selection past its end): add instead of failing.
        vi_list.append(new_text)
        return len(vi_list) - 1

    def save_current(self):
        """Apply the editor text to the selected Vietnamese caption and persist it.

        Returns:
            True when an edit was stored; False when there is no item or the
            editor is empty (the bell rings in the latter case).
        """
        if not self.items_ready:
            return False

        item = self.items_ready[self.index]
        key = str(item["image_id"])
        txt = self.vi_text.get("1.0","end-1c").strip()

        replaced_index = self._apply_vi_edit_to_item(item, txt)
        if replaced_index < 0:
            self.bell()
            return False

        self._refresh_vi_listbox(item, select_index=replaced_index)

        ed = self.edits.setdefault(key, {})
        ed["caption_vi"] = txt
        ed["replaced_index"] = replaced_index
        self._persist_sidecar()
        return True

    def next_item(self):
        """Move to the next item, or tell the user the last one is reached."""
        if self.index < len(self.items_ready) - 1:
            self.index += 1
            self.render()
        else:
            messagebox.showinfo(APP_TITLE, "Đã đến ảnh cuối.")

    def prev_item(self):
        """Move to the previous item, if there is one."""
        if self.index > 0:
            self.index -= 1
            self.render()


def main():
    """Open the reviewer window on the default captions JSON and run its event loop."""
    default_json = Path(DEFAULT_CAPTIONS_JSON)
    ReviewerApp(json_path=default_json).mainloop()

if __name__ == "__main__":
    main()
