# In [21]:
# login_utils.py (or a separate cell in Jupyter)
# -*- coding: utf-8 -*-
import json
from playwright.sync_api import sync_playwright
\
def clean_cookies(cookies):
    """Reduce exported cookie dicts to a fixed set of normalized fields.

    Each output cookie keeps only ``name``, ``value``, ``domain``, ``path``,
    ``secure``, ``httpOnly``, ``sameSite`` and, when it is numeric,
    ``expires``. Every other key of the input is dropped.

    ``sameSite`` is matched case-insensitively, and the ``no_restriction``
    spelling is accepted as an alias of ``"None"``. Any other value
    (including ``None`` and non-strings) falls back to ``"None"`` for secure
    cookies and ``"Lax"`` otherwise.

    Args:
        cookies: Iterable of cookie dicts, e.g. loaded from a JSON export.

    Returns:
        A new list of cleaned cookie dicts; the input dicts are not modified.
    """
    # Lower-cased spellings mapped to the canonical capitalized values.
    samesite_aliases = {
        "strict": "Strict",
        "lax": "Lax",
        "none": "None",
        "no_restriction": "None",
    }

    cleaned_cookies = []
    for c in cookies:
        nc = {
            "name": c.get("name"),
            "value": c.get("value"),
            "domain": c.get("domain"),
            "path": c.get("path", "/"),
            # A cookie that does not say otherwise is treated as secure.
            "secure": c.get("secure", True),
            "httpOnly": c.get("httpOnly", False),
        }

        ss = c.get("sameSite", "Lax")
        canonical = samesite_aliases.get(ss.lower()) if isinstance(ss, str) else None
        if canonical is None:
            # Unknown / missing value: choose based on the secure flag.
            nc["sameSite"] = "None" if nc["secure"] else "Lax"
        else:
            nc["sameSite"] = canonical

        # Only carry over a numeric expiry; anything else is omitted.
        if "expires" in c and isinstance(c["expires"], (int, float)):
            nc["expires"] = c["expires"]

        cleaned_cookies.append(nc)

    return cleaned_cookies


def get_job_detail_html(url: str, cookies_file: str = "cookies.json") -> str:
    """Open a job-detail URL in Playwright with saved cookies and return its HTML.

    The cookies are read from ``cookies_file``, passed through
    ``clean_cookies`` and injected into a fresh Chromium context before the
    page is loaded.

    Args:
        url: Address of the page to load.
        cookies_file: Path to a JSON file holding the cookies to inject.

    Returns:
        The page HTML as a string, captured after ``domcontentloaded`` plus a
        fixed 5-second wait.

    Raises:
        Nothing is caught here: errors from reading or parsing the cookie
        file and from Playwright (e.g. a navigation timeout) propagate to
        the caller. The browser is closed in either case.
    """
    # 1. Read the cookies
    with open(cookies_file, "r", encoding="utf-8") as f:
        raw_cookies = json.load(f)
    manual_cookies = clean_cookies(raw_cookies)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            my_ua = (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            )
            context = browser.new_context(user_agent=my_ua)
            context.add_cookies(manual_cookies)

            page = context.new_page()
            # Make navigator.webdriver read as undefined in the page.
            page.add_init_script(
                "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
            )

            page.goto(url, wait_until="domcontentloaded", timeout=60000)
            # Fixed pause after DOM load before snapshotting the markup.
            page.wait_for_timeout(5000)

            html = page.content()  # HTML of the job overview page
        finally:
            # Close the browser even when navigation or cookie setup fails.
            browser.close()

    return html