QIN2DIM · QIN2DIM · Mar 31, 2024 · Mar 31, 2024 · Apr 7, 2024 · Apr 14, 2024
diff --git a/.gitignore b/.gitignore
@@ -155,3 +155,11 @@ assets/image_label_binary/off_road_vehicle/20**
 profile_pluggable_model.md
 ./*.png
 tests/record_json
+docs/lvm_challenge/*.jpeg
+docs/lvm_challenge/*.png
+logs
+examples/*.md
+docs/logs/
+docs/*.md
+node_modules
+pnpm-lock.yaml
diff --git a/api/main.py b/api/main.py
@@ -0,0 +1,25 @@
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+
+from routers import challenge_router, datalake_router
+
+# Single ASGI application object; the "main:app" string passed to uvicorn names it.
+app = FastAPI()
+
+# Mount each feature router beneath its own URL prefix.
+for _prefix, _router in (("/challenge", challenge_router), ("/datalake", datalake_router)):
+    app.include_router(_router, prefix=_prefix)
+
+
+@app.get("/")
+async def home():
+    """Redirect requests for the API root to the project's GitHub page."""
+    project_url = "https://github.com/QIN2DIM/hcaptcha-challenger"
+    return RedirectResponse(url=project_url)
+
+
+@app.get("/ping", response_model=str)
+async def ping():
+    """Liveness probe: always answers with the literal string "pong"."""
+    reply = "pong"
+    return reply
+
+
+if __name__ == "__main__":
+    # Script entry point: serve this module's `app` on all interfaces, port 33777.
+    from uvicorn import run as serve
+
+    serve("main:app", host="0.0.0.0", port=33777)
diff --git a/backend/readme.md → api/readme.md b/backend/readme.md → api/readme.md
@@ -1,10 +1,9 @@
 ```bash
-pip install fastapi
-pip install uvicorn[standard]
+pip install "fastapi[all]"
 ```
 
 ```bash
-uvicorn main:app --reload
+python main.py
 ```
 
 ```markdown

diff --git a/api/routers/__init__.py b/api/routers/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+# Time       : 2024/4/14 12:24
+# Author     : QIN2DIM
+# GitHub     : https://github.com/QIN2DIM
+# Description:
+from .challenge import router as challenge_router
+from .datalake import router as datalake_router
+
+__all__ = ["challenge_router", "datalake_router"]
diff --git a/api/routers/challenge.py b/api/routers/challenge.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# Time       : 2024/4/14 12:25
+# Author     : QIN2DIM
+# GitHub     : https://github.com/QIN2DIM
+# Description:
+
+from fastapi import APIRouter
+from loguru import logger
+
+import hcaptcha_challenger as solver
+from hcaptcha_challenger import ModelHub, register_pipline
+from hcaptcha_challenger.models import SelfSupervisedResponse, SelfSupervisedPayload
+from hcaptcha_challenger.tools.zero_shot_image_classifier import invoke_clip_tool
+
+router = APIRouter()
+
+# Init local-side of the ModelHub
+solver.install(upgrade=True, clip=True)
+
+modelhub = ModelHub.from_github_repo()
+modelhub.parse_objects()
+
+# Built once at import time; the request handler in this module reuses it.
+clip_model = register_pipline(modelhub, fmt="onnx")
+logger.success(
+    "register clip_model",
+    tool=clip_model.__class__.__name__,
+    modelhub=modelhub.__class__.__name__,
+)
+
+
+@router.post("/image_label_binary", response_model=SelfSupervisedResponse)
+async def challenge_image_label_binary(payload: SelfSupervisedPayload):
+    """Run the zero-shot CLIP tool on the request payload and wrap its output.
+
+    ``invoke_clip_tool`` is called without ``await``, so it runs to completion
+    before this coroutine hands control back to the event loop.
+    """
+    predictions = invoke_clip_tool(modelhub, payload, clip_model)
+    response = SelfSupervisedResponse(results=predictions)
+    return response
diff --git a/api/routers/datalake.py b/api/routers/datalake.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+# Time       : 2024/4/14 12:25
+# Author     : QIN2DIM
+# GitHub     : https://github.com/QIN2DIM
+# Description:
+from fastapi import APIRouter
+
+router = APIRouter()
+
+
+@router.get("/sync", response_model=str)
+async def datalake_sync_from_github_repo():
+    """Placeholder endpoint for syncing the datalake from the GitHub repository.
+
+    Not implemented yet. It answers with HTTP 501 instead of falling through
+    and returning ``None``, which cannot satisfy ``response_model=str``.
+
+    Raises:
+        HTTPException: always, with status code 501.
+    """
+    # Function-scope import keeps this endpoint self-contained.
+    from fastapi import HTTPException
+
+    raise HTTPException(status_code=501, detail="Not implemented")
+
+
+@router.get("/list", response_model=list)
+async def datalake_list():
+    """Placeholder endpoint for listing datalake entries.
+
+    Not implemented yet. It answers with HTTP 501 instead of falling through
+    and returning ``None``, which cannot satisfy ``response_model=list``.
+
+    Raises:
+        HTTPException: always, with status code 501.
+    """
+    # Function-scope import keeps this endpoint self-contained.
+    from fastapi import HTTPException
+
+    raise HTTPException(status_code=501, detail="Not implemented")
diff --git a/assets/image_label_binary/streetlamp/0a250855340d6919d7d40e027e14ddb7.jpeg b/assets/image_label_binary/streetlamp/0a250855340d6919d7d40e027e14ddb7.jpeg
diff --git a/assets/image_label_binary/streetlamp/0dc5bf222566c789b1400017681e6634.jpeg b/assets/image_label_binary/streetlamp/0dc5bf222566c789b1400017681e6634.jpeg
diff --git a/assets/image_label_binary/streetlamp/1c4b5b2a29bd01c9e190203b13191de1.jpeg b/assets/image_label_binary/streetlamp/1c4b5b2a29bd01c9e190203b13191de1.jpeg
diff --git a/assets/image_label_binary/streetlamp/1f625f42bdc6a648b2311b2b8ac9a573.jpeg b/assets/image_label_binary/streetlamp/1f625f42bdc6a648b2311b2b8ac9a573.jpeg
diff --git a/assets/image_label_binary/streetlamp/23006438f204b8032730e54d9c048f03.jpeg b/assets/image_label_binary/streetlamp/23006438f204b8032730e54d9c048f03.jpeg
diff --git a/assets/image_label_binary/streetlamp/2b588e6d099ae24de6bd002373237016.jpeg b/assets/image_label_binary/streetlamp/2b588e6d099ae24de6bd002373237016.jpeg
diff --git a/assets/image_label_binary/streetlamp/2b86207da94d4a29f01325b01794aff1.jpeg b/assets/image_label_binary/streetlamp/2b86207da94d4a29f01325b01794aff1.jpeg
diff --git a/assets/image_label_binary/streetlamp/2bef67821261920c3d1197c971d5db2c.jpeg b/assets/image_label_binary/streetlamp/2bef67821261920c3d1197c971d5db2c.jpeg
diff --git a/assets/image_label_binary/streetlamp/31506b75a59a3ffb9c01038d9757aaf2.jpeg b/assets/image_label_binary/streetlamp/31506b75a59a3ffb9c01038d9757aaf2.jpeg
diff --git a/assets/image_label_binary/streetlamp/33d4fa8df6b13f89bf98225553f612c4.jpeg b/assets/image_label_binary/streetlamp/33d4fa8df6b13f89bf98225553f612c4.jpeg
diff --git a/assets/image_label_binary/streetlamp/3590320e2af3c5b1a59d4f32d4646c74.jpeg b/assets/image_label_binary/streetlamp/3590320e2af3c5b1a59d4f32d4646c74.jpeg
diff --git a/assets/image_label_binary/streetlamp/396123559b43aed482cad3cd9b8ec94c.jpeg b/assets/image_label_binary/streetlamp/396123559b43aed482cad3cd9b8ec94c.jpeg
diff --git a/assets/image_label_binary/streetlamp/4528a9d72c1cfc8e906ffaa1dac031b4.jpeg b/assets/image_label_binary/streetlamp/4528a9d72c1cfc8e906ffaa1dac031b4.jpeg
diff --git a/assets/image_label_binary/streetlamp/50f0778136f0a88d4940bbd346d66aa0.jpeg b/assets/image_label_binary/streetlamp/50f0778136f0a88d4940bbd346d66aa0.jpeg
diff --git a/assets/image_label_binary/streetlamp/5b6bfec1aeabd840a015126cc03d6518.jpeg b/assets/image_label_binary/streetlamp/5b6bfec1aeabd840a015126cc03d6518.jpeg
diff --git a/assets/image_label_binary/streetlamp/6b9528ab5990660c6596018fbf8d7b7c.jpeg b/assets/image_label_binary/streetlamp/6b9528ab5990660c6596018fbf8d7b7c.jpeg
diff --git a/assets/image_label_binary/streetlamp/7260eb6e0ab45f696acf83dee9c2634d.jpeg b/assets/image_label_binary/streetlamp/7260eb6e0ab45f696acf83dee9c2634d.jpeg
diff --git a/assets/image_label_binary/streetlamp/7290a18a41e3346cafd06e7d6dbf1a26.jpeg b/assets/image_label_binary/streetlamp/7290a18a41e3346cafd06e7d6dbf1a26.jpeg
diff --git a/assets/image_label_binary/streetlamp/8f38391ab5dd9cca78c91217f5b8aa27.jpeg b/assets/image_label_binary/streetlamp/8f38391ab5dd9cca78c91217f5b8aa27.jpeg
diff --git a/assets/image_label_binary/streetlamp/8fdeb5c3338f0133f5fe1fe3b9216183.jpeg b/assets/image_label_binary/streetlamp/8fdeb5c3338f0133f5fe1fe3b9216183.jpeg
diff --git a/assets/image_label_binary/streetlamp/902d6009bb13dfa2cac585e2928efd29.jpeg b/assets/image_label_binary/streetlamp/902d6009bb13dfa2cac585e2928efd29.jpeg
diff --git a/assets/image_label_binary/streetlamp/9be25a532efedf77fd9e4d37ee2e301c.jpeg b/assets/image_label_binary/streetlamp/9be25a532efedf77fd9e4d37ee2e301c.jpeg
diff --git a/assets/image_label_binary/streetlamp/a698bd00f059ac0618a420fdf4765f17.jpeg b/assets/image_label_binary/streetlamp/a698bd00f059ac0618a420fdf4765f17.jpeg
diff --git a/assets/image_label_binary/streetlamp/a99e11c8196910a899c846f0a2bfa416.jpeg b/assets/image_label_binary/streetlamp/a99e11c8196910a899c846f0a2bfa416.jpeg
diff --git a/assets/image_label_binary/streetlamp/b6cdbafa182291f9d91c2474d16845d4.jpeg b/assets/image_label_binary/streetlamp/b6cdbafa182291f9d91c2474d16845d4.jpeg
diff --git a/assets/image_label_binary/streetlamp/c03d11a827eed8fc1d6bb083a92dce92.jpeg b/assets/image_label_binary/streetlamp/c03d11a827eed8fc1d6bb083a92dce92.jpeg
diff --git a/assets/image_label_binary/streetlamp/c7ce3abce8d176af001975717d634b7e.jpeg b/assets/image_label_binary/streetlamp/c7ce3abce8d176af001975717d634b7e.jpeg
diff --git a/assets/image_label_binary/streetlamp/d03f4f239191df50459ccd4d1316b9ab.jpeg b/assets/image_label_binary/streetlamp/d03f4f239191df50459ccd4d1316b9ab.jpeg
diff --git a/assets/image_label_binary/streetlamp/d0a7e87990072185fc7b19fb5fe158f8.jpeg b/assets/image_label_binary/streetlamp/d0a7e87990072185fc7b19fb5fe158f8.jpeg
diff --git a/assets/image_label_binary/streetlamp/d40775690548d6fbd5b0594fde70451b.jpeg b/assets/image_label_binary/streetlamp/d40775690548d6fbd5b0594fde70451b.jpeg
diff --git a/assets/image_label_binary/streetlamp/d76bff522ff571847549d8ee847075f6.jpeg b/assets/image_label_binary/streetlamp/d76bff522ff571847549d8ee847075f6.jpeg
diff --git a/assets/image_label_binary/streetlamp/dda8f3edc16e02e5241c21e6e27d6b4c.jpeg b/assets/image_label_binary/streetlamp/dda8f3edc16e02e5241c21e6e27d6b4c.jpeg
diff --git a/assets/image_label_binary/streetlamp/efb5927841ad1d6ec221129c13fe32a4.jpeg b/assets/image_label_binary/streetlamp/efb5927841ad1d6ec221129c13fe32a4.jpeg
diff --git a/automation/collector.py b/automation/collector.py
@@ -27,7 +27,7 @@
 from loguru import logger
 from playwright.async_api import BrowserContext as ASyncContext, async_playwright
 
-from hcaptcha_challenger import split_prompt_message, label_cleaning
+from hcaptcha_challenger import regularize_prompt_message, label_cleaning
 from hcaptcha_challenger.agents import AgentT, Malenia
 
 TEMPLATE_BINARY_DATASETS = """
@@ -81,7 +81,7 @@ def __post_init__(self):
             self.mixed_label = self.issue.title.split(" ")[1].strip()
             self.parent_prompt = self.issue.title.split("@")[-1].strip()
         else:
-            self.mixed_label = split_prompt_message(self.challenge_prompt, lang="en")
+            self.mixed_label = regularize_prompt_message(self.challenge_prompt)
             self.parent_prompt = "image_label_binary"
 
     @classmethod
@@ -245,7 +245,7 @@ async def _collete_datasets(self, context: ASyncContext, sitelink: str):
 
         await page.goto(sitelink)
 
-        await agent.handle_checkbox()
+        await agent.click_checkbox()
 
         for pth in range(1, self.per_times + 1):
             with suppress(Exception):

diff --git a/automation/sentinel.py b/automation/sentinel.py
@@ -22,8 +22,9 @@
 from playwright.async_api import BrowserContext as ASyncContext, async_playwright, Page
 
 import hcaptcha_challenger as solver
-from hcaptcha_challenger import label_cleaning, split_prompt_message
-from hcaptcha_challenger.agents import AgentT, QuestionResp, Malenia
+from hcaptcha_challenger import label_cleaning, regularize_prompt_message
+from hcaptcha_challenger.agents import AgentT, Malenia
+from hcaptcha_challenger.models import QuestionResp
 from hcaptcha_challenger.onnx.yolo import is_matched_ash_of_war
 from hcaptcha_challenger.utils import SiteKey
 
@@ -168,7 +169,7 @@ def _bypass_motion(self):
             since=datetime.now() - timedelta(days=14),
             assignee=self.assignees[0],
         ):
-            mixed_label = split_prompt_message(self.issue_prompt, lang="en")
+            mixed_label = regularize_prompt_message(self.issue_prompt)
             if issue.created_at + timedelta(hours=24) > datetime.now():
                 issue.add_to_labels("🏹 ci: sentinel")
             if mixed_label in issue.title.lower():
@@ -228,7 +229,7 @@ async def collete_datasets(self, context: ASyncContext, sitekey: str, batch: int
         sitelink = SiteKey.as_sitelink(sitekey)
         await page.goto(sitelink)
 
-        await agent.handle_checkbox()
+        await agent.click_checkbox()
 
         for pth in range(1, batch + 1):
             try:

diff --git a/backend/main.py b/backend/main.py
diff --git a/docker/Dockerfile b/docker/Dockerfile
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
diff --git a/examples/clip_datalake.json b/examples/clip_datalake.json
diff --git a/examples/demo_classifier_self_supervised.py b/examples/demo_classifier_self_supervised.py
@@ -14,7 +14,7 @@
 solver.install(upgrade=True, clip=True)
 
 assets_dir = Path(__file__).parent.parent.joinpath("assets")
-images_dir = assets_dir.joinpath("image_label_binary", "off_road_vehicle")
+images_dir = assets_dir.joinpath("image_label_binary/off_road_vehicle")
 
 prompt = "Please click each image containing a sedan car"
 
@@ -33,7 +33,7 @@ def prelude_self_supervised_config():
     modelhub.parse_objects()
     for prompt_, serialized_binary in datalake_post.items():
         modelhub.datalake[prompt_] = DataLake.from_serialized(serialized_binary)
-    clip_model = register_pipline(modelhub)
+    clip_model = register_pipline(modelhub, fmt="onnx")
 
     return modelhub, clip_model
 
@@ -49,13 +49,30 @@ def get_test_images() -> List[Path]:
 
 
 def demo():
+    def output_markdown_preview():
+        """Show the classification result for every test image.
+
+        With pandas and tabulate installed (``pip install pandas tabulate``) the
+        results are rendered as a Markdown table, written to ``result.md`` and
+        printed; otherwise each path/result pair is printed on its own line.
+        """
+        try:
+            import pandas as pd
+            import tabulate
+        except ImportError:
+            # Optional dependencies are missing: fall back to plain prints.
+            for image_path, result in zip(image_paths, results):
+                print(image_path, f"{result=}")
+            return
+
+        rows = []
+        for image_path, result in zip(image_paths, results):
+            rows.append({"image": f"![]({image_path})", "result": result})
+        table = pd.DataFrame.from_records(rows).to_markdown()
+        report = f"- prompt: `{prompt}`\n\n{table}"
+        Path("result.md").write_text(report, encoding="utf8")
+        print(report)
+
     modelhub, clip_model = prelude_self_supervised_config()
     image_paths = get_test_images()
 
     classifier = solver.BinaryClassifier(modelhub=modelhub, clip_model=clip_model)
     if results := classifier.execute(prompt, image_paths, self_supervised=True):
-        for image_path, result in zip(image_paths, results):
-            print(f"{image_path.name=} - {result=} {classifier.model_name=}")
+        output_markdown_preview()
 
 
 if __name__ == "__main__":

diff --git a/examples/demo_find_unique_object.py b/examples/demo_find_unique_object.py
@@ -7,14 +7,14 @@
 from tqdm import tqdm
 
 import hcaptcha_challenger as solver
-from hcaptcha_challenger.components.cv_toolkit.appears_only_once import (
+from hcaptcha_challenger.onnx.modelhub import ModelHub
+from hcaptcha_challenger.onnx.yolo import YOLOv8Seg
+from hcaptcha_challenger.tools.cv_toolkit.appears_only_once import (
     limited_radius,
     annotate_objects,
     find_unique_object,
     find_unique_color,
 )
-from hcaptcha_challenger.onnx.modelhub import ModelHub
-from hcaptcha_challenger.onnx.yolo import YOLOv8Seg
 
 solver.install(upgrade=True)
 

diff --git a/examples/faker_client.py b/examples/faker_client.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+# Time       : 2024/4/1 21:10
+# Author     : QIN2DIM
+# GitHub     : https://github.com/QIN2DIM
+# Description:
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+
+import dotenv
+from playwright.async_api import async_playwright, BrowserContext
+
+from hcaptcha_challenger.agents import AgentV
+from hcaptcha_challenger.agents import Malenia
+from hcaptcha_challenger.utils import SiteKey
+
+dotenv.load_dotenv()
+
+# 1. You need to deploy sub-thread tasks and actively run `install(upgrade=True)` every 20 minutes
+# 2. You need to make sure to run `install(upgrade=True, clip=True)` before each instantiation
+# from hcaptcha_challenger import install
+# install(upgrade=True, clip=True)
+
+
+async def main(headless: bool = False):
+    """Launch Chromium, run ``mime`` in a stealth-patched context, then clean up.
+
+    Args:
+        headless: Forwarded to ``chromium.launch``.
+    """
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=headless)
+        try:
+            context = await browser.new_context(locale="en-US")
+            try:
+                await Malenia.apply_stealth(context)
+                await mime(context)
+            finally:
+                # Close the context even when stealth setup or `mime` raises.
+                await context.close()
+        finally:
+            # Release the browser explicitly rather than relying on teardown.
+            await browser.close()
+
+
+async def mime(context: BrowserContext):
+    """Open a page and run the agent in the mode selected by ``EXECUTION``.
+
+    ``EXECUTION`` is a module-level name; in this file it is only assigned
+    inside the ``__main__`` guard. Any value other than "challenge" or
+    "collect" leaves the freshly created agent unused.
+    """
+    sitekey = SiteKey.user_easy
+    page = await context.new_page()
+    agent = AgentV.into_solver(page=page, tmp_dir=Path("tmp_dir"))
+
+    if EXECUTION == "collect":
+        await agent.wait_for_collect(sitekey, batch=2)
+        return
+
+    if EXECUTION == "challenge":
+        await page.goto(SiteKey.as_sitelink(sitekey))
+        await agent.ms.click_checkbox()
+        await agent.wait_for_challenge()
+
+
+if __name__ == "__main__":
+    # `mime` reads this module-level switch to pick its branch.
+    EXECUTION = "collect"
+    # EXECUTION = "challenge"
+
+    # main() has no return statement, so there is no result worth binding.
+    asyncio.run(main(headless=False))
diff --git a/examples/invoke_remote_solver.py b/examples/invoke_remote_solver.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+# Time       : 2022/9/23 17:28
+# Author     : QIN2DIM
+# Github     : https://github.com/QIN2DIM
+# Description:
+import base64
+import random
+from pathlib import Path
+from typing import List
+
+# pip install pandas tabulate
+import pandas as pd
+from httpx import Client
+
+BASE_URL = "http://localhost:33777"
+client = Client(base_url=BASE_URL, timeout=30)
+
+
+def invoke_remove_tool(self_supervised_payload: dict):
+    """POST the payload to the binary-label endpoint and return its ``results``.
+
+    Any exception raised by ``response.raise_for_status()`` propagates to the
+    caller.
+    """
+    endpoint = "/challenge/image_label_binary"
+    response = client.post(endpoint, json=self_supervised_payload)
+    response.raise_for_status()
+    return response.json()["results"]
+
+
+def show_and_cache(image_paths: List[Path], results: List[str], prompt: str):
+    """Print a Markdown table of image/result pairs and save it to a file.
+
+    Args:
+        image_paths: Images that were sent for classification.
+        results: One result per image, paired positionally with ``image_paths``.
+        prompt: Prompt shown above the table and used to name the output file.
+
+    The report is written to ``results <prompt>.md`` in the current working
+    directory. Characters that cannot appear in a file name on common
+    platforms are replaced with ``_`` in that name only.
+    """
+    import re
+
+    output = [
+        {"image": f"![]({image_path})", "result": result}
+        for image_path, result in zip(image_paths, results)
+    ]
+    mdk = pd.DataFrame.from_records(output).to_markdown()
+    mdk = f"- prompt: `{prompt}`\n\n{mdk}"
+    print(mdk)
+
+    # The prompt is free text: a path separator or reserved character would
+    # otherwise point the write at a missing directory or an invalid name.
+    safe_prompt = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", prompt)
+    fp = Path(f"results {safe_prompt}.md")
+    fp.write_text(mdk, encoding="utf8")
+    print(f"\nsaved ->> {fp.resolve()}")
+
+
+def run():
+    """Send up to five randomly chosen streetlamp images to the solver and report.
+
+    Returns early, doing nothing, when the assets directory has no ``*.jpeg``.
+    """
+    repo_root = Path(__file__).parent.parent
+    images_dir = repo_root.joinpath("assets/image_label_binary/streetlamp")
+    image_paths = list(images_dir.glob("*.jpeg"))
+    if not image_paths:
+        return
+    random.shuffle(image_paths)
+    image_paths = image_paths[:5]
+
+    prompt = "streetlamp"
+    # The payload carries each image as base64 text.
+    challenge_images = []
+    for fp in image_paths:
+        challenge_images.append(base64.b64encode(fp.read_bytes()).decode())
+
+    self_supervised_payload = dict(
+        prompt=prompt,
+        challenge_images=challenge_images,
+        positive_labels=["streetlamp", "light"],
+        negative_labels=["duck", "shark", "swan"],
+    )
+
+    results = invoke_remove_tool(self_supervised_payload)
+
+    show_and_cache(image_paths, results, prompt)
+
+
+if __name__ == "__main__":
+    run()