Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0.11 #980

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open

v0.11 #980

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,11 @@ assets/image_label_binary/off_road_vehicle/20**
profile_pluggable_model.md
./*.png
tests/record_json
docs/lvm_challenge/*.jpeg
docs/lvm_challenge/*.png
logs
examples/*.md
docs/logs/
docs/*.md
node_modules
pnpm-lock.yaml
25 changes: 25 additions & 0 deletions api/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from fastapi import FastAPI
from fastapi.responses import RedirectResponse

from routers import challenge_router, datalake_router

# ASGI application object; the `uvicorn.run("main:app", ...)` call in this
# module refers to it by this name.
app = FastAPI()

# Mount each feature router under its own URL prefix.
app.include_router(challenge_router, prefix="/challenge")
app.include_router(datalake_router, prefix="/datalake")


@app.get("/")
async def home():
    """Redirect requests for the API root to the project's GitHub page."""
    project_url = "https://github.com/QIN2DIM/hcaptcha-challenger"
    return RedirectResponse(url=project_url)


@app.get("/ping", response_model=str)
async def ping():
    """Answer every request with the literal string ``pong``."""
    reply = "pong"
    return reply


if __name__ == "__main__":
    # uvicorn is only needed when this module is executed directly as a script.
    from uvicorn import run as serve

    serve("main:app", host="0.0.0.0", port=33777)
5 changes: 2 additions & 3 deletions backend/readme.md → api/readme.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
```bash
pip install fastapi
pip install uvicorn[standard]
pip install fastapi[all]
```

```bash
uvicorn main:app --reload
python main.py
```

```markdown
Expand Down
9 changes: 9 additions & 0 deletions api/routers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
# Time : 2024/4/14 12:24
# Author : QIN2DIM
# GitHub : https://github.com/QIN2DIM
# Description:
from .challenge import router as challenge_router
from .datalake import router as datalake_router

__all__ = ["challenge_router", "datalake_router"]
32 changes: 32 additions & 0 deletions api/routers/challenge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Time : 2024/4/14 12:25
# Author : QIN2DIM
# GitHub : https://github.com/QIN2DIM
# Description:

from fastapi import APIRouter
from loguru import logger

import hcaptcha_challenger as solver
from hcaptcha_challenger import ModelHub, register_pipline
from hcaptcha_challenger.models import SelfSupervisedResponse, SelfSupervisedPayload
from hcaptcha_challenger.tools.zero_shot_image_classifier import invoke_clip_tool

router = APIRouter()

# Init local-side of the ModelHub
# NOTE: every statement in this section runs at import time, i.e. as soon as
# this router module is imported.
solver.install(upgrade=True, clip=True)

# Module-level ModelHub instance; the request handler in this module passes it
# to `invoke_clip_tool`.
modelhub = ModelHub.from_github_repo()
modelhub.parse_objects()

# Module-level CLIP pipeline, registered once and reused by the request handler.
# NOTE(review): presumably `fmt="onnx"` selects the ONNX variant of the
# pipeline — confirm against `register_pipline`.
clip_model = register_pipline(modelhub, fmt="onnx")
logger.success(
"register clip_model", tool=clip_model.__class__.__name__, modelhub=modelhub.__class__.__name__
)


@router.post("/image_label_binary", response_model=SelfSupervisedResponse)
async def challenge_image_label_binary(payload: SelfSupervisedPayload):
    """Handle an ``image_label_binary`` challenge request.

    The payload is handed, together with the module-level ``modelhub`` and
    ``clip_model``, to ``invoke_clip_tool``; whatever that call returns is
    wrapped in a ``SelfSupervisedResponse`` under ``results``.
    """
    return SelfSupervisedResponse(results=invoke_clip_tool(modelhub, payload, clip_model))
18 changes: 18 additions & 0 deletions api/routers/datalake.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
# Time : 2024/4/14 12:25
# Author : QIN2DIM
# GitHub : https://github.com/QIN2DIM
# Description:
from fastapi import APIRouter

router = APIRouter()


@router.get("/sync", response_model=str)
async def datalake_sync_from_github_repo():
    """Placeholder for the datalake sync endpoint (not implemented yet).

    Falling through and returning ``None`` would not satisfy the declared
    ``response_model=str`` and would surface to the client as an opaque
    500 error, so the handler reports the missing feature explicitly.

    Raises:
        HTTPException: always, with status code 501 (Not Implemented).
    """
    # Local import: fastapi is already a dependency of this module, and this
    # keeps the block self-contained.
    from fastapi import HTTPException

    raise HTTPException(status_code=501, detail="datalake sync is not implemented yet")


@router.get("/list", response_model=list)
async def datalake_list():
    """Placeholder for the datalake listing endpoint (not implemented yet).

    Falling through and returning ``None`` would not satisfy the declared
    ``response_model=list`` and would surface to the client as an opaque
    500 error, so the handler reports the missing feature explicitly.

    Raises:
        HTTPException: always, with status code 501 (Not Implemented).
    """
    # Local import: fastapi is already a dependency of this module, and this
    # keeps the block self-contained.
    from fastapi import HTTPException

    raise HTTPException(status_code=501, detail="datalake list is not implemented yet")
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 3 additions & 3 deletions automation/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from loguru import logger
from playwright.async_api import BrowserContext as ASyncContext, async_playwright

from hcaptcha_challenger import split_prompt_message, label_cleaning
from hcaptcha_challenger import regularize_prompt_message, label_cleaning
from hcaptcha_challenger.agents import AgentT, Malenia

TEMPLATE_BINARY_DATASETS = """
Expand Down Expand Up @@ -81,7 +81,7 @@ def __post_init__(self):
self.mixed_label = self.issue.title.split(" ")[1].strip()
self.parent_prompt = self.issue.title.split("@")[-1].strip()
else:
self.mixed_label = split_prompt_message(self.challenge_prompt, lang="en")
self.mixed_label = regularize_prompt_message(self.challenge_prompt)
self.parent_prompt = "image_label_binary"

@classmethod
Expand Down Expand Up @@ -245,7 +245,7 @@ async def _collete_datasets(self, context: ASyncContext, sitelink: str):

await page.goto(sitelink)

await agent.handle_checkbox()
await agent.click_checkbox()

for pth in range(1, self.per_times + 1):
with suppress(Exception):
Expand Down
9 changes: 5 additions & 4 deletions automation/sentinel.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
from playwright.async_api import BrowserContext as ASyncContext, async_playwright, Page

import hcaptcha_challenger as solver
from hcaptcha_challenger import label_cleaning, split_prompt_message
from hcaptcha_challenger.agents import AgentT, QuestionResp, Malenia
from hcaptcha_challenger import label_cleaning, regularize_prompt_message
from hcaptcha_challenger.agents import AgentT, Malenia
from hcaptcha_challenger.models import QuestionResp
from hcaptcha_challenger.onnx.yolo import is_matched_ash_of_war
from hcaptcha_challenger.utils import SiteKey

Expand Down Expand Up @@ -168,7 +169,7 @@ def _bypass_motion(self):
since=datetime.now() - timedelta(days=14),
assignee=self.assignees[0],
):
mixed_label = split_prompt_message(self.issue_prompt, lang="en")
mixed_label = regularize_prompt_message(self.issue_prompt)
if issue.created_at + timedelta(hours=24) > datetime.now():
issue.add_to_labels("🏹 ci: sentinel")
if mixed_label in issue.title.lower():
Expand Down Expand Up @@ -228,7 +229,7 @@ async def collete_datasets(self, context: ASyncContext, sitekey: str, batch: int
sitelink = SiteKey.as_sitelink(sitekey)
await page.goto(sitelink)

await agent.handle_checkbox()
await agent.click_checkbox()

for pth in range(1, batch + 1):
try:
Expand Down
15 changes: 0 additions & 15 deletions backend/main.py

This file was deleted.

Empty file added docker/Dockerfile
Empty file.
Empty file added docker/docker-compose.yaml
Empty file.
Empty file added examples/clip_datalake.json
Empty file.
25 changes: 21 additions & 4 deletions examples/demo_classifier_self_supervised.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
solver.install(upgrade=True, clip=True)

assets_dir = Path(__file__).parent.parent.joinpath("assets")
images_dir = assets_dir.joinpath("image_label_binary", "off_road_vehicle")
images_dir = assets_dir.joinpath("image_label_binary/off_road_vehicle")

prompt = "Please click each image containing a sedan car"

Expand All @@ -33,7 +33,7 @@ def prelude_self_supervised_config():
modelhub.parse_objects()
for prompt_, serialized_binary in datalake_post.items():
modelhub.datalake[prompt_] = DataLake.from_serialized(serialized_binary)
clip_model = register_pipline(modelhub)
clip_model = register_pipline(modelhub, fmt="onnx")

return modelhub, clip_model

Expand All @@ -49,13 +49,30 @@ def get_test_images() -> List[Path]:


def demo():
def output_markdown_preview():
"""# pip install pandas tabulate"""
try:
import pandas as pd
import tabulate
except ImportError:
for image_path, result in zip(image_paths, results):
print(image_path, f"{result=}")
else:
output = [
{"image": f"![]({image_path})", "result": result}
for image_path, result in zip(image_paths, results)
]
mdk = pd.DataFrame.from_records(output).to_markdown()
mdk = f"- prompt: `{prompt}`\n\n{mdk}"
Path("result.md").write_text(mdk, encoding="utf8")
print(mdk)

modelhub, clip_model = prelude_self_supervised_config()
image_paths = get_test_images()

classifier = solver.BinaryClassifier(modelhub=modelhub, clip_model=clip_model)
if results := classifier.execute(prompt, image_paths, self_supervised=True):
for image_path, result in zip(image_paths, results):
print(f"{image_path.name=} - {result=} {classifier.model_name=}")
output_markdown_preview()


if __name__ == "__main__":
Expand Down
6 changes: 3 additions & 3 deletions examples/demo_find_unique_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
from tqdm import tqdm

import hcaptcha_challenger as solver
from hcaptcha_challenger.components.cv_toolkit.appears_only_once import (
from hcaptcha_challenger.onnx.modelhub import ModelHub
from hcaptcha_challenger.onnx.yolo import YOLOv8Seg
from hcaptcha_challenger.tools.cv_toolkit.appears_only_once import (
limited_radius,
annotate_objects,
find_unique_object,
find_unique_color,
)
from hcaptcha_challenger.onnx.modelhub import ModelHub
from hcaptcha_challenger.onnx.yolo import YOLOv8Seg

solver.install(upgrade=True)

Expand Down
56 changes: 56 additions & 0 deletions examples/faker_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
# Time : 2024/4/1 21:10
# Author : QIN2DIM
# GitHub : https://github.com/QIN2DIM
# Description:
from __future__ import annotations

import asyncio
from pathlib import Path

import dotenv
from playwright.async_api import async_playwright, BrowserContext

from hcaptcha_challenger.agents import AgentV
from hcaptcha_challenger.agents import Malenia
from hcaptcha_challenger.utils import SiteKey

dotenv.load_dotenv()

# 1. Deploy a background (sub-thread) task that re-runs `install(upgrade=True)` every 20 minutes
# 2. Make sure `install(upgrade=True, clip=True)` has been run before each instantiation
# from hcaptcha_challenger import install
# install(upgrade=True, clip=True)


async def main(headless: bool = False):
    """Launch Chromium, apply the stealth patches and run :func:`mime`.

    The browser context and the browser are closed even when ``mime`` (or the
    stealth setup) raises, so a failed run does not leave them open.

    Args:
        headless: Passed through to ``chromium.launch``.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        try:
            context = await browser.new_context(locale="en-US")
            try:
                await Malenia.apply_stealth(context)
                await mime(context)
            finally:
                await context.close()
        finally:
            await browser.close()


async def mime(context: BrowserContext, execution: str | None = None):
    """Run one demo session in ``context``.

    Args:
        context: Browser context in which a new page is opened.
        execution: ``"challenge"`` or ``"collect"``. When omitted, the
            module-level ``EXECUTION`` switch is used if it exists (it is set
            when this file is run as a script); otherwise ``"collect"``.

    Raises:
        ValueError: if the resolved mode is neither ``"challenge"`` nor
            ``"collect"``.
    """
    # EXECUTION is only defined under the ``__main__`` guard, so look it up
    # defensively instead of failing with NameError when this module is imported.
    mode = execution if execution is not None else globals().get("EXECUTION", "collect")
    if mode not in ("challenge", "collect"):
        raise ValueError(f"unknown execution mode: {mode!r}")

    page = await context.new_page()

    agent = AgentV.into_solver(page=page, tmp_dir=Path("tmp_dir"))

    sitekey = SiteKey.user_easy

    if mode == "challenge":
        sitelink = SiteKey.as_sitelink(sitekey)
        await page.goto(sitelink)
        await agent.ms.click_checkbox()
        await agent.wait_for_challenge()
    else:
        await agent.wait_for_collect(sitekey, batch=2)


if __name__ == "__main__":
    # Module-level switch read by `mime`: "collect" or "challenge".
    EXECUTION = "collect"
    # EXECUTION = "challenge"

    # `main` has no return value, so there is nothing to bind here.
    asyncio.run(main(headless=False))
64 changes: 64 additions & 0 deletions examples/invoke_remote_solver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
# Time : 2022/9/23 17:28
# Author : QIN2DIM
# Github : https://github.com/QIN2DIM
# Description:
import base64
import random
from pathlib import Path
from typing import List

# pip install pandas tabulate
import pandas as pd
from httpx import Client

# Address of the solver API this script talks to.
# NOTE(review): the port matches the one passed to `uvicorn.run` in
# api/main.py — keep the two in sync.
BASE_URL = "http://localhost:33777"
# Module-level HTTP client used by `invoke_remove_tool`.
client = Client(base_url=BASE_URL, timeout=30)


def invoke_remove_tool(self_supervised_payload: dict):
    """POST the payload to the solver and return the ``results`` field of its JSON reply.

    ``raise_for_status`` is called on the response before the body is read, so
    an error status propagates as an exception instead of being parsed.
    """
    resp = client.post("/challenge/image_label_binary", json=self_supervised_payload)
    resp.raise_for_status()
    return resp.json()["results"]


def show_and_cache(image_paths: List[Path], results: List[str], prompt: str):
    """Print the results as a markdown table and save the same text to disk.

    Each image path is paired with its result (via ``zip``) into one table row.
    The report is written to ``results {prompt}.md`` relative to the current
    working directory.
    """
    rows = []
    for path, verdict in zip(image_paths, results):
        rows.append({"image": f"![]({path})", "result": verdict})

    table = pd.DataFrame.from_records(rows).to_markdown()
    report = f"- prompt: `{prompt}`\n\n{table}"
    print(report)

    target = Path(f"results {prompt}.md")
    target.write_text(report, encoding="utf8")
    print(f"\nsaved ->> {target.resolve()}")


def run():
    """Send up to five randomly chosen streetlamp images to the remote solver.

    Returns without doing anything when the assets directory contains no
    ``*.jpeg`` files. Otherwise the images are base64-encoded, posted through
    ``invoke_remove_tool`` and the answers are handed to ``show_and_cache``.
    """
    repo_root = Path(__file__).parent.parent
    images_dir = repo_root.joinpath("assets/image_label_binary/streetlamp")
    candidates = list(images_dir.glob("*.jpeg"))
    if not candidates:
        return

    # Shuffle in place, then keep the first five as the sample.
    random.shuffle(candidates)
    image_paths = candidates[:5]

    prompt = "streetlamp"
    encoded_images = []
    for image_file in image_paths:
        encoded_images.append(base64.b64encode(image_file.read_bytes()).decode())

    request_body = {
        "prompt": prompt,
        "challenge_images": encoded_images,
        "positive_labels": ["streetlamp", "light"],
        "negative_labels": ["duck", "shark", "swan"],
    }

    show_and_cache(image_paths, invoke_remove_tool(request_body), prompt)


if __name__ == "__main__":
    # Script entry point: run the demo once.
    run()