Skip to content

Commit

Permalink
ffe
Browse files Browse the repository at this point in the history
  • Loading branch information
QIN2DIM committed Nov 2, 2023
1 parent 92e22d4 commit fa3358a
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 97 deletions.
18 changes: 17 additions & 1 deletion automation/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def from_modelhub(cls, modelhub: ModelHub):
def to_yaml(self, path: Path | None = None):
path = path or Path("objects-tmp.yaml")
with open(path, "w", encoding="utf8") as file:
yaml.safe_dump(self.__dict__, file, sort_keys=False, allow_unicode=True)
yaml.safe_dump(self.__dict__, file, sort_keys=False, allow_unicode=True,)
return path

@staticmethod
Expand Down Expand Up @@ -209,5 +209,21 @@ def rolling_upgrade(asset_id=None, matched_label: str = ""):
logger.warning(err)


def find_asset_id(name_prefix: str):
"""Print the name and id of release assets whose name starts with ``name_prefix``.

Recovery helper: if the workflow is interrupted before the rolling upgrade,
this function can be used to look up a resource's ``asset_id`` from the
model-name prefix. Matches are printed; nothing is returned.
"""
repo = Annotator.repo
modelhub_title = "ONNX ModelHub"

# Only releases whose title equals ``modelhub_title`` are inspected.
for release in repo.get_releases():
if release.title != modelhub_title:
continue
for asset in release.get_assets():
# Skip assets that do not carry the requested prefix.
if not asset.name.startswith(name_prefix):
continue
print(asset.name, asset.id)
# NOTE(review): indentation is lost in this view, so it is unclear which
# loop this ``break`` exits (first matching asset vs. first matching
# release) - confirm against the real file.
break


if __name__ == "__main__":
find_asset_id("nested_smallest_bird2310")
rolling_upgrade()
6 changes: 5 additions & 1 deletion automation/assets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,15 @@ def merge(self, fd: Path, td: Path):
def run():
# the largest animal https://github.com/QIN2DIM/hcaptcha-challenger/issues/797
# red panda https://github.com/QIN2DIM/hcaptcha-challenger/issues/896
sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/826" # the smallest animal
# sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/826" # the smallest animal
sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/896" # red panda
sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/897" # keyboard
# sources = "the largest animal"
# sources = "please click on the largest animal"
sources = "the smallest animal"
# sources = "natural landscape"
# sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/857" # flamingo
# sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/853"
am = AssetsManager.from_sources(sources)
am.execute()

Expand Down
58 changes: 47 additions & 11 deletions automation/auto_labeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,14 @@
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Tuple, List, Dict
from typing import Tuple, List, Dict, NoReturn

import hcaptcha_challenger as solver
from PIL import Image
from hcaptcha_challenger import DataLake, ModelHub, ZeroShotImageClassifier, register_pipline
from tqdm import tqdm

from flow_card import flow_card

logging.basicConfig(
level=logging.INFO, stream=sys.stdout, format="%(asctime)s - %(levelname)s - %(message)s"
)
from flow_card import flow_card, flow_card_nested_animal

solver.install(upgrade=True)

Expand Down Expand Up @@ -181,8 +177,45 @@ def execute(self, model, substack: Dict[str, Dict[str, List[str]]] = None, **kwa
stk.transform(base_dir=self.output_dir)


def run():
images_dir = Path(__file__).parent.parent.joinpath("database2309")
def check_card(pending_card: list) -> NoReturn | bool:
require_keys = ["positive_labels", "negative_labels", "joined_dirs"]
ok = True
for i, card in enumerate(pending_card):
for rk in require_keys:
if rk not in card or not isinstance(card[rk], list):
logging.error(f"card 缺少必要的键值对 - {i=} key={rk} {card=}")
ok = False
elif len(card[rk]) == 0:
logging.error(f"card 的必要信息不能为空 - {i=} key={rk} {require_keys=}")
ok = False
elif len(card[rk]) < 2:
logging.error(f"CLIP 模型在二分类任务上的准确率可能会比较糟糕 - {i=} ")
ok = False

for label in card[rk]:
if not isinstance(label, str):
logging.error(
f"card 的 require_keys 的值必须是 List[str] 类型 - {i=} {label=} {type(label)=}"
)
ok = False
if len(label) <= 1:
logging.error(f"这可能是一个异常输入 - {i=} {label=}")
ok = False

if not ok:
raise ValueError("card 存在致命的写法问题,请逐一排查后再启动程序!")
return True


def run(suffix_filter: str, cards: list, base_dirname: str = "database2309"):
if not suffix_filter:
return

logging.info("正在检查运行配置")
check_card(cards)
logging.info("运行配置准确无误!")

images_dir = Path(__file__).parent.parent.joinpath(base_dirname)

modelhub = ModelHub.from_github_repo()
modelhub.parse_objects()
Expand All @@ -191,9 +224,9 @@ def run():
# the NVIDIA graphics card is available
model = register_pipline(modelhub, fmt="transformers")

for card in flow_card:
for card in cards:
# Filter out the task cards we care about
if "f1-bird" not in card["joined_dirs"]:
if suffix_filter not in card["joined_dirs"]:
continue
# Generating a dataclass from serialized data
dl = DataLake(
Expand All @@ -210,4 +243,7 @@ def run():


if __name__ == "__main__":
run()
logging.info(f"Loading {len(flow_card)=}")
logging.info(f"Loading {len(flow_card_nested_animal)=}")

run("s1_bird", cards=flow_card_nested_animal)
109 changes: 73 additions & 36 deletions automation/check_yolo_det_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,58 +10,95 @@
import sys
from pathlib import Path

import cv2
import onnxruntime
from hcaptcha_challenger import install, YOLOv8
from hcaptcha_challenger.components.yolo_mocker import CcYOLO
from hcaptcha_challenger.onnx.modelhub import request_resource
from tqdm import tqdm

install(upgrade=True)
model_url = "https://github.com/QIN2DIM/hcaptcha-challenger/releases/download/model/"

this_dir = Path(__file__).parent
models_dir = this_dir.joinpath("tmp_models")
models_dir.mkdir(exist_ok=True)

class CbYOLO(CcYOLO):
    """``CcYOLO`` subclass that accepts caller-supplied class names.

    ``get_model`` builds the detector from ``self.classes`` and only refuses
    to run when neither the model hub index nor the caller provides classes.
    """

    def __init__(self, model_name: str, images_absolute_dir: Path, this_dir: Path, classes=None):
        super().__init__(model_name, images_absolute_dir, this_dir)
        # Class names handed to the detector in get_model().
        self.classes = classes

    def get_model(self) -> YOLOv8 | None:
        """Download the model if needed and return a detector for it.

        :return: the detector, or ``None`` when the model could not be loaded.
        :raises AttributeError: when the model hub has no classes for
            ``self.model_name`` and none were supplied to the constructor.
        """
        classes = self.modelhub.ashes_of_war.get(self.model_name)
        if not classes and not self.classes:
            raise AttributeError(f"Model name not found - {self.model_name=}")
        print(f">> Match model - {self.model_name=}")

        model_path = Path(self.model_name)
        if not model_path.exists():
            request_resource(self.model_url + self.model_name, model_path)

        try:
            session = onnxruntime.InferenceSession(
                model_path, providers=onnxruntime.get_available_providers()
            )
            # NOTE(review): the hub's ``classes`` are looked up but the detector
            # always receives ``self.classes`` - confirm this is intended.
            detector = YOLOv8.from_pluggable_model(session, self.classes)
        except Exception as err:
            print(err)
            # Drop the unusable model so the next run fetches it again.
            # shutil.rmtree() only removes directories; with ignore_errors=True
            # it silently left a model *file* in place.
            if model_path.is_dir():
                shutil.rmtree(model_path, ignore_errors=True)
            else:
                model_path.unlink(missing_ok=True)
            return None
        return detector

def load_model(model_path, classes):
    """Open an ONNX inference session for ``model_path`` and wrap it in a YOLOv8 detector.

    :param model_path: location of the ONNX model handed to onnxruntime.
    :param classes: class names passed through to ``YOLOv8.from_pluggable_model``.
    :return: the detector built from the session.
    """
    # Use every execution provider onnxruntime reports as available.
    providers = onnxruntime.get_available_providers()
    session = onnxruntime.InferenceSession(model_path, providers=providers)
    return YOLOv8.from_pluggable_model(session, classes)


def execute(input_dir: Path | str, model_name: str, model_path: Path, classes: list):
    """Run the detector over every new image in ``input_dir`` and save annotated copies.

    Images that already have a same-named file in the output directory are
    skipped. For each remaining image the highest-ranked detection is drawn as
    a rectangle and written to ``yolo_mocker/<input_dir name>``; images with no
    detection are copied unchanged into its ``miss`` sub-directory.

    :param input_dir: directory holding the images to check.
    :param model_name: model file name, appended to ``model_url`` for download.
    :param model_path: local path of the model; downloaded when it is absent.
    :param classes: class names handed to ``load_model``.
    :return: the directory containing the annotated images.
    """

    def draw(detections, src_path, dst_path):
        # Detections are ranked by their last field; the top one is drawn.
        ranked = sorted(detections, key=lambda x: x[-1])
        _text, start, end, _ = ranked[-1]
        canvas = cv2.imread(str(src_path))
        top_left = int(start[0]), int(start[1])
        bottom_right = int(end[0]), int(end[1])
        cv2.rectangle(canvas, top_left, bottom_right, (87, 241, 126), 2)
        cv2.imwrite(str(dst_path), canvas)

    input_dir = Path(input_dir).absolute()

    # Fetch the model on first use, then build the detector.
    if not model_path.exists():
        request_resource(model_url + model_name, model_path)
    detector = load_model(model_path, classes)

    output_dir = this_dir.joinpath("yolo_mocker", input_dir.name)
    output_miss_dir = output_dir.joinpath("miss")
    for folder in (output_dir, output_miss_dir):
        folder.mkdir(parents=True, exist_ok=True)

    # Only regular files that have not been processed before are queued.
    candidates = (input_dir.joinpath(name) for name in os.listdir(input_dir))
    pending_image_paths = [
        path
        for path in candidates
        if path.is_file() and not output_dir.joinpath(path.name).exists()
    ]

    total = len(pending_image_paths)
    handle = 0
    miss = 0

    with tqdm(total=total, desc=f"Labeling | ") as progress:
        for image_path in pending_image_paths:
            results = detector(image_path, shape_type="bounding_box")
            progress.update(1)
            if results:
                draw(results, image_path, output_dir.joinpath(image_path.name))
                handle += 1
            else:
                # Keep undetected images aside for manual review.
                shutil.copyfile(image_path, output_miss_dir.joinpath(image_path.name))
                miss += 1
    print(f">> Statistic - {total=} {handle=} {miss=}")

    return output_dir


def run():
images_dir = r"zip_dir/click_on_the_turtle_s_head_default"

# model_name = "burl_head_of_the_lion_2309_yolov8s.onnx"
model_name = "head_of_the_animal_turtle_2309_yolov8s.onnx"
# model_name = "head_of_the_meerkat_2311_yolov8n.onnx"
classes = ["animal-head"]
images_dir = r"zip_dir/click_on_the_turtle_s_head_default"

this_dir = Path(__file__).parent
output_dir = this_dir.joinpath("yolo_mocker")

if isinstance(images_dir, str):
images_dir = Path(images_dir)
images_dir = images_dir.absolute()

ccy = CbYOLO(model_name, images_dir, output_dir, classes)
ccy.spawn()
output_dir = execute(
input_dir=images_dir,
model_name=model_name,
model_path=models_dir.joinpath(model_name),
classes=classes,
)

if "win32" in sys.platform:
os.startfile(ccy.output_dir)
os.startfile(output_dir)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion automation/datasets_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
collected = []
per_times = 60
tmp_dir = Path(__file__).parent.joinpath("tmp_dir")
sitekey = SiteKey.discord
sitekey = SiteKey.epic


async def collete_datasets(context: ASyncContext):
Expand Down
89 changes: 47 additions & 42 deletions automation/flow_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
# GitHub : https://github.com/QIN2DIM
# Description:
# Run `assets_manager.py` to get test data from GitHub issues
import logging
import sys

logging.basicConfig(
level=logging.INFO, stream=sys.stdout, format="%(asctime)s - %(levelname)s - %(message)s"
)

flow_card = [
{
Expand All @@ -26,44 +32,11 @@
"negative_labels": ["bicycle", "off-road vehicle"],
"joined_dirs": ["sedan_car"],
},
{
"positive_labels": ["turtle"],
"negative_labels": ["horse", "bear", "giraffe", "dolphins"],
"joined_dirs": ["please_click_on_the_smallest_animal", "nested_smallest_turtle"],
},
{
"positive_labels": ["dog"],
"negative_labels": ["frog", "hedgehog", "squirrel", "hummingbird"],
"joined_dirs": ["please_click_on_the_largest_animal", "nested_largest_dog"],
},
# multi classification for nested prompt
{
"positive_labels": ["dog", "fox"],
"negative_labels": ["crab", "bird", "dragonfly", "ant"],
"joined_dirs": ["the_largest_animal"],
# = ↑↑ = 和常规情况一样,先对整体数据集进行多目标分类
# = ↓↓ = 再根据具体的 yes/bad 映射关系进行数据集二次移动
"substack": {
"nested_largest_dog": {"yes": ["dog"], "bad": ["crab", "bird", "dragonfly", "ant"]},
"nested_largest_fox": {"yes": ["fox"], "bad": ["crab", "bird", "dragonfly", "ant"]},
},
},
{
"positive_labels": ["red panda"],
"negative_labels": ["cactus", "door", "guinea pig", "meerkat"],
"negative_labels": ["cactus", "door", "guinea pig", "meerkat", "bird"],
"joined_dirs": ["red_panda"],
},
{
"positive_labels": ["tiger", "squirrel"],
"negative_labels": ["dog", "bat", "raccoon", "ant", "ladybug"],
"joined_dirs": ["please_click_on_the_largest_animal", "fff"],
# = ↑↑ = 和常规情况一样,先对整体数据集进行多目标分类
# = ↓↓ = 再根据具体的 yes/bad 映射关系进行数据集二次移动
"substack": {
"nested_largest_tiger": {"yes": ["tiger"], "bad": ["dog", "bat", "raccoon"]},
"nested_largest_squirrel": {"yes": ["squirrel"], "bad": ["ant", "ladybug"]},
},
},
{
"positive_labels": ["natural landscape", "Mountain", "forest"],
"negative_labels": [
Expand All @@ -77,18 +50,50 @@
"joined_dirs": ["natural_landscape"],
},
{
"positive_labels": ["starfish"],
"negative_labels": ["panda", "dog", "cow", "elephant", "guinea pig", "dolphins",
"bird", "goat", "lion", "bear", ""],
"joined_dirs": ["the_smallest_animal", "f1-star"],
"positive_labels": ["keyboard"],
"negative_labels": ["panda", "goat", "headphones", "bird", "trunk"],
"joined_dirs": ["keyboard"],
},
]

flow_card_nested_animal = [
{
"positive_labels": ["panda"],
"negative_labels": ["raccoon", "dog", "meerkat", "koala"],
"joined_dirs": ["the_largest_animal", "l1_panda"],
"substack": {
"nested_largest_panda": {
"yes": ["panda"],
"bad": ["raccoon", "dog", "meerkat", "koala"],
}
},
},
{
"positive_labels": ["horse"],
"negative_labels": ["elephant", "whale"],
"joined_dirs": ["the_smallest_animal", "s1_horse"],
"substack": {"nested_smallest_horse": {"yes": ["horse"], "bad": ["elephant", "whale"]}},
},
{
"positive_labels": ["bird"],
"negative_labels": ["panda", "dog", "cow", "dolphins", "goat", "lion", "bear", "giraffe"],
"joined_dirs": ["the_smallest_animal", "f1-bird"],
"negative_labels": ["ladybug", "butterfly", "dragonfly", "bees", "crab", "frog", "ant"],
"joined_dirs": ["the_largest_animal", "l1_bird"],
"substack": {
"nested_smallest_bird": {"yes": ["bird"], "bad": ["panda", "dog", "cow", "dolphins", "goat", "lion", "bear", "giraffe"]},
"nested_largest_bird": {
"yes": ["bird"],
"bad": ["ladybug", "butterfly", "dragonfly", "bees", "crab", "frog", "ant"],
}
},
},
{
"positive_labels": ["bird"],
"negative_labels": ["panda", "giraffe", "dolphins", "lion"],
"joined_dirs": ["the_smallest_animal", "s1_bird"],
"substack": {
"nested_smallest_bird": {
"yes": ["bird"],
"bad": ["panda", "giraffe", "dolphins", "lion"],
}
},
},

]
Loading

0 comments on commit fa3358a

Please sign in to comment.