Skip to content

Commit

Permalink
Frontend (#74)
Browse files Browse the repository at this point in the history
* feat(frontend): automation panel
  • Loading branch information
QIN2DIM committed Nov 10, 2023
1 parent 0d045e6 commit b7ebb04
Show file tree
Hide file tree
Showing 40 changed files with 1,186 additions and 342 deletions.
2 changes: 0 additions & 2 deletions .gitattributes

This file was deleted.

9 changes: 6 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,11 @@ archive/
.run
**/assets_cache.txt
**/tmp_dir**
automation/zip_dir/
automation/yolo_mocker/
**/zip_dir/
**/yolo_mocker/
**/model_zoo/

# frontend
frontend/*.db
frontend/.web
frontend/*.db
**/__pycache__/
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
collected = []
per_times = 60
tmp_dir = Path(__file__).parent.joinpath("tmp_dir")
sitekey = SiteKey.user

# sitekey = "58366d97-3e8c-4b57-a679-4a41c8423be3"
sitekey = SiteKey.epic


async def collete_datasets(context: ASyncContext):
Expand All @@ -24,6 +26,8 @@ async def collete_datasets(context: ASyncContext):
sitelink = SiteKey.as_sitelink(sitekey)
await page.goto(sitelink)

logger.info("startup collector", url=sitelink)

await agent.handle_checkbox()

for pth in range(1, per_times + 1):
Expand Down
14 changes: 3 additions & 11 deletions automation/assets_manager.py → automation/02_assets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,17 +137,9 @@ def merge(self, fd: Path, td: Path):


def run():
# the largest animal https://github.com/QIN2DIM/hcaptcha-challenger/issues/797
# red panda https://github.com/QIN2DIM/hcaptcha-challenger/issues/896
# sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/826" # the smallest animal
# sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/896" # red panda
# sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/897" # keyboard
sources = "the largest animal"
# sources = "please click on the largest animal"
# sources = "the smallest animal"
# sources = "natural landscape"
# sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/857" # flamingo
# sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/853"
sources = "electronic device"
sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/696"

am = AssetsManager.from_sources(sources)
am.execute()

Expand Down
37 changes: 26 additions & 11 deletions automation/auto_labeling.py → automation/03_auto_labeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
from typing import Tuple, List, Dict, NoReturn

import hcaptcha_challenger as solver
from PIL import Image
from PIL import Image, ImageFilter
from hcaptcha_challenger import DataLake, ModelHub, ZeroShotImageClassifier, register_pipline
from tqdm import tqdm

from flow_card import flow_card, flow_card_nested_animal
from _flow_card import flow_card, flow_card_nested_animal

solver.install(upgrade=True)

Expand All @@ -34,12 +34,8 @@ class SubStack:
def from_tnf(cls, name: str, yes: List[str], bad: List[str]):
return cls(nested_name=name, yes_seq=yes, bad_seq=bad)

@staticmethod
def kt(x):
return f"This is a photo of the {x}"

def _offload(self, tag: str, dirname: str, tmp_case_dir: Path, *, to_dir: Path):
if self.kt(tag) == dirname:
if DataLake.PREMISED_YES.format(tag) == dirname:
logging.info(f"refactor - name={self.nested_name} {tag=}")
for image_name in os.listdir(tmp_case_dir):
image_path = tmp_case_dir.joinpath(image_name)
Expand Down Expand Up @@ -136,7 +132,13 @@ def mkdir(self, multi: bool = False) -> Tuple[Path, Path]:

return yes_dir, bad_dir

def execute(self, model, substack: Dict[str, Dict[str, List[str]]] = None, **kwargs):
def execute(
self,
model,
substack: Dict[str, Dict[str, List[str]]] = None,
enable_gaussian: bool | None = None,
**kwargs,
):
if not self.pending_tasks:
logging.info("No pending tasks")
return
Expand All @@ -154,6 +156,11 @@ def execute(self, model, substack: Dict[str, Dict[str, List[str]]] = None, **kwa
for image_path in self.pending_tasks[: self.limit]:
# The label at position 0 is the highest scoring target
image = Image.open(image_path)

# Optional Gaussian filter, used to reduce color noise
if enable_gaussian:
image = image.filter(ImageFilter.GaussianBlur(radius=1.1))

results = self.tool(model, image)

# we're dealing with multi-classification tasks here
Expand Down Expand Up @@ -204,7 +211,12 @@ def check_card(pending_card: list) -> NoReturn | bool:
return True


def run(suffix_filter: str, cards: list, base_dirname: str = "database2309"):
def run(
suffix_filter: str,
cards: list,
base_dirname: str = "database2309",
enable_gaussian: bool | None = None,
):
if not suffix_filter:
return

Expand Down Expand Up @@ -233,7 +245,7 @@ def run(suffix_filter: str, cards: list, base_dirname: str = "database2309"):
)
# Starts an automatic labeling task
al = AutoLabeling.from_datalake(dl)
al.execute(model, **card)
al.execute(model, enable_gaussian=enable_gaussian, **card)
# Automatically open output directory
if "win32" in sys.platform and al.output_dir.is_dir():
os.startfile(al.output_dir)
Expand All @@ -243,4 +255,7 @@ def run(suffix_filter: str, cards: list, base_dirname: str = "database2309"):
logging.info(f"Loading {len(flow_card)=}")
logging.info(f"Loading {len(flow_card_nested_animal)=}")

run("l1_penguin", cards=flow_card_nested_animal)
run("e1_mouse", cards=flow_card_nested_animal)
# run("land_vehicle", cards=flow_card)
# run("w1_cactus", cards=flow_card_nested_animal)
# run("bus", cards=flow_card_recaptcha_challenge, enable_gaussian=True)
16 changes: 8 additions & 8 deletions automation/mini_workflow.py → automation/04_mini_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ def quick_development() -> int | None:


def upgrade_objects(aid_):
import annotator
import _annotator

annotator.rolling_upgrade(aid_)
_annotator.rolling_upgrade(aid_)


if __name__ == "__main__":
Expand All @@ -88,15 +88,15 @@ def upgrade_objects(aid_):
# fmt:off
focus_flags = {
# "<diagnosed_label_name>": "<model_name[flag]>"
# "sedan_car": "sedan_car2309",
# "nested_smallest_turtle": "nested_smallest_turtle2309",
# "nested_largest_dog": "nested_largest_dog2309",
# "bicycle": "bicycle2309",
# "nested_largest_fox": "nested_largest_fox2309",
# "land_vehicle": "land_vehicle2309",
# "server": "server2309",
# "movie_theater": "movie_theater2309",
# "business_suit": "business_suit2309",
"nested_electronic_device_mouse": "nested_electronic_device_mouse2309"
}
# fmt:on

quick_train()
aid = quick_development()
# upgrade_objects(aid)
upgrade_objects(aid)
print(aid)
7 changes: 4 additions & 3 deletions automation/zip_dataset.py → automation/05_zip_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,12 @@ def run():
# nested_largest_ => the largest animal
# nested_smallest_ => the smallest animal
"""
prompt = "nested_largest_penguin"
prompt = "nested_electronic_device_mouse"

# nested_prompt = ""
nested_prompt = "electronic device"
# nested_prompt = "the smallest animal"
nested_prompt = "the largest animal"
# nested_prompt = "the largest animal"
# nested_prompt = "images that appear warmer in comparison to other"

# Compress the dataset
tn = zip_dataset(prompt=prompt)
Expand Down
9 changes: 2 additions & 7 deletions automation/annotator.py → automation/_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ def from_modelhub(cls, modelhub: ModelHub):

def to_yaml(self, path: Path | None = None):
path = path or Path("objects-tmp.yaml")
with open(path, "w", encoding="utf8") as file:
yaml.safe_dump(self.__dict__, file, sort_keys=False, allow_unicode=True,)
data = yaml.safe_dump(self.__dict__, sort_keys=False, allow_unicode=True)
path.write_text(data, encoding="utf8", newline="\n")
return path

@staticmethod
Expand Down Expand Up @@ -222,8 +222,3 @@ def find_asset_id(name_prefix: str):
continue
print(asset.name, asset.id)
break


if __name__ == "__main__":
find_asset_id("nested_smallest_bird2310")
rolling_upgrade()
Loading

0 comments on commit b7ebb04

Please sign in to comment.