Frontend (#74)

* feat(frontend): automation panel
CaptchaAgent · Nov 10, 2023 · b7ebb04 · b7ebb04
1 parent 0d045e6
commit b7ebb04
Show file tree

Hide file tree

Showing 40 changed files with 1,186 additions and 342 deletions.
diff --git a/.gitattributes b/.gitattributes
diff --git a/.gitignore b/.gitignore
@@ -142,8 +142,11 @@ archive/
 .run
 **/assets_cache.txt
 **/tmp_dir**
-automation/zip_dir/
-automation/yolo_mocker/
+**/zip_dir/
+**/yolo_mocker/
+**/model_zoo/
 
+# frontend
+frontend/*.db
 frontend/.web
-frontend/*.db
+**/__pycache__/
diff --git a/automation/datasets_downloader.py → automation/01_datasets_downloader.py b/automation/datasets_downloader.py → automation/01_datasets_downloader.py
@@ -14,7 +14,9 @@
 collected = []
 per_times = 60
 tmp_dir = Path(__file__).parent.joinpath("tmp_dir")
-sitekey = SiteKey.user
+
+# sitekey = "58366d97-3e8c-4b57-a679-4a41c8423be3"
+sitekey = SiteKey.epic
 
 
 async def collete_datasets(context: ASyncContext):
@@ -24,6 +26,8 @@ async def collete_datasets(context: ASyncContext):
     sitelink = SiteKey.as_sitelink(sitekey)
     await page.goto(sitelink)
 
+    logger.info("startup collector", url=sitelink)
+
     await agent.handle_checkbox()
 
     for pth in range(1, per_times + 1):

diff --git a/automation/assets_manager.py → automation/02_assets_manager.py b/automation/assets_manager.py → automation/02_assets_manager.py
@@ -137,17 +137,9 @@ def merge(self, fd: Path, td: Path):
 
 
 def run():
-    # the largest animal https://github.com/QIN2DIM/hcaptcha-challenger/issues/797
-    # red panda https://github.com/QIN2DIM/hcaptcha-challenger/issues/896
-    # sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/826"  # the smallest animal
-    # sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/896"  # red panda
-    # sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/897"  # keyboard
-    sources = "the largest animal"
-    # sources = "please click on the largest animal"
-    # sources = "the smallest animal"
-    # sources = "natural landscape"
-    # sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/857"  # flamingo
-    # sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/853"
+    sources = "electronic device"
+    sources = "https://github.com/QIN2DIM/hcaptcha-challenger/issues/696"
+
     am = AssetsManager.from_sources(sources)
     am.execute()
 

diff --git a/automation/auto_labeling.py → automation/03_auto_labeling.py b/automation/auto_labeling.py → automation/03_auto_labeling.py
@@ -15,11 +15,11 @@
 from typing import Tuple, List, Dict, NoReturn
 
 import hcaptcha_challenger as solver
-from PIL import Image
+from PIL import Image, ImageFilter
 from hcaptcha_challenger import DataLake, ModelHub, ZeroShotImageClassifier, register_pipline
 from tqdm import tqdm
 
-from flow_card import flow_card, flow_card_nested_animal
+from _flow_card import flow_card, flow_card_nested_animal
 
 solver.install(upgrade=True)
 
@@ -34,12 +34,8 @@ class SubStack:
     def from_tnf(cls, name: str, yes: List[str], bad: List[str]):
         return cls(nested_name=name, yes_seq=yes, bad_seq=bad)
 
-    @staticmethod
-    def kt(x):
-        return f"This is a photo of the {x}"
-
     def _offload(self, tag: str, dirname: str, tmp_case_dir: Path, *, to_dir: Path):
-        if self.kt(tag) == dirname:
+        if DataLake.PREMISED_YES.format(tag) == dirname:
             logging.info(f"refactor - name={self.nested_name} {tag=}")
             for image_name in os.listdir(tmp_case_dir):
                 image_path = tmp_case_dir.joinpath(image_name)
@@ -136,7 +132,13 @@ def mkdir(self, multi: bool = False) -> Tuple[Path, Path]:
 
         return yes_dir, bad_dir
 
-    def execute(self, model, substack: Dict[str, Dict[str, List[str]]] = None, **kwargs):
+    def execute(
+            self,
+            model,
+            substack: Dict[str, Dict[str, List[str]]] = None,
+            enable_gaussian: bool | None = None,
+            **kwargs,
+    ):
         if not self.pending_tasks:
             logging.info("No pending tasks")
             return
@@ -154,6 +156,11 @@ def execute(self, model, substack: Dict[str, Dict[str, List[str]]] = None, **kwa
             for image_path in self.pending_tasks[: self.limit]:
                 # The label at position 0 is the highest scoring target
                 image = Image.open(image_path)
+
+                # Optional Gaussian filter, used to reduce color (chroma) noise
+                if enable_gaussian:
+                    image = image.filter(ImageFilter.GaussianBlur(radius=1.1))
+
                 results = self.tool(model, image)
 
                 # we're dealing with multi-classification tasks here
@@ -204,7 +211,12 @@ def check_card(pending_card: list) -> NoReturn | bool:
     return True
 
 
-def run(suffix_filter: str, cards: list, base_dirname: str = "database2309"):
+def run(
+        suffix_filter: str,
+        cards: list,
+        base_dirname: str = "database2309",
+        enable_gaussian: bool | None = None,
+):
     if not suffix_filter:
         return
 
@@ -233,7 +245,7 @@ def run(suffix_filter: str, cards: list, base_dirname: str = "database2309"):
         )
         # Starts an automatic labeling task
         al = AutoLabeling.from_datalake(dl)
-        al.execute(model, **card)
+        al.execute(model, enable_gaussian=enable_gaussian, **card)
         # Automatically open output directory
         if "win32" in sys.platform and al.output_dir.is_dir():
             os.startfile(al.output_dir)
@@ -243,4 +255,7 @@ def run(suffix_filter: str, cards: list, base_dirname: str = "database2309"):
     logging.info(f"Loading {len(flow_card)=}")
     logging.info(f"Loading {len(flow_card_nested_animal)=}")
 
-    run("l1_penguin", cards=flow_card_nested_animal)
+    run("e1_mouse", cards=flow_card_nested_animal)
+    # run("land_vehicle", cards=flow_card)
+    # run("w1_cactus", cards=flow_card_nested_animal)
+    # run("bus", cards=flow_card_recaptcha_challenge, enable_gaussian=True)
diff --git a/automation/mini_workflow.py → automation/04_mini_workflow.py b/automation/mini_workflow.py → automation/04_mini_workflow.py
@@ -74,9 +74,9 @@ def quick_development() -> int | None:
 
 
 def upgrade_objects(aid_):
-    import annotator
+    import _annotator
 
-    annotator.rolling_upgrade(aid_)
+    _annotator.rolling_upgrade(aid_)
 
 
 if __name__ == "__main__":
@@ -88,15 +88,15 @@ def upgrade_objects(aid_):
     # fmt:off
     focus_flags = {
         # "<diagnosed_label_name>": "<model_name[flag]>"
-        # "sedan_car": "sedan_car2309",
-        # "nested_smallest_turtle": "nested_smallest_turtle2309",
-        # "nested_largest_dog": "nested_largest_dog2309",
-        # "bicycle": "bicycle2309",
-        # "nested_largest_fox": "nested_largest_fox2309",
+        # "land_vehicle": "land_vehicle2309",
+        # "server": "server2309",
+        # "movie_theater": "movie_theater2309",
+        # "business_suit": "business_suit2309",
+        "nested_electronic_device_mouse": "nested_electronic_device_mouse2309"
     }
     # fmt:on
 
     quick_train()
     aid = quick_development()
-    # upgrade_objects(aid)
+    upgrade_objects(aid)
     print(aid)
diff --git a/automation/zip_dataset.py → automation/05_zip_dataset.py b/automation/zip_dataset.py → automation/05_zip_dataset.py
@@ -148,11 +148,12 @@ def run():
     # nested_largest_ => the largest animal
     # nested_smallest_ => the smallest animal
     """
-    prompt = "nested_largest_penguin"
+    prompt = "nested_electronic_device_mouse"
 
-    # nested_prompt = ""
+    nested_prompt = "electronic device"
     # nested_prompt = "the smallest animal"
-    nested_prompt = "the largest animal"
+    # nested_prompt = "the largest animal"
+    # nested_prompt = "images that appear warmer in comparison to other"
 
     # Compress the dataset
     tn = zip_dataset(prompt=prompt)

diff --git a/automation/annotator.py → automation/_annotator.py b/automation/annotator.py → automation/_annotator.py
@@ -48,8 +48,8 @@ def from_modelhub(cls, modelhub: ModelHub):
 
     def to_yaml(self, path: Path | None = None):
         path = path or Path("objects-tmp.yaml")
-        with open(path, "w", encoding="utf8") as file:
-            yaml.safe_dump(self.__dict__, file, sort_keys=False, allow_unicode=True,)
+        data = yaml.safe_dump(self.__dict__, sort_keys=False, allow_unicode=True)
+        path.write_text(data, encoding="utf8", newline="\n")
         return path
 
     @staticmethod
@@ -222,8 +222,3 @@ def find_asset_id(name_prefix: str):
                 continue
             print(asset.name, asset.id)
             break
-
-
-if __name__ == "__main__":
-    find_asset_id("nested_smallest_bird2310")
-    rolling_upgrade()