v0.3.0 #154 #144 #143 #141 #128 #127 #113

Co-Authored-By: Bingjie YAN <bj.yan.pa@qq.com>
QIN2DIM · Aug 28, 2022 · b8bbf09 · b8bbf09
1 parent 5f9187d
commit b8bbf09
Show file tree

Hide file tree

Showing 12 changed files with 429 additions and 351 deletions.
diff --git a/src/apis/scaffold/challenge.py b/src/apis/scaffold/challenge.py
@@ -17,7 +17,6 @@
     DIR_MODEL,
     DIR_CHALLENGE,
     PATH_OBJECTS_YAML,
-    PATH_RAINBOW_YAML,
 )
 from services.utils import get_challenge_ctx
 
@@ -43,7 +42,7 @@ def runner(
         onnx_prefix=onnx_prefix,
         screenshot=screenshot,
         path_objects_yaml=PATH_OBJECTS_YAML,
-        path_rainbow_yaml=PATH_RAINBOW_YAML,
+        on_rainbow=True,
     )
     challenger_utils = ArmorUtils()
 

diff --git a/src/apis/scaffold/install.py b/src/apis/scaffold/install.py
@@ -3,17 +3,15 @@
 # Author     : QIN2DIM
 # Github     : https://github.com/QIN2DIM
 # Description:
-import hashlib
-import os
 import sys
 import webbrowser
 from typing import Optional
 
 from webdriver_manager.chrome import ChromeType
 from webdriver_manager.core.utils import get_browser_version_from_os
 
-from services.hcaptcha_challenger import YOLO, SKRecognition, PluggableONNXModels
-from services.settings import DIR_MODEL, logger, PATH_RAINBOW_YAML, PATH_OBJECTS_YAML
+from services.hcaptcha_challenger import PluggableONNXModels, Rainbow, YOLO
+from services.settings import DIR_MODEL, logger, PATH_OBJECTS_YAML, DIR_ASSETS
 
 
 def download_driver():
@@ -39,25 +37,16 @@ def download_driver():
     logger.info("Re-execute the `install` scaffolding command after the installation is complete.")
 
 
-def download_yolo_model(onnx_prefix):
-    YOLO(dir_model=DIR_MODEL, onnx_prefix=onnx_prefix).download_model()
-
-
-def refresh_pluggable_onnx_model(upgrade: Optional[bool] = None):
-    def need_to_refresh():
-        _flag = "5ba2edb8fdd1350ff3a8731bc71c313998ba70a32d58178a218545e54d2701cf"
-        if not os.path.exists(PATH_RAINBOW_YAML):
-            return True
-        with open(PATH_RAINBOW_YAML, "rb") as file:
-            return hashlib.sha256(file.read()).hexdigest() != _flag
+def do(yolo_onnx_prefix: Optional[str] = None, upgrade: Optional[bool] = False):
+    """下载项目运行所需的各项依赖"""
+    download_driver()
 
-    if need_to_refresh():
-        SKRecognition().sync_rainbow(path_rainbow=PATH_RAINBOW_YAML, convert=True)
-        PluggableONNXModels(PATH_OBJECTS_YAML).summon(dir_model=DIR_MODEL, upgrade=upgrade)
+    # PULL rainbow table
+    Rainbow(DIR_ASSETS).sync()
 
+    # PULL YOLO ONNX Model by the prefix flag
+    YOLO(DIR_MODEL, yolo_onnx_prefix).pull_model()
 
-def run(model: Optional[str] = None, upgrade: Optional[bool] = None):
-    """下载项目运行所需的各项依赖"""
-    download_driver()
-    download_yolo_model(onnx_prefix=model)
-    refresh_pluggable_onnx_model(upgrade=upgrade)
+    # PULL ResNet ONNX Model(s) by objects.yaml
+    if upgrade is True:
+        PluggableONNXModels(PATH_OBJECTS_YAML).summon(DIR_MODEL)
diff --git a/src/objects.yaml b/src/objects.yaml
@@ -4,59 +4,65 @@
 #   en: ["en_prompt_label"]
 label_alias:
   seaplane:
-    zh: ["水上飞机"]
-    en: ["seaplane"]
-  domestic_cat:
-    zh: ["家猫"]
-    en: ["domestic cat"]
+    zh: [ "水上飞机" ]
+    en: [ "seaplane" ]
   bedroom:
-    zh: ["卧室"]
-    en: ["bedroom"]
+    zh: [ "卧室" ]
+    en: [ "bedroom" ]
   bridge:
-    zh: ["桥梁"]
-    en: ["bridge"]
-  lion:
-    zh: ["狮子"]
-    en: ["lion"]
+    zh: [ "桥梁" ]
+    en: [ "bridge" ]
+  domestic_cat:
+    zh: [ "家猫", "猫" ]
+    en: [ "domestic cat", "cat" ]
   living_room:
-    zh: ["客厅"]
-    en: ["living room"]
-  horse:
-    zh: ["一匹马"]
-    en: ["horse"]
+    zh: [ "客厅" ]
+    en: [ "living room" ]
   conference_room:
-    zh: ["会议室"]
-    en: ["conference room"]
-  smiling_dog:
-    zh: ["微笑狗"]
-    en: ["smiling dog"]
-  horse_made_of_clouds:
-    zh: ["一匹由云制成的马"]
-    en: ["horse made of clouds"]
+    zh: [ "会议室" ]
+    en: [ "conference room" ]
   elephant_made_of_clouds:
-    zh: ["由云制成的大象"]
-    en: ["elephant made of clouds"]
-  giraffe:
-    zh: ["长颈鹿"]
-    en: ["giraffe"]
+    zh: [ "由云制成的大象" ]
+    en: [ "elephant made of clouds" ]
   parrot:
-    zh: ["鹦鹉"]
-    en: ["parrot"]
+    zh: [ "鹦鹉" ]
+    en: [ "parrot" ]
+
+  lion:
+    zh: [ "狮子" ]
+    en: [ "lion" ]
   lion_with_mane_on_its_neck:
-    zh: ["一只脖子上有鬃毛的狮子", "雄狮"]
-    en: ["lion with mane on its neck", "male lion"]
+    zh: [ "一只脖子上有鬃毛的狮子", "雄狮" ]
+    en: [ "lion with mane on its neck", "male lion" ]
   lion_with_open_eyes:
-    zh: ["睁开眼睛的狮子"]
-    en: ["lion with open eyes"]
+    zh: [ "睁开眼睛的狮子" ]
+    en: [ "lion with open eyes" ]
   lion_with_closed_eyes:
-    zh: ["一只闭着眼睛的狮子"]
-    en: ["lion with closed eyes"]
-#  dog_with_a_collar_on_its_neck:
-#   zh: ["一条脖子上有项圈的狗"]
-#   en: ["dog with a collar on its neck"]
-#  dog_without_a_collar:
-#    zh: ["一条没有项圈的狗"]
-#    en: ["dog without a collar"]
-#  elephants_drawn_with_leaves:
-#    zh: ["请选择所有用树叶画的大象"]
-#    en: ["elephants drawn with leaves"]
+    zh: [ "一只闭着眼睛的狮子" ]
+    en: [ "lion with closed eyes" ]
+  lion_with_an_open_mouth:
+    zh: [ "张开嘴的狮子" ]
+    en: [ "lion with an open mouth" ]
+  lion_with_a_closed_mouth:
+    zh: [ "一只闭着嘴的狮子" ]
+    en: [ "lion with a closed mouth" ]
+  female_lion:
+    zh: [ "雌狮" ]
+    en: [ "female lion" ]
+
+  horse_made_of_clouds:
+    zh: [ "一匹由云制成的马" ]
+    en: [ "horse made of clouds" ]
+  horse_facing_to_the_left:
+    zh: [ "朝左马" ]
+    en: [ "horse facing to the left" ]
+  horse_facing_to_the_right:
+    zh: [ "面向右侧的马" ]
+    en: [ "horse facing to the right" ]
+
+  smiling_dog:
+    zh: [ "微笑狗" ]
+    en: [ "smiling dog" ]
+  dog_with_a_collar_on_its_neck:
+    zh: [ "一条脖子上有项圈的狗" ]
+    en: [ "dog with a collar on its neck" ]
diff --git a/src/services/hcaptcha_challenger/__init__.py b/src/services/hcaptcha_challenger/__init__.py
@@ -5,7 +5,7 @@
 # Description:
 from .core import ArmorCaptcha, ArmorUtils
 from .solutions.resnet import PluggableONNXModels
-from .solutions.sk_recognition import SKRecognition
 from .solutions.yolo import YOLO
+from .solutions.kernel import Rainbow
 
-__all__ = ["SKRecognition", "YOLO", "ArmorCaptcha", "ArmorUtils", "PluggableONNXModels"]
+__all__ = ["YOLO", "ArmorCaptcha", "ArmorUtils", "PluggableONNXModels", "Rainbow"]
diff --git a/src/services/hcaptcha_challenger/core.py b/src/services/hcaptcha_challenger/core.py
@@ -1,11 +1,12 @@
 import asyncio
 import os
+import random
 import re
 import sys
 import time
-import urllib.request
 from typing import Optional, Union, Tuple
-
+from urllib.request import getproxies
+from urllib.parse import quote
 from selenium.common.exceptions import (
     ElementNotVisibleException,
     ElementClickInterceptedException,
@@ -27,7 +28,7 @@
     AssertTimeout,
     ChallengeLangException,
 )
-from .solutions import sk_recognition, resnet, yolo
+from .solutions import resnet, yolo
 
 
 class ArmorCaptcha:
@@ -51,7 +52,9 @@ class ArmorCaptcha:
             "大象": "elephant",
             "鸟": "bird",
             "狗": "dog",
-            "犬科动物": "dog"
+            "犬科动物": "dog",
+            "一匹马": "horse",
+            "长颈鹿": "giraffe",
         },
         "en": {
             "airplane": "airplane",
@@ -69,7 +72,9 @@ class ArmorCaptcha:
             "elephant": "elephant",
             "bird": "bird",
             "dog": "dog",
-            "canine": "dog"
+            "canine": "dog",
+            "horse": "horse",
+            "giraffe": "giraffe",
         },
     }
 
@@ -104,15 +109,15 @@ class ArmorCaptcha:
     CHALLENGE_BACKCALL = "backcall"
 
     def __init__(
-            self,
-            dir_workspace: str = None,
-            lang: Optional[str] = "zh",
-            dir_model: str = None,
-            onnx_prefix: str = None,
-            screenshot: Optional[bool] = False,
-            debug=False,
-            path_objects_yaml: Optional[str] = None,
-            path_rainbow_yaml: Optional[str] = None,
+        self,
+        dir_workspace: str = None,
+        lang: Optional[str] = "zh",
+        dir_model: str = None,
+        onnx_prefix: str = None,
+        screenshot: Optional[bool] = False,
+        debug=False,
+        path_objects_yaml: Optional[str] = None,
+        on_rainbow: Optional[bool] = None,
     ):
         if not isinstance(lang, str) or not self.label_alias.get(lang):
             raise ChallengeLangException(
@@ -126,7 +131,7 @@ def __init__(
         self.onnx_prefix = onnx_prefix
         self.screenshot = screenshot
         self.path_objects_yaml = path_objects_yaml
-        self.path_rainbow_yaml = path_rainbow_yaml
+        self.on_rainbow = on_rainbow
 
         # 存储挑战图片的目录
         self.runtime_workspace = ""
@@ -153,9 +158,7 @@ def __init__(
         # Automatic registration
         self.pom_handler = resnet.PluggableONNXModels(self.path_objects_yaml)
         self.label_alias.update(self.pom_handler.label_alias[lang])
-        self.pluggable_onnx_models = self.pom_handler.overload(
-            self.dir_model, path_rainbow=self.path_rainbow_yaml
-        )
+        self.pluggable_onnx_models = self.pom_handler.overload(self.dir_model, self.on_rainbow)
         self.yolo_model = yolo.YOLO(self.dir_model, self.onnx_prefix)
 
     def _init_workspace(self):
@@ -230,7 +233,7 @@ def get_label(self, ctx: Chrome):
         def split_prompt_message(prompt_message: str) -> str:
             """根据指定的语种在提示信息中分离挑战标签"""
             labels_mirror = {
-                "zh": re.split(r"[包含 图片]", prompt_message)[2][:-1]
+                "zh": re.split(r"[包含 图片]", prompt_message)[2][:-1].replace("的每", "")
                 if "包含" in prompt_message
                 else prompt_message,
                 "en": re.split(r"containing a", prompt_message)[-1][1:].strip().replace(".", "")
@@ -294,7 +297,7 @@ def tactical_retreat(self, ctx) -> Optional[str]:
         except WebDriverException as err:
             logger.exception(err)
         finally:
-            q = self.label.replace(" ", "+")
+            q = quote(self.label, "utf8")
             logger.warning(
                 ToolBox.runtime_report(
                     motive="ALERT",
@@ -311,21 +314,11 @@ def tactical_retreat(self, ctx) -> Optional[str]:
 
     def switch_solution(self):
         """Optimizing solutions based on different challenge labels"""
-        sk_solution = {
-            "vertical river": sk_recognition.VerticalRiverRecognition,
-            "airplane in the sky flying left": sk_recognition.LeftPlaneRecognition,
-            "airplanes in the sky that are flying to the right": sk_recognition.RightPlaneRecognition,
-        }
-
         label_alias = self.label_alias.get(self.label)
 
-        # Select ResNet ONNX model
+        # Select ONNX model - ResNet | YOLO
         if self.pluggable_onnx_models.get(label_alias):
             return self.pluggable_onnx_models[label_alias]
-        # Select SK-Image method
-        if sk_solution.get(label_alias):
-            return sk_solution[label_alias](self.path_rainbow_yaml)
-        # Select YOLO ONNX model
         return self.yolo_model
 
     def mark_samples(self, ctx: Chrome):
@@ -381,8 +374,8 @@ def download_images(self):
         ### Solution
 
         1. Coroutine Downloader
-          Use the coroutine-based method to pull the image to the local, the best practice (this method).
-          In the case of poor network, pull efficiency is at least 10 times faster than traversal download.
+          Use the coroutine-based method to pull the image to the local, the best practice (this method).
+          In the case of poor network, pull efficiency is at least 10 times faster than traversal download.
 
         2. Screen cut
           There is some difficulty in coding.
@@ -445,7 +438,7 @@ def challenge(self, ctx: Chrome, model):
 
         ta = []
         # {{< IMAGE CLASSIFICATION >}}
-        for alias in self.alias2path.keys():
+        for alias in self.alias2path:
             # Read binary data weave into types acceptable to the model
             with open(self.alias2path[alias], "rb") as file:
                 data = file.read()
@@ -456,6 +449,8 @@ def challenge(self, ctx: Chrome, model):
             # Pass: Hit at least one object
             if result:
                 try:
+                    # Doubtful operation
+                    time.sleep(random.uniform(0.2, 0.3))
                     self.alias2locator[alias].click()
                 except StaleElementReferenceException:
                     pass
@@ -469,9 +464,13 @@ def challenge(self, ctx: Chrome, model):
 
         # {{< SUBMIT ANSWER >}}
         try:
-            WebDriverWait(ctx, 35, ignored_exceptions=ElementClickInterceptedException).until(
+            WebDriverWait(ctx, 15, ignored_exceptions=ElementClickInterceptedException).until(
                 EC.element_to_be_clickable((By.XPATH, "//div[@class='button-submit button']"))
             ).click()
+            WebDriverWait(ctx, 15).until(
+                EC.element_to_be_clickable((By.XPATH, "//div[@class='button-submit button']"))
+            )
+            print("下一個")
         except ElementClickInterceptedException:
             pass
         except WebDriverException as err:
@@ -515,8 +514,8 @@ def is_flagged_flow():
                     EC.visibility_of_element_located((By.XPATH, "//div[@class='error-text']"))
                 )
                 self.threat += 1
-                if urllib.request.getproxies() and self.threat > 1:
-                    logger.warning("Your proxy IP may have been flagged.")
+                if getproxies() and self.threat > 4:
+                    logger.warning(f"Your proxy IP may have been flagged - proxies={getproxies()}")
                 return True
             except TimeoutException:
                 return False
@@ -534,7 +533,7 @@ def is_successful_at_the_demo_site():
             except TimeoutException:
                 pass
 
-        time.sleep(0.2)
+        time.sleep(1)
 
         for _ in range(3):
             # Pop prompt "Please try again".