Merge pull request #6 from beiyuouo/main

build 🚀: RAG
QIN2DIM · Mar 1, 2022 · fe8bafe
2 parents 79a34fe + 4f7d008
commit fe8bafe
Show file tree

Hide file tree

Showing 9 changed files with 324 additions and 188 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -3,6 +3,7 @@ loguru~=0.6.0
 selenium~=4.1.0
 aiohttp~=3.8.1
 opencv-python~=4.5.5.62
-numpy~=1.22.2
 undetected_chromedriver==3.1.3
 webdriver-manager>=3.5.2
+scikit-image~=0.19.2
+numpy>=1.21.5
diff --git a/src/apis/scaffold/challenge.py b/src/apis/scaffold/challenge.py
@@ -10,10 +10,12 @@
 from services.settings import logger, HCAPTCHA_DEMO_SITES, DIR_MODEL, DIR_CHALLENGE
 from services.utils import get_challenge_ctx
 
-SAMPLE_SITE = HCAPTCHA_DEMO_SITES[0]
 
-
-def demo(silence: Optional[bool] = False, onnx_prefix: Optional[str] = None):
+def demo(
+        silence: Optional[bool] = False,
+        onnx_prefix: Optional[str] = None,
+        sample_site: Optional[str] = HCAPTCHA_DEMO_SITES[0]
+):
     """人机挑战演示 顶级接口"""
     logger.info("Starting demo project...")
 
@@ -28,7 +30,7 @@ def demo(silence: Optional[bool] = False, onnx_prefix: Optional[str] = None):
     ctx = get_challenge_ctx(silence=silence)
     try:
         # 读取 hCaptcha challenge 测试站点
-        ctx.get(SAMPLE_SITE)
+        ctx.get(sample_site)
 
         # 必要的等待时间
         time.sleep(3)
@@ -52,12 +54,23 @@ def demo(silence: Optional[bool] = False, onnx_prefix: Optional[str] = None):
         ctx.quit()
 
 
+def demo_v2(silence: Optional[bool] = False, onnx_prefix: Optional[str] = None):
+    """
+    人机挑战演示 顶级接口 演示垂直河流 SKI 解法
+
+    :param silence:
+    :param onnx_prefix:
+    :return:
+    """
+    demo(silence, onnx_prefix, sample_site=HCAPTCHA_DEMO_SITES[1])
+
+
 @logger.catch()
 def test():
     """检查挑战者驱动版本是否适配"""
     ctx = get_challenge_ctx(silence=True)
     try:
-        ctx.get(SAMPLE_SITE)
+        ctx.get(HCAPTCHA_DEMO_SITES[0])
     finally:
         ctx.quit()
 

diff --git a/src/services/hcaptcha_challenger/__init__.py b/src/services/hcaptcha_challenger/__init__.py
@@ -3,8 +3,8 @@
 # Author     : QIN2DIM
 # Github     : https://github.com/QIN2DIM
 # Description:
-from .core import YOLO, ArmorCaptcha, ArmorUtils
-
+from .core import ArmorCaptcha, ArmorUtils
+from .solutions.yolo import YOLO
 __all__ = [
     "YOLO",
     "ArmorCaptcha",

diff --git a/src/services/hcaptcha_challenger/core.py b/src/services/hcaptcha_challenger/core.py
@@ -3,10 +3,6 @@
 import re
 import time
 import urllib.request
-from typing import Optional
-
-import cv2
-import numpy as np
 from loguru import logger
 from selenium.common.exceptions import (
     ElementNotVisibleException,
@@ -18,9 +14,11 @@
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
+from typing import Optional
 from undetected_chromedriver import Chrome
 
 from services.utils import AshFramework
+from .solutions import ski_river
 from .exceptions import (
     LabelNotFoundException,
     ChallengeReset,
@@ -29,172 +27,6 @@
 )
 
 
-class YOLO:
-    """YOLO model for image classification"""
-
-    def __init__(self, dir_model, onnx_prefix: str = "yolov5s6"):
-        self.dir_model = "./model" if dir_model is None else dir_model
-        self.onnx_prefix = (
-            "yolov5s6"
-            if onnx_prefix not in ["yolov5m6", "yolov5s6", "yolov5n6"]
-            else onnx_prefix
-        )
-
-        self.onnx_model = {
-            "name": f"{self.onnx_prefix}(onnx)_model",
-            "path": os.path.join(self.dir_model, f"{self.onnx_prefix}.onnx"),
-            "src": f"https://github.com/QIN2DIM/hcaptcha-challenger/releases/download/model/{self.onnx_prefix}.onnx",
-        }
-
-        # COCO namespace
-        self.classes = [
-            "person",
-            "bicycle",
-            "car",
-            "motorbike",
-            "aeroplane",
-            "bus",
-            "train",
-            "truck",
-            "boat",
-            "traffic light",
-            "fire hydrant",
-            "stop sign",
-            "parking meter",
-            "bench",
-            "bird",
-            "cat",
-            "dog",
-            "horse",
-            "sheep",
-            "cow",
-            "elephant",
-            "bear",
-            "zebra",
-            "giraffe",
-            "backpack",
-            "umbrella",
-            "handbag",
-            "tie",
-            "suitcase",
-            "frisbee",
-            "skis",
-            "snowboard",
-            "sports ball",
-            "kite",
-            "baseball bat",
-            "baseball glove",
-            "skateboard",
-            "surfboard",
-            "tennis racket",
-            "bottle",
-            "wine glass",
-            "cup",
-            "fork",
-            "knife",
-            "spoon",
-            "bowl",
-            "banana",
-            "apple",
-            "sandwich",
-            "orange",
-            "broccoli",
-            "carrot",
-            "hot dog",
-            "pizza",
-            "donut",
-            "cake",
-            "chair",
-            "sofa",
-            "pottedplant",
-            "bed",
-            "diningtable",
-            "toilet",
-            "tvmonitor",
-            "laptop",
-            "mouse",
-            "remote",
-            "keyboard",
-            "cell phone",
-            "microwave",
-            "oven",
-            "toaster",
-            "sink",
-            "refrigerator",
-            "book",
-            "clock",
-            "vase",
-            "scissors",
-            "teddy bear",
-            "hair drier",
-            "toothbrush",
-        ]
-
-    def download_model(self):
-        """Download model and weight parameters"""
-        if not os.path.exists(self.dir_model):
-            os.mkdir(self.dir_model)
-        if os.path.exists(self.onnx_model["path"]):
-            return
-
-        print(f"Downloading {self.onnx_model['name']} from {self.onnx_model['src']}")
-
-        urllib.request.urlretrieve(self.onnx_model["src"], self.onnx_model["path"])
-
-    def detect_common_objects(self, img_stream, confidence=0.4, nms_thresh=0.4):
-        """
-        Object Detection
-
-        Get multiple labels identified in a given image
-
-        :param img_stream: image file binary stream
-             with open(img_filepath, "rb") as file:
-                data = file.read()
-             detect_common_objects(img_stream=data)
-        :param confidence:
-        :param nms_thresh:
-        :return: bbox, label, conf
-        """
-        np_array = np.frombuffer(img_stream, np.uint8)
-        img = cv2.imdecode(np_array, flags=1)
-        height, width = img.shape[:2]
-
-        blob = cv2.dnn.blobFromImage(
-            img, 1 / 255.0, (128, 128), (0, 0, 0), swapRB=True, crop=False
-        )
-        self.download_model()
-
-        net = cv2.dnn.readNetFromONNX(self.onnx_model["path"])
-
-        net.setInput(blob)
-
-        class_ids = []
-        confidences = []
-        boxes = []
-
-        outs = net.forward()
-
-        for out in outs:
-            for detection in out:
-                scores = detection[5:]
-                class_id = np.argmax(scores)
-                max_conf = scores[class_id]
-                if max_conf > confidence:
-                    center_x = int(detection[0] * width)
-                    center_y = int(detection[1] * height)
-                    w = int(detection[2] * width)
-                    h = int(detection[3] * height)
-                    x = center_x - (w / 2)
-                    y = center_y - (h / 2)
-                    class_ids.append(class_id)
-                    confidences.append(float(max_conf))
-                    boxes.append([x, y, w, h])
-
-        indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence, nms_thresh)
-
-        return [str(self.classes[class_ids[i]]) for i in indices]
-
-
 class ArmorCaptcha:
     """hCAPTCHA challenge drive control"""
 
@@ -219,6 +51,7 @@ def __init__(self, dir_workspace: str = None, debug=False):
             "船": "boat",
             "汽车": "car",
             "摩托车": "motorbike",
+            "垂直河流": "vertical river"
         }
 
         # Store the `element locator` of challenge images {挑战图片1: locator1, ...}
@@ -266,6 +99,14 @@ def tactical_retreat(self) -> bool:
             return True
         return False
 
+    def switch_solution(self,mirror, label: Optional[str] = None):
+        """模型卸载"""
+        label = self.label if label is None else label
+
+        if label in ["垂直河流"]:
+            return ski_river.RiverChallenger()
+        return mirror
+
     def mark_samples(self, ctx: Chrome):
         """
         获取每个挑战图片的下载链接以及网页元素位置
@@ -379,7 +220,7 @@ async def control_driver(self, context, session=None):
 
         self.runtime_workspace = workspace_
 
-    def challenge(self, ctx: Chrome, model: YOLO, confidence=0.39, nms_thresh=0.7):
+    def challenge(self, ctx: Chrome, model):
         """
         图像分类，元素点击，答案提交
 
@@ -405,15 +246,13 @@ def challenge(self, ctx: Chrome, model: YOLO, confidence=0.39, nms_thresh=0.7):
             with open(img_filepath, "rb") as file:
                 data = file.read()
 
-            t0 = time.time()
             # 获取识别结果
-            labels = model.detect_common_objects(
-                data, confidence=confidence, nms_thresh=nms_thresh
-            )
+            t0 = time.time()
+            result = model.solution(img_stream=data, label=self.label_alias[self.label])
             ta.append(time.time() - t0)
 
             # 模型会根据置信度给出图片中的多个目标，只要命中一个就算通过
-            if self.label_alias[self.label] in labels:
+            if result:
                 # 选中标签元素
                 try:
                     self.alias2locator[alias].click()
@@ -432,7 +271,7 @@ def challenge(self, ctx: Chrome, model: YOLO, confidence=0.39, nms_thresh=0.7):
         except (TimeoutException, ElementClickInterceptedException):
             raise ChallengeTimeout("CPU 算力不足，无法在规定时间内完成挑战")
 
-        self.log(message=f"提交挑战 {model.onnx_model['name']}: {round(sum(ta), 2)}s")
+        self.log(message=f"提交挑战 {model.flag}: {round(sum(ta), 2)}s")
 
     def challenge_success(self, ctx: Chrome, init: bool = True):
         """
@@ -498,7 +337,7 @@ def _high_threat_proxy_access():
         self.log("挑战成功")
         return True
 
-    def anti_hcaptcha(self, ctx: Chrome, model: YOLO):
+    def anti_hcaptcha(self, ctx: Chrome, model):
         """
         Handle hcaptcha challenge
 
@@ -543,6 +382,10 @@ def anti_hcaptcha(self, ctx: Chrome, model: YOLO):
             ctx.switch_to.default_content()
             return False
 
+        # [👻] 注册解决方案
+        # 根据挑战类型自动匹配不同的模型
+        model = self.switch_solution(mirror=model)
+
         # [👻] 人机挑战！
         try:
             for index in range(2):

diff --git a/src/services/hcaptcha_challenger/solutions/__init__.py b/src/services/hcaptcha_challenger/solutions/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+# Time       : 2022/3/2 0:52
+# Author     : QIN2DIM
+# Github     : https://github.com/QIN2DIM
+# Description: