From b8ef93738ec4ae48c361fe5650df5194e845a2b1 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 2 Sep 2024 15:03:54 +0200 Subject: [PATCH 1/2] fix(ScreenshotScraper): impose dynamic imports --- .../screenshot_preparation.py | 15 +++++++-- .../screenshot_scraping/text_detection.py | 33 +++++++++++-------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py b/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py index 6205449c..10a5256a 100644 --- a/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py +++ b/scrapegraphai/utils/screenshot_scraping/screenshot_preparation.py @@ -5,7 +5,6 @@ from io import BytesIO from PIL import Image, ImageGrab from playwright.async_api import async_playwright -import cv2 as cv import numpy as np from io import BytesIO @@ -42,6 +41,12 @@ def select_area_with_opencv(image): A tuple containing the LEFT, TOP, RIGHT, and BOTTOM coordinates of the selected area. """ + try: + import cv2 as cv + except ImportError: + raise ImportError("The dependencies for screenshot scraping are not installed. Please install them using `pip install scrapegraphai[screenshot_scraper]`.") + + fullscreen_screenshot = ImageGrab.grab() dw, dh = fullscreen_screenshot.size @@ -116,8 +121,12 @@ def select_area_with_ipywidget(image): import matplotlib.pyplot as plt import numpy as np - from ipywidgets import interact, IntSlider - import ipywidgets as widgets + try: + from ipywidgets import interact, IntSlider + import ipywidgets as widgets + except: + raise ImportError("The dependencies for screenshot scraping are not installed. 
Please install them using `pip install scrapegraphai[screenshot_scraper]`.") + from PIL import Image img_array = np.array(image) diff --git a/scrapegraphai/utils/screenshot_scraping/text_detection.py b/scrapegraphai/utils/screenshot_scraping/text_detection.py index 8c33671f..16367a21 100644 --- a/scrapegraphai/utils/screenshot_scraping/text_detection.py +++ b/scrapegraphai/utils/screenshot_scraping/text_detection.py @@ -1,24 +1,29 @@ """ text_detection_module """ -from surya.ocr import run_ocr -from surya.model.detection.model import (load_model as load_det_model, - load_processor as load_det_processor) -from surya.model.recognition.model import load_model as load_rec_model -from surya.model.recognition.processor import load_processor as load_rec_processor def detect_text(image, languages: list = ["en"]): """ - Detects and extracts text from a given image. - Parameters: - image (PIL Image): The input image to extract text from. - lahguages (list): A list of languages to detect text in. Defaults to ["en"]. List of languages can be found here: https://github.com/VikParuchuri/surya/blob/master/surya/languages.py - Returns: - str: The extracted text from the image. - Notes: - Model weights will automatically download the first time you run this function. - """ + Detects and extracts text from a given image. + Parameters: + image (PIL Image): The input image to extract text from. + languages (list): A list of languages to detect text in. Defaults to ["en"]. List of languages can be found here: https://github.com/VikParuchuri/surya/blob/master/surya/languages.py + Returns: + str: The extracted text from the image. + Notes: + Model weights will automatically download the first time you run this function. 
+ """ + + try: + from surya.ocr import run_ocr + from surya.model.detection.model import (load_model as load_det_model, + load_processor as load_det_processor) + from surya.model.recognition.model import load_model as load_rec_model + from surya.model.recognition.processor import load_processor as load_rec_processor + except: + raise ImportError("The dependencies for screenshot scraping are not installed. Please install them using `pip install scrapegraphai[screenshot_scraper]`.") + langs = languages det_processor, det_model = load_det_processor(), load_det_model() From 52421665759032bcfad80ce540efebe5f47310f6 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 2 Sep 2024 15:04:54 +0200 Subject: [PATCH 2/2] fix(SmartScraper): pass llm_model to ParseNode --- scrapegraphai/graphs/smart_scraper_graph.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index 0167103e..4a8416c8 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -74,6 +74,7 @@ def _create_graph(self) -> BaseGraph: input="doc", output=["parsed_doc"], node_config={ + "llm_model": self.llm_model, "chunk_size": self.model_token } )