Skip to content

Commit

Permalink
feat: add logger integration
Browse files Browse the repository at this point in the history
  • Loading branch information
VinciGit00 committed May 14, 2024
1 parent 218b8ed commit e53766b
Show file tree
Hide file tree
Showing 17 changed files with 195 additions and 34 deletions.
11 changes: 6 additions & 5 deletions scrapegraphai/nodes/fetch_node.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
""""
FetchNode Module
"""

Expand All @@ -13,7 +13,7 @@
from ..docloaders import ChromiumLoader
from .base_node import BaseNode
from ..utils.cleanup_html import cleanup_html

from ..utils.logging import get_logger

class FetchNode(BaseNode):
"""
Expand Down Expand Up @@ -74,7 +74,8 @@ def execute(self, state):
necessary information to perform the operation is missing.
"""
if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("fetch node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down Expand Up @@ -128,7 +129,7 @@ def execute(self, state):
cleanedup_html = cleanup_html(response.text, source)
compressed_document = [Document(page_content=cleanedup_html)]
else:
print(f"Failed to retrieve contents from the webpage at url: {source}")
logger.warning(f"Failed to retrieve contents from the webpage at url: {source}")

else:
loader_kwargs = {}
Expand All @@ -144,4 +145,4 @@ def execute(self, state):
]

state.update({self.output[0]: compressed_document})
return state
return state
4 changes: 3 additions & 1 deletion scrapegraphai/nodes/generate_answer_csv_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel
from ..utils.logging import get_logger

# Imports from the library
from .base_node import BaseNode
Expand Down Expand Up @@ -72,7 +73,8 @@ def execute(self, state):
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("generate_answer csv node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
4 changes: 3 additions & 1 deletion scrapegraphai/nodes/generate_answer_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel
from ..utils.logging import get_logger

# Imports from the library
from .base_node import BaseNode
Expand Down Expand Up @@ -59,7 +60,8 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("generate answer node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
4 changes: 3 additions & 1 deletion scrapegraphai/nodes/generate_answer_pdf_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel
from ..utils.logging import get_logger

# Imports from the library
from .base_node import BaseNode
Expand Down Expand Up @@ -72,7 +73,8 @@ def execute(self, state):
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("generate answer pdf node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
4 changes: 3 additions & 1 deletion scrapegraphai/nodes/generate_scraper_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel
from ..utils.logging import get_logger

# Imports from the library
from .base_node import BaseNode
Expand Down Expand Up @@ -63,7 +64,8 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("generate scraper node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
13 changes: 8 additions & 5 deletions scrapegraphai/nodes/get_probable_tags_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
from .base_node import BaseNode

from ..utils.logging import get_logger

class GetProbableTagsNode(BaseNode):
"""
Expand All @@ -25,11 +25,12 @@ class GetProbableTagsNode(BaseNode):
node_name (str): The unique identifier name for the node, defaulting to "GetProbableTags".
"""

def __init__(self, input: str, output: List[str], model_config: dict,
def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "GetProbableTags"):
super().__init__(node_name, "node", input, output, 2, model_config)
super().__init__(node_name, "node", input, output, 2, node_config)

self.llm_model = model_config["llm_model"]
self.llm_model = node_config["llm_model"]
self.verbose = False if node_config is None else node_config.get("verbose", False)

def execute(self, state: dict) -> dict:
"""
Expand All @@ -49,7 +50,9 @@ def execute(self, state: dict) -> dict:
necessary information for generating tag predictions is missing.
"""

print(f"--- Executing {self.node_name} Node ---")
if self.verbose:
logger = get_logger("get probable tags node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/graph_iterator_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import asyncio
import copy
from typing import List, Optional

from ..utils.logging import get_logger
from tqdm.asyncio import tqdm

from .base_node import BaseNode
Expand Down
4 changes: 3 additions & 1 deletion scrapegraphai/nodes/image_to_text_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from typing import List, Optional
from .base_node import BaseNode
from ..utils.logging import get_logger


class ImageToTextNode(BaseNode):
Expand Down Expand Up @@ -42,7 +43,8 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print("---GENERATING TEXT FROM IMAGE---")
logger = get_logger("image to text node")
logger.info(f"--- Executing {self.node_name} Node ---")

input_keys = self.get_input_keys(state)
input_data = [state[key] for key in input_keys]
Expand Down
5 changes: 3 additions & 2 deletions scrapegraphai/nodes/merge_answers_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Imports from standard library
from typing import List, Optional
from tqdm import tqdm

from ..utils.logging import get_logger
# Imports from Langchain
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
Expand Down Expand Up @@ -54,7 +54,8 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("merge answers node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
5 changes: 3 additions & 2 deletions scrapegraphai/nodes/parse_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_transformers import Html2TextTransformer
from .base_node import BaseNode

from ..utils.logging import get_logger

class ParseNode(BaseNode):
"""
Expand Down Expand Up @@ -49,7 +49,8 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("parse node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
6 changes: 4 additions & 2 deletions scrapegraphai/nodes/rag_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.vectorstores import FAISS
from ..utils.logging import get_logger

from .base_node import BaseNode

Expand Down Expand Up @@ -55,9 +56,10 @@ def execute(self, state: dict) -> dict:
KeyError: If the input keys are not found in the state, indicating that the
necessary information for compressing the content is missing.
"""
logger = get_logger("rag node")

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand All @@ -80,7 +82,7 @@ def execute(self, state: dict) -> dict:
chunked_docs.append(doc)

if self.verbose:
print("--- (updated chunks metadata) ---")
logger.info("--- (updated chunks metadata) ---")

# check if embedder_model is provided, if not use llm_model
self.embedder_model = self.embedder_model if self.embedder_model else self.llm_model
Expand Down
11 changes: 6 additions & 5 deletions scrapegraphai/nodes/robots_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from langchain.output_parsers import CommaSeparatedListOutputParser
from .base_node import BaseNode
from ..helpers import robots_dictionary

from ..utils.logging import get_logger

class RobotsNode(BaseNode):
"""
Expand Down Expand Up @@ -61,9 +61,10 @@ def execute(self, state: dict) -> dict:
ValueError: If the website is not scrapeable based on the robots.txt file and
scraping is not enforced.
"""
logger = get_logger("robots node")

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down Expand Up @@ -121,17 +122,17 @@ def execute(self, state: dict) -> dict:

if "no" in is_scrapable:
if self.verbose:
print("\033[31m(Scraping this website is not allowed)\033[0m")
logger.warning("\033[31m(Scraping this website is not allowed)\033[0m")

if not self.force_scraping:
raise ValueError(
'The website you selected is not scrapable')
else:
if self.verbose:
print("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m")
logger.warning("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m")
else:
if self.verbose:
print("\033[32m(Scraping this website is allowed)\033[0m")
logger.warning("\033[32m(Scraping this website is allowed)\033[0m")

state.update({self.output[0]: is_scrapable})
return state
8 changes: 5 additions & 3 deletions scrapegraphai/nodes/search_internet_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from langchain.prompts import PromptTemplate
from ..utils.research_web import search_on_web
from .base_node import BaseNode

from ..utils.logging import get_logger

class SearchInternetNode(BaseNode):
"""
Expand Down Expand Up @@ -54,9 +54,10 @@ def execute(self, state: dict) -> dict:
KeyError: If the input keys are not found in the state, indicating that the
necessary information for generating the answer is missing.
"""
logger = get_logger("search internet node")

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger.info(f"--- Executing {self.node_name} Node ---")

input_keys = self.get_input_keys(state)

Expand Down Expand Up @@ -88,7 +89,8 @@ def execute(self, state: dict) -> dict:
search_query = search_answer.invoke({"user_prompt": user_prompt})[0]

if self.verbose:
print(f"Search Query: {search_query}")
logger.info(f"Search Query: {search_query}")


answer = search_on_web(
query=search_query, max_results=self.max_results)
Expand Down
5 changes: 3 additions & 2 deletions scrapegraphai/nodes/search_link_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Imports from standard library
from typing import List, Optional
from tqdm import tqdm

from ..utils.logging import get_logger

# Imports from Langchain
from langchain.prompts import PromptTemplate
Expand Down Expand Up @@ -59,7 +59,8 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("search link node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
5 changes: 3 additions & 2 deletions scrapegraphai/nodes/text_to_speech_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from typing import List, Optional
from .base_node import BaseNode

from ..utils.logging import get_logger

class TextToSpeechNode(BaseNode):
"""
Expand Down Expand Up @@ -45,7 +45,8 @@ def execute(self, state: dict) -> dict:
"""

if self.verbose:
print(f"--- Executing {self.node_name} Node ---")
logger = get_logger("text to speech node")
logger.info(f"--- Executing {self.node_name} Node ---")

# Interpret input keys based on the provided input expression
input_keys = self.get_input_keys(state)
Expand Down
1 change: 1 addition & 0 deletions scrapegraphai/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
from .save_audio_from_bytes import save_audio_from_bytes
from .sys_dynamic_import import dynamic_import, srcfile_import
from .cleanup_html import cleanup_html
from .logging import *
Loading

0 comments on commit e53766b

Please sign in to comment.