From b7539d4725cb2494c91cf8eea3507c4e1a2d9702 Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 06:25:43 +0200 Subject: [PATCH 01/10] Added update_config function to base_node.py --- scrapegraphai/nodes/base_node.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/scrapegraphai/nodes/base_node.py b/scrapegraphai/nodes/base_node.py index f3329320..803cb85e 100644 --- a/scrapegraphai/nodes/base_node.py +++ b/scrapegraphai/nodes/base_node.py @@ -68,6 +68,21 @@ def execute(self, state: dict) -> dict: pass + def update_config(self, params: dict, overwrite: bool = False): + """ + Updates the node_config dictionary as well as attributes with same key. + + Args: + params (dict): The dictionary to update node_config with. + overwrite (bool): Flag indicating whether existing values in node_config should be overwritten. + """ + if self.node_config is None: + self.node_config = {} + for key, val in params.items(): + if hasattr(self, key) and key in self.node_config and overwrite: + self.node_config[key] = val + setattr(self, key, val) + def get_input_keys(self, state: dict) -> List[str]: """ Determines the necessary state keys based on the input specification. 
From c2c61625b3e62f9e408a691f43ba96c169ca917a Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 06:37:31 +0200 Subject: [PATCH 02/10] Corrected logic of update_config function in base_node.py --- scrapegraphai/nodes/base_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraphai/nodes/base_node.py b/scrapegraphai/nodes/base_node.py index 803cb85e..cabfeda0 100644 --- a/scrapegraphai/nodes/base_node.py +++ b/scrapegraphai/nodes/base_node.py @@ -79,7 +79,7 @@ def update_config(self, params: dict, overwrite: bool = False): if self.node_config is None: self.node_config = {} for key, val in params.items(): - if hasattr(self, key) and key in self.node_config and overwrite: + if hasattr(self, key) and (key not in self.node_config or overwrite): self.node_config[key] = val setattr(self, key, val) From 729d5d75975cdce0474d544cf91010e107955447 Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 09:36:11 +0200 Subject: [PATCH 03/10] Changed node_config["llm"] to node_config["llm_model"] --- scrapegraphai/nodes/generate_answer_csv_node.py | 8 ++++---- scrapegraphai/nodes/generate_answer_node.py | 2 +- scrapegraphai/nodes/generate_answer_node_csv.py | 8 ++++---- scrapegraphai/nodes/generate_scraper_node.py | 2 +- scrapegraphai/nodes/rag_node.py | 2 +- scrapegraphai/nodes/robots_node.py | 2 +- scrapegraphai/nodes/search_internet_node.py | 2 +- scrapegraphai/nodes/search_link_node.py | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/scrapegraphai/nodes/generate_answer_csv_node.py b/scrapegraphai/nodes/generate_answer_csv_node.py index ac861816..6d2b84fc 100644 --- a/scrapegraphai/nodes/generate_answer_csv_node.py +++ b/scrapegraphai/nodes/generate_answer_csv_node.py @@ -22,14 +22,14 @@ class GenerateAnswerCSVNode(BaseNode): an answer. Attributes: - llm: An instance of a language model client, configured for generating answers. 
+ llm_model: An instance of a language model client, configured for generating answers. node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswerNodeCsv". node_type (str): The type of the node, set to "node" indicating a standard operational node. Args: - llm: An instance of the language model client (e.g., ChatOpenAI) used + llm_model: An instance of the language model client (e.g., ChatOpenAI) used for generating answers. node_name (str, optional): The unique identifier name for the node. Defaults to "GenerateAnswerNodeCsv". @@ -44,11 +44,11 @@ def __init__(self, input: str, output: List[str], node_config: dict, """ Initializes the GenerateAnswerNodeCsv with a language model client and a node name. Args: - llm: An instance of the OpenAIImageToText class. + llm_model: An instance of the OpenAIImageToText class. node_name (str): name of the node """ super().__init__(node_name, "node", input, output, 2, node_config) - self.llm_model = node_config["llm"] + self.llm_model = node_config["llm_model"] self.verbose = True if node_config is None else node_config.get( "verbose", False) diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index e9b4dd40..df3078ef 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -37,7 +37,7 @@ def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "GenerateAnswer"): super().__init__(node_name, "node", input, output, 2, node_config) - self.llm_model = node_config["llm"] + self.llm_model = node_config["llm_model"] self.verbose = True if node_config is None else node_config.get("verbose", False) def execute(self, state: dict) -> dict: diff --git a/scrapegraphai/nodes/generate_answer_node_csv.py b/scrapegraphai/nodes/generate_answer_node_csv.py index ac861816..6d2b84fc 100644 --- a/scrapegraphai/nodes/generate_answer_node_csv.py +++ 
b/scrapegraphai/nodes/generate_answer_node_csv.py @@ -22,14 +22,14 @@ class GenerateAnswerCSVNode(BaseNode): an answer. Attributes: - llm: An instance of a language model client, configured for generating answers. + llm_model: An instance of a language model client, configured for generating answers. node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswerNodeCsv". node_type (str): The type of the node, set to "node" indicating a standard operational node. Args: - llm: An instance of the language model client (e.g., ChatOpenAI) used + llm_model: An instance of the language model client (e.g., ChatOpenAI) used for generating answers. node_name (str, optional): The unique identifier name for the node. Defaults to "GenerateAnswerNodeCsv". @@ -44,11 +44,11 @@ def __init__(self, input: str, output: List[str], node_config: dict, """ Initializes the GenerateAnswerNodeCsv with a language model client and a node name. Args: - llm: An instance of the OpenAIImageToText class. + llm_model: An instance of the OpenAIImageToText class. 
node_name (str): name of the node """ super().__init__(node_name, "node", input, output, 2, node_config) - self.llm_model = node_config["llm"] + self.llm_model = node_config["llm_model"] self.verbose = True if node_config is None else node_config.get( "verbose", False) diff --git a/scrapegraphai/nodes/generate_scraper_node.py b/scrapegraphai/nodes/generate_scraper_node.py index 9c80fc19..2e1f959e 100644 --- a/scrapegraphai/nodes/generate_scraper_node.py +++ b/scrapegraphai/nodes/generate_scraper_node.py @@ -40,7 +40,7 @@ def __init__(self, input: str, output: List[str], node_config: dict, library: str, website: str, node_name: str = "GenerateAnswer"): super().__init__(node_name, "node", input, output, 2, node_config) - self.llm_model = node_config["llm"] + self.llm_model = node_config["llm_model"] self.library = library self.source = website diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py index 92e7011f..f25f871d 100644 --- a/scrapegraphai/nodes/rag_node.py +++ b/scrapegraphai/nodes/rag_node.py @@ -39,7 +39,7 @@ class RAGNode(BaseNode): def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "RAG"): super().__init__(node_name, "node", input, output, 2, node_config) - self.llm_model = node_config["llm"] + self.llm_model = node_config["llm_model"] self.embedder_model = node_config.get("embedder_model", None) self.verbose = True if node_config is None else node_config.get( "verbose", False) diff --git a/scrapegraphai/nodes/robots_node.py b/scrapegraphai/nodes/robots_node.py index 001de62d..8c341183 100644 --- a/scrapegraphai/nodes/robots_node.py +++ b/scrapegraphai/nodes/robots_node.py @@ -38,7 +38,7 @@ def __init__(self, input: str, output: List[str], node_config: dict, force_scra node_name: str = "Robots"): super().__init__(node_name, "node", input, output, 1) - self.llm_model = node_config["llm"] + self.llm_model = node_config["llm_model"] self.force_scraping = force_scraping self.verbose = True if 
node_config is None else node_config.get("verbose", False) diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 00cf9211..01095ef8 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -31,7 +31,7 @@ def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "SearchInternet"): super().__init__(node_name, "node", input, output, 1, node_config) - self.llm_model = node_config["llm"] + self.llm_model = node_config["llm_model"] self.verbose = True if node_config is None else node_config.get("verbose", False) def execute(self, state: dict) -> dict: diff --git a/scrapegraphai/nodes/search_link_node.py b/scrapegraphai/nodes/search_link_node.py index 7f766b5b..037b862e 100644 --- a/scrapegraphai/nodes/search_link_node.py +++ b/scrapegraphai/nodes/search_link_node.py @@ -37,7 +37,7 @@ def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "GenerateLinks"): super().__init__(node_name, "node", input, output, 1, node_config) - self.llm_model = node_config["llm"] + self.llm_model = node_config["llm_model"] self.verbose = True if node_config is None else node_config.get("verbose", False) def execute(self, state: dict) -> dict: From 2178485b3d1ef06ff87e2ba52621169214307101 Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 09:47:13 +0200 Subject: [PATCH 04/10] Adjusted graphs to reflect node_config change --- scrapegraphai/graphs/csv_scraper_graph.py | 4 ++-- scrapegraphai/graphs/json_scraper_graph.py | 4 ++-- scrapegraphai/graphs/script_creator_graph.py | 4 ++-- scrapegraphai/graphs/search_graph.py | 6 +++--- scrapegraphai/graphs/smart_scraper_graph.py | 4 ++-- scrapegraphai/graphs/speech_graph.py | 4 ++-- scrapegraphai/graphs/xml_scraper_graph.py | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/scrapegraphai/graphs/csv_scraper_graph.py 
b/scrapegraphai/graphs/csv_scraper_graph.py index 9a5eb931..cf6c9e28 100644 --- a/scrapegraphai/graphs/csv_scraper_graph.py +++ b/scrapegraphai/graphs/csv_scraper_graph.py @@ -49,7 +49,7 @@ def _create_graph(self): input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "embedder_model": self.embedder_model, "verbose": self.verbose } @@ -58,7 +58,7 @@ def _create_graph(self): input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "verbose": self.verbose } ) diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index f7392212..e99d82f4 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -73,7 +73,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "embedder_model": self.embedder_model, "verbose": self.verbose } @@ -82,7 +82,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "verbose": self.verbose } ) diff --git a/scrapegraphai/graphs/script_creator_graph.py b/scrapegraphai/graphs/script_creator_graph.py index 105048db..b34bdf91 100644 --- a/scrapegraphai/graphs/script_creator_graph.py +++ b/scrapegraphai/graphs/script_creator_graph.py @@ -77,7 +77,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "embedder_model": self.embedder_model, "verbose": self.verbose } @@ -85,7 +85,7 @@ def _create_graph(self) -> BaseGraph: generate_scraper_node = GenerateScraperNode( input="user_prompt & 
(relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={"llm": self.llm_model, + node_config={"llm_model": self.llm_model, "verbose": self.verbose}, library=self.library, website=self.source diff --git a/scrapegraphai/graphs/search_graph.py b/scrapegraphai/graphs/search_graph.py index 41548a77..09d05a99 100644 --- a/scrapegraphai/graphs/search_graph.py +++ b/scrapegraphai/graphs/search_graph.py @@ -50,7 +50,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt", output=["url"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "verbose": self.verbose } ) @@ -74,7 +74,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "embedder_model": self.embedder_model, "verbose": self.verbose } @@ -83,7 +83,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "verbose": self.verbose } ) diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index 4d6b0e93..d9077d49 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -75,7 +75,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "embedder_model": self.embedder_model, "verbose": self.verbose } @@ -84,7 +84,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "verbose": self.verbose } ) diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index 3edadfd0..d4a2b669 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ 
b/scrapegraphai/graphs/speech_graph.py @@ -74,7 +74,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "embedder_model": self.embedder_model, "verbose": self.verbose } @@ -83,7 +83,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "verbose": self.verbose } ) diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py index c84e1506..168cb4a0 100644 --- a/scrapegraphai/graphs/xml_scraper_graph.py +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -75,7 +75,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "embedder_model": self.embedder_model, "verbose": self.verbose } @@ -84,7 +84,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], node_config={ - "llm": self.llm_model, + "llm_model": self.llm_model, "verbose": self.verbose } ) From 444a13a6a6dfd4896378290ec4125782f3f78f0d Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 10:15:37 +0200 Subject: [PATCH 05/10] Created set_common_params function --- scrapegraphai/graphs/abstract_graph.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index b8a9efe9..a7137a2f 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -46,16 +46,30 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None): self.embedder_model = self.llm_model if "embeddings" not in config else self._create_llm( config["embeddings"]) + # Create the 
graph + self.graph = self._create_graph() + self.final_state = None + self.execution_info = None + # Set common configuration parameters self.verbose = True if config is None else config.get("verbose", False) self.headless = True if config is None else config.get( "headless", True) + common_params = {"verbose": self.verbose, + "headless": self.headless} + self.set_common_params(common_params) - # Create the graph - self.graph = self._create_graph() - self.final_state = None - self.execution_info = None + def set_common_params(self, params: dict): + """ + Pass parameters to every node in the graph unless otherwise defined in the graph. + + Args: + params (dict): Common parameters and their values. + """ + + for node in self.graph.nodes: + node.update_config(params) def _set_model_token(self, llm): From 4dc6049e7603833b7284eef62bc4d8a2e8689c86 Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 10:40:04 +0200 Subject: [PATCH 06/10] Simplified create graph functions using common params --- scrapegraphai/graphs/abstract_graph.py | 8 +++++--- scrapegraphai/graphs/csv_scraper_graph.py | 17 ----------------- scrapegraphai/graphs/json_scraper_graph.py | 14 -------------- scrapegraphai/graphs/script_creator_graph.py | 12 ------------ scrapegraphai/graphs/search_graph.py | 18 ------------------ scrapegraphai/graphs/smart_scraper_graph.py | 14 -------------- scrapegraphai/graphs/speech_graph.py | 15 --------------- scrapegraphai/graphs/xml_scraper_graph.py | 14 -------------- 8 files changed, 5 insertions(+), 107 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index a7137a2f..46f1d62f 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -55,9 +55,11 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None): self.verbose = True if config is None else config.get("verbose", False) self.headless = True if config is None else config.get( 
"headless", True) - common_params = {"verbose": self.verbose, - "headless": self.headless} - self.set_common_params(common_params) + common_params = {"headless": self.headless, + "verbose": self.verbose, + "llm_model": self.llm_model, + "embedder_model": self.embedder_model} + self.set_common_params(common_params, overwrite=False) def set_common_params(self, params: dict): diff --git a/scrapegraphai/graphs/csv_scraper_graph.py b/scrapegraphai/graphs/csv_scraper_graph.py index cf6c9e28..b58f52df 100644 --- a/scrapegraphai/graphs/csv_scraper_graph.py +++ b/scrapegraphai/graphs/csv_scraper_graph.py @@ -32,35 +32,18 @@ def _create_graph(self): fetch_node = FetchNode( input="csv_dir", output=["doc"], - node_config={ - "headless": self.headless, - "verbose": self.verbose - } ) parse_node = ParseNode( input="doc", output=["parsed_doc"], - node_config={ - "chunk_size": self.model_token, - "verbose": self.verbose - } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model, - "verbose": self.verbose - } ) generate_answer_node = GenerateAnswerCSVNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={ - "llm_model": self.llm_model, - "verbose": self.verbose - } ) return BaseGraph( diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index e99d82f4..4f1ce8f2 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -56,35 +56,21 @@ def _create_graph(self) -> BaseGraph: fetch_node = FetchNode( input="json_dir", output=["doc"], - node_config={ - "headless": self.headless, - "verbose": self.verbose - } ) parse_node = ParseNode( input="doc", output=["parsed_doc"], node_config={ "chunk_size": self.model_token, - "verbose": self.verbose } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", 
output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model, - "verbose": self.verbose - } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={ - "llm_model": self.llm_model, - "verbose": self.verbose - } ) return BaseGraph( diff --git a/scrapegraphai/graphs/script_creator_graph.py b/scrapegraphai/graphs/script_creator_graph.py index b34bdf91..d2e7806a 100644 --- a/scrapegraphai/graphs/script_creator_graph.py +++ b/scrapegraphai/graphs/script_creator_graph.py @@ -61,32 +61,20 @@ def _create_graph(self) -> BaseGraph: fetch_node = FetchNode( input="url | local_dir", output=["doc"], - node_config={ - "headless": self.headless, - "verbose": self.verbose - } ) parse_node = ParseNode( input="doc", output=["parsed_doc"], node_config={"chunk_size": self.model_token, - "verbose": self.verbose } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model, - "verbose": self.verbose - } ) generate_scraper_node = GenerateScraperNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={"llm_model": self.llm_model, - "verbose": self.verbose}, library=self.library, website=self.source ) diff --git a/scrapegraphai/graphs/search_graph.py b/scrapegraphai/graphs/search_graph.py index 09d05a99..afde1baa 100644 --- a/scrapegraphai/graphs/search_graph.py +++ b/scrapegraphai/graphs/search_graph.py @@ -49,43 +49,25 @@ def _create_graph(self) -> BaseGraph: search_internet_node = SearchInternetNode( input="user_prompt", output=["url"], - node_config={ - "llm_model": self.llm_model, - "verbose": self.verbose - } ) fetch_node = FetchNode( input="url | local_dir", output=["doc"], - node_config={ - "headless": self.headless, - "verbose": self.verbose - } ) parse_node = ParseNode( input="doc", 
output=["parsed_doc"], node_config={ "chunk_size": self.model_token, - "verbose": self.verbose } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model, - "verbose": self.verbose - } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={ - "llm_model": self.llm_model, - "verbose": self.verbose - } ) return BaseGraph( diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index d9077d49..a7cd0f3e 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -58,35 +58,21 @@ def _create_graph(self) -> BaseGraph: fetch_node = FetchNode( input="url | local_dir", output=["doc"], - node_config={ - "headless": self.headless, - "verbose": self.verbose - } ) parse_node = ParseNode( input="doc", output=["parsed_doc"], node_config={ "chunk_size": self.model_token, - "verbose": self.verbose } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model, - "verbose": self.verbose - } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={ - "llm_model": self.llm_model, - "verbose": self.verbose - } ) return BaseGraph( diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index d4a2b669..0bab6420 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ b/scrapegraphai/graphs/speech_graph.py @@ -57,42 +57,27 @@ def _create_graph(self) -> BaseGraph: fetch_node = FetchNode( input="url | local_dir", output=["doc"], - node_config={ - "headless": self.headless, - "verbose": self.verbose - } ) parse_node = ParseNode( input="doc", output=["parsed_doc"], 
node_config={ "chunk_size": self.model_token, - "verbose": self.verbose } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model, - "verbose": self.verbose - } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={ - "llm_model": self.llm_model, - "verbose": self.verbose - } ) text_to_speech_node = TextToSpeechNode( input="answer", output=["audio"], node_config={ "tts_model": OpenAITextToSpeech(self.config["tts_model"]), - "verbose": self.verbose } ) diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py index 168cb4a0..7c649d78 100644 --- a/scrapegraphai/graphs/xml_scraper_graph.py +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -58,35 +58,21 @@ def _create_graph(self) -> BaseGraph: fetch_node = FetchNode( input="xml_dir", output=["doc"], - node_config={ - "headless": self.headless, - "verbose": self.verbose - } ) parse_node = ParseNode( input="doc", output=["parsed_doc"], node_config={ "chunk_size": self.model_token, - "verbose": self.verbose } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model, - "verbose": self.verbose - } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={ - "llm_model": self.llm_model, - "verbose": self.verbose - } ) return BaseGraph( From 8d0e109a70905439927cec383760a6a5675b6bcf Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 14:32:58 +0200 Subject: [PATCH 07/10] Added overwrite keyword to set_common_params` --- scrapegraphai/graphs/abstract_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py 
b/scrapegraphai/graphs/abstract_graph.py index 46f1d62f..9087901f 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -62,7 +62,7 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None): self.set_common_params(common_params, overwrite=False) - def set_common_params(self, params: dict): + def set_common_params(self, params: dict, overwrite=False): """ Pass parameters to every node in the graph unless otherwise defined in the graph. @@ -71,7 +71,7 @@ def set_common_params(self, params: dict): """ for node in self.graph.nodes: - node.update_config(params) + node.update_config(params, overwrite) def _set_model_token(self, llm): From a53e95cbf0b52fbb68eb42f3ca85b20ef9c91e99 Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 14:36:16 +0200 Subject: [PATCH 08/10] Corrected graphs to use common params --- scrapegraphai/graphs/csv_scraper_graph.py | 12 +++++++++++- scrapegraphai/graphs/json_scraper_graph.py | 11 +++++++++-- scrapegraphai/graphs/script_creator_graph.py | 7 ++++++- scrapegraphai/graphs/search_graph.py | 16 +++++++++++++--- scrapegraphai/graphs/smart_scraper_graph.py | 13 ++++++++++--- scrapegraphai/graphs/speech_graph.py | 14 ++++++++++---- scrapegraphai/graphs/xml_scraper_graph.py | 13 ++++++++++--- 7 files changed, 69 insertions(+), 17 deletions(-) diff --git a/scrapegraphai/graphs/csv_scraper_graph.py b/scrapegraphai/graphs/csv_scraper_graph.py index b58f52df..24c19234 100644 --- a/scrapegraphai/graphs/csv_scraper_graph.py +++ b/scrapegraphai/graphs/csv_scraper_graph.py @@ -36,14 +36,24 @@ def _create_graph(self): parse_node = ParseNode( input="doc", output=["parsed_doc"], + node_config={ + "chunk_size": self.model_token, + } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], + node_config={ + "llm_model": self.llm_model, + "embedder_model": self.embedder_model, + } ) generate_answer_node = GenerateAnswerCSVNode( input="user_prompt & 
(relevant_chunks | parsed_doc | doc)", output=["answer"], + node_config={ + "llm_model": self.llm_model, + } ) return BaseGraph( @@ -68,4 +78,4 @@ def run(self) -> str: inputs = {"user_prompt": self.prompt, self.input_key: self.source} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") + return self.final_state.get("answer", "No answer found.") \ No newline at end of file diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index 4f1ce8f2..843fa2b7 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -61,16 +61,23 @@ def _create_graph(self) -> BaseGraph: input="doc", output=["parsed_doc"], node_config={ - "chunk_size": self.model_token, + "chunk_size": self.model_token } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], + node_config={ + "llm_model": self.llm_model, + "embedder_model": self.embedder_model + } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], + node_config={ + "llm": self.llm_model + } ) return BaseGraph( @@ -99,4 +106,4 @@ def run(self) -> str: inputs = {"user_prompt": self.prompt, self.input_key: self.source} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") + return self.final_state.get("answer", "No answer found.") \ No newline at end of file diff --git a/scrapegraphai/graphs/script_creator_graph.py b/scrapegraphai/graphs/script_creator_graph.py index d2e7806a..5ffc358b 100644 --- a/scrapegraphai/graphs/script_creator_graph.py +++ b/scrapegraphai/graphs/script_creator_graph.py @@ -71,10 +71,15 @@ def _create_graph(self) -> BaseGraph: rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], + node_config={ + "llm_model": self.llm_model, + 
"embedder_model": self.embedder_model + } ) generate_scraper_node = GenerateScraperNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], + node_config={"llm_model": self.llm_model}, library=self.library, website=self.source ) @@ -105,4 +110,4 @@ def run(self) -> str: inputs = {"user_prompt": self.prompt, self.input_key: self.source} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") + return self.final_state.get("answer", "No answer found.") \ No newline at end of file diff --git a/scrapegraphai/graphs/search_graph.py b/scrapegraphai/graphs/search_graph.py index afde1baa..9c463e1a 100644 --- a/scrapegraphai/graphs/search_graph.py +++ b/scrapegraphai/graphs/search_graph.py @@ -49,25 +49,35 @@ def _create_graph(self) -> BaseGraph: search_internet_node = SearchInternetNode( input="user_prompt", output=["url"], + node_config={ + "llm_model": self.llm_model + } ) fetch_node = FetchNode( input="url | local_dir", - output=["doc"], + output=["doc"] ) parse_node = ParseNode( input="doc", output=["parsed_doc"], node_config={ - "chunk_size": self.model_token, + "chunk_size": self.model_token } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], + node_config={ + "llm_model": self.llm_model, + "embedder_model": self.embedder_model + } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], + node_config={ + "llm_model": self.llm_model + } ) return BaseGraph( @@ -98,4 +108,4 @@ def run(self) -> str: inputs = {"user_prompt": self.prompt} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") + return self.final_state.get("answer", "No answer found.") \ No newline at end of file diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index 
a7cd0f3e..a9e63823 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -57,22 +57,29 @@ def _create_graph(self) -> BaseGraph: """ fetch_node = FetchNode( input="url | local_dir", - output=["doc"], + output=["doc"] ) parse_node = ParseNode( input="doc", output=["parsed_doc"], node_config={ - "chunk_size": self.model_token, + "chunk_size": self.model_token } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], + node_config={ + "llm_model": self.llm_model, + "embedder_model": self.embedder_model + } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], + node_config={ + "llm_model": self.llm_model + } ) return BaseGraph( @@ -101,4 +108,4 @@ def run(self) -> str: inputs = {"user_prompt": self.prompt, self.input_key: self.source} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") + return self.final_state.get("answer", "No answer found.") \ No newline at end of file diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index 0bab6420..3ca2b703 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ b/scrapegraphai/graphs/speech_graph.py @@ -56,28 +56,34 @@ def _create_graph(self) -> BaseGraph: fetch_node = FetchNode( input="url | local_dir", - output=["doc"], + output=["doc"] ) parse_node = ParseNode( input="doc", output=["parsed_doc"], node_config={ - "chunk_size": self.model_token, + "chunk_size": self.model_token } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], + node_config={ + "llm_model": self.llm_model, + "embedder_model": self.embedder_model } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], + node_config={ + "llm_model": self.llm_model + } ) text_to_speech_node = 
TextToSpeechNode( input="answer", output=["audio"], node_config={ - "tts_model": OpenAITextToSpeech(self.config["tts_model"]), + "tts_model": OpenAITextToSpeech(self.config["tts_model"]) } ) @@ -116,4 +122,4 @@ def run(self) -> str: "output_path", "output.mp3")) print(f"Audio saved to {self.config.get('output_path', 'output.mp3')}") - return self.final_state.get("answer", "No answer found.") + return self.final_state.get("answer", "No answer found.") \ No newline at end of file diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py index 7c649d78..945dc165 100644 --- a/scrapegraphai/graphs/xml_scraper_graph.py +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -57,22 +57,29 @@ def _create_graph(self) -> BaseGraph: fetch_node = FetchNode( input="xml_dir", - output=["doc"], + output=["doc"] ) parse_node = ParseNode( input="doc", output=["parsed_doc"], node_config={ - "chunk_size": self.model_token, + "chunk_size": self.model_token } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], + node_config={ + "llm_model": self.llm_model, + "embedder_model": self.embedder_model + } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], + node_config={ + "llm_model": self.llm_model + } ) return BaseGraph( @@ -101,4 +108,4 @@ def run(self) -> str: inputs = {"user_prompt": self.prompt, self.input_key: self.source} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") + return self.final_state.get("answer", "No answer found.") \ No newline at end of file From 3ae2ea1dbd390ab10d20f2845d11d6351ee98ed8 Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 15:58:50 +0200 Subject: [PATCH 09/10] Miscellaneous "llm" -> "llm_model" refactors --- examples/openai/custom_graph_openai.py | 6 +++--- scrapegraphai/graphs/json_scraper_graph.py | 2 +- 
scrapegraphai/nodes/fetch_node.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/openai/custom_graph_openai.py b/examples/openai/custom_graph_openai.py index dab82c1f..5744669b 100644 --- a/examples/openai/custom_graph_openai.py +++ b/examples/openai/custom_graph_openai.py @@ -34,7 +34,7 @@ robot_node = RobotsNode( input="url", output=["is_scrapable"], - node_config={"llm": llm_model} + node_config={"llm_model": llm_model} ) fetch_node = FetchNode( @@ -50,12 +50,12 @@ rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], - node_config={"llm": llm_model}, + node_config={"llm_model": llm_model}, ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={"llm": llm_model}, + node_config={"llm_model": llm_model}, ) # ************************************************ diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index 843fa2b7..aec41195 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -76,7 +76,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], node_config={ - "llm": self.llm_model + "llm_model": self.llm_model } ) diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index f873654d..82d67949 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -29,7 +29,7 @@ class FetchNode(BaseNode): node_name (str): The unique identifier name for the node, defaulting to "Fetch". 
""" - def __init__(self, input: str, output: List[str], node_config: Optional[dict], node_name: str = "Fetch"): + def __init__(self, input: str, output: List[str], node_config: Optional[dict]=None, node_name: str = "Fetch"): super().__init__(node_name, "node", input, output, 1) self.headless = True if node_config is None else node_config.get("headless", True) From f10a44a1ddd7a34c6a3dc4af056abdb3ffb33a97 Mon Sep 17 00:00:00 2001 From: Eric Page Date: Sun, 5 May 2024 16:13:05 +0200 Subject: [PATCH 10/10] Resolved key error "llm" -> "llm_model" --- examples/single_node/robot_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/single_node/robot_node.py b/examples/single_node/robot_node.py index 0e446262..257c4efb 100644 --- a/examples/single_node/robot_node.py +++ b/examples/single_node/robot_node.py @@ -26,7 +26,7 @@ robots_node = RobotsNode( input="url", output=["is_scrapable"], - node_config={"llm": llm_model, + node_config={"llm_model": llm_model, "headless": False } )