From 431b495baa1a6ff4ced9a23c89a8486feaffc43f Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Thu, 2 May 2024 22:30:20 +0200 Subject: [PATCH 1/4] add examples for azure --- examples/azure/inputs/books.xml | 120 ++++++++++++++++++ examples/azure/inputs/example.json | 182 +++++++++++++++++++++++++++ examples/azure/inputs/username.csv | 7 ++ examples/azure/json_scraper_azure.py | 68 ++++++++++ examples/azure/xml_scraper_azure.py | 68 ++++++++++ 5 files changed, 445 insertions(+) create mode 100644 examples/azure/inputs/books.xml create mode 100644 examples/azure/inputs/example.json create mode 100644 examples/azure/inputs/username.csv create mode 100644 examples/azure/json_scraper_azure.py create mode 100644 examples/azure/xml_scraper_azure.py diff --git a/examples/azure/inputs/books.xml b/examples/azure/inputs/books.xml new file mode 100644 index 00000000..e3d1fe87 --- /dev/null +++ b/examples/azure/inputs/books.xml @@ -0,0 +1,120 @@ + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. + + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. + + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. 
+ + \ No newline at end of file diff --git a/examples/azure/inputs/example.json b/examples/azure/inputs/example.json new file mode 100644 index 00000000..2263184c --- /dev/null +++ b/examples/azure/inputs/example.json @@ -0,0 +1,182 @@ +{ + "kind":"youtube#searchListResponse", + "etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg", + "nextPageToken":"CAUQAA", + "regionCode":"NL", + "pageInfo":{ + "totalResults":1000000, + "resultsPerPage":5 + }, + "items":[ + { + "kind":"youtube#searchResult", + "etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ", + "id":{ + "kind":"youtube#video", + "videoId":"TvWDY4Mm5GM" + }, + "snippet":{ + "publishedAt":"2023-07-24T14:15:01Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T14:15:01Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k", + "id":{ + "kind":"youtube#video", + "videoId":"aZM_42CcNZ4" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:09:27Z", + "channelId":"UCM5gMM_HqfKHYIEJ3lstMUA", + "title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 💰", + "description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"John Nellis", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:09:27Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"WbBz4oh9I5VaYj91LjeJvffrBVY", + "id":{ + "kind":"youtube#video", + "videoId":"wkP3XS3aNAY" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:00:50Z", + "channelId":"UC4EP1dxFDPup_aFLt0ElsDw", + "title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL", + "description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Shoot for Love", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:00:50Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"juxv_FhT_l4qrR05S1QTrb4CGh8", + "id":{ + "kind":"youtube#video", + "videoId":"rJkDZ0WvfT8" + }, + "snippet":{ + "publishedAt":"2023-07-24T10:00:39Z", + "channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ", + "title":"TOP 10 DEFENDERS 2023", + "description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! 
• Instagram ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Home of Football", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T10:00:39Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"wtuknXTmI1txoULeH3aWaOuXOow", + "id":{ + "kind":"youtube#video", + "videoId":"XH0rtu4U6SE" + }, + "snippet":{ + "publishedAt":"2023-07-21T16:30:05Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-21T16:30:05Z" + } + } + ] +} \ No newline at end of file diff --git a/examples/azure/inputs/username.csv b/examples/azure/inputs/username.csv new file mode 100644 index 00000000..006ac8e6 --- /dev/null +++ b/examples/azure/inputs/username.csv @@ -0,0 +1,7 @@ +Username; Identifier;First name;Last name +booker12;9012;Rachel;Booker +grey07;2070;Laura;Grey +johnson81;4081;Craig;Johnson +jenkins46;9346;Mary;Jenkins +smith79;5079;Jamie;Smith + diff --git a/examples/azure/json_scraper_azure.py b/examples/azure/json_scraper_azure.py new file mode 100644 index 00000000..5e634862 --- /dev/null +++ b/examples/azure/json_scraper_azure.py @@ -0,0 +1,68 @@ +""" +Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key +""" + +import os +from dotenv import load_dotenv +from langchain_openai import AzureChatOpenAI +from langchain_openai import AzureOpenAIEmbeddings +from scrapegraphai.graphs import JSONScraperGraph +from scrapegraphai.utils import prettify_exec_info + + +# required environment variable in .env +# AZURE_OPENAI_ENDPOINT +# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME +# MODEL_NAME +# AZURE_OPENAI_API_KEY +# OPENAI_API_TYPE +# AZURE_OPENAI_API_VERSION +# AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME +load_dotenv() + + +FILE_NAME = "inputs/example.json" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Initialize the model instances +# ************************************************ + +llm_model_instance = AzureChatOpenAI( + openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], + azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] +) + +embedder_model_instance = AzureOpenAIEmbeddings( + azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"], + openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], +) + +# ************************************************ +# Create the JSONScraperGraph instance and run it +# ************************************************ + +graph_config = { + "llm": {"model_instance": llm_model_instance}, + "embeddings": {"model_instance": embedder_model_instance} +} + +smart_scraper_graph = JSONScraperGraph( + 
prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/azure/xml_scraper_azure.py b/examples/azure/xml_scraper_azure.py new file mode 100644 index 00000000..696b8817 --- /dev/null +++ b/examples/azure/xml_scraper_azure.py @@ -0,0 +1,68 @@ +""" +Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key +""" + +import os +from dotenv import load_dotenv +from langchain_openai import AzureChatOpenAI +from langchain_openai import AzureOpenAIEmbeddings +from scrapegraphai.graphs import XMLScraperGraph +from scrapegraphai.utils import prettify_exec_info + + +# required environment variable in .env +# AZURE_OPENAI_ENDPOINT +# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME +# MODEL_NAME +# AZURE_OPENAI_API_KEY +# OPENAI_API_TYPE +# AZURE_OPENAI_API_VERSION +# AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME +load_dotenv() + +FILE_NAME = "inputs/books.xml" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + + +# ************************************************ +# Initialize the model instances +# ************************************************ + +llm_model_instance = AzureChatOpenAI( + openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], + azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] +) + +embedder_model_instance = AzureOpenAIEmbeddings( + azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"], + openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], +) + +# ************************************************ +# Create the XMLScraperGraph instance and run it +# ************************************************ + +graph_config = { + "llm": {"model_instance": llm_model_instance}, + "embeddings": {"model_instance": embedder_model_instance} +} + +smart_scraper_graph = XMLScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) From cb1cb616b7998d3624bf57b19b5f1b1945fea4ef Mon Sep 17 00:00:00 2001 From: Marco Perini Date: Fri, 3 May 2024 14:18:12 +0200 Subject: [PATCH 2/4] feat: add base_node to __init__.py Useful for people to create custom nodes --- scrapegraphai/nodes/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapegraphai/nodes/__init__.py b/scrapegraphai/nodes/__init__.py index 2ee8769b..405d074d 100644 --- a/scrapegraphai/nodes/__init__.py +++ b/scrapegraphai/nodes/__init__.py @@ -2,6 +2,7 @@ __init__.py file for node folder """ +from .base_node import BaseNode from .fetch_node import FetchNode from .conditional_node import ConditionalNode from .get_probable_tags_node import GetProbableTagsNode From 98dec36c60d1dc8b072482e8d514c3869a45a3f8 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 
3 May 2024 12:19:52 +0000 Subject: [PATCH 3/4] ci(release): 0.7.0-beta.1 [skip ci] ## [0.7.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.6.2...v0.7.0-beta.1) (2024-05-03) ### Features * add base_node to __init__.py ([cb1cb61](https://github.com/VinciGit00/Scrapegraph-ai/commit/cb1cb616b7998d3624bf57b19b5f1b1945fea4ef)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61557e62..14c376a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [0.7.0-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.6.2...v0.7.0-beta.1) (2024-05-03) + + +### Features + +* add base_node to __init__.py ([cb1cb61](https://github.com/VinciGit00/Scrapegraph-ai/commit/cb1cb616b7998d3624bf57b19b5f1b1945fea4ef)) + ## [0.6.2](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.6.1...v0.6.2) (2024-05-02) diff --git a/pyproject.toml b/pyproject.toml index 5ab6d492..4dd7ac13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "scrapegraphai" -version = "0.6.2" +version = "0.7.0b1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From 819cbcd3be1a8cb195de0b44c6b6d4d824e2a42a Mon Sep 17 00:00:00 2001 From: S4mpl3r Date: Fri, 3 May 2024 16:14:27 +0330 Subject: [PATCH 4/4] refactor: Changed the way embedding model is created in AbstractGraph class and removed handling of embedding model creation from RAGNode. Now AbstractGraph will call a dedicated method for embedding models instead of _create_llm. This makes it easy to use any LLM with any supported embedding model. --- examples/groq/smart_scraper_groq_openai.py | 2 +- scrapegraphai/graphs/abstract_graph.py | 88 +++++++++++++++++++++- scrapegraphai/nodes/rag_node.py | 26 +------ 3 files changed, 88 insertions(+), 28 deletions(-) diff --git a/examples/groq/smart_scraper_groq_openai.py b/examples/groq/smart_scraper_groq_openai.py index 19f86145..47c42303 100644 --- a/examples/groq/smart_scraper_groq_openai.py +++ b/examples/groq/smart_scraper_groq_openai.py @@ -25,7 +25,7 @@ }, "embeddings": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "openai", }, "headless": False } diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index b8a9efe9..91e7fcf6 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -5,8 +5,12 @@ from abc import ABC, abstractmethod from typing import Optional -from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq, Bedrock +from langchain_aws.embeddings.bedrock import BedrockEmbeddings +from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings +from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings + from ..helpers import models_tokens +from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI class AbstractGraph(ABC): @@ -43,7 +47,8 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None): self.source = source self.config = config self.llm_model = self._create_llm(config["llm"], chat=True) - self.embedder_model = self.llm_model if "embeddings" not in config else self._create_llm( + self.embedder_model = self._create_default_embedder( + ) if "embeddings" not in config else self._create_embedder( config["embeddings"]) # Set common configuration parameters @@ -165,6 +170,85 @@ def 
_create_llm(self, llm_config: dict, chat=False) -> object: else: raise ValueError( "Model provided by the configuration not supported") + + def _create_default_embedder(self) -> object: + """ + Create an embedding model instance based on the chosen llm model. + + Returns: + object: An instance of the embedding model client. + + Raises: + ValueError: If the model is not supported. + """ + + if isinstance(self.llm_model, OpenAI): + return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key) + elif isinstance(self.llm_model, AzureOpenAIEmbeddings): + return self.llm_model + elif isinstance(self.llm_model, AzureOpenAI): + return AzureOpenAIEmbeddings() + elif isinstance(self.llm_model, Ollama): + # unwrap the kwargs from the model whihc is a dict + params = self.llm_model._lc_kwargs + # remove streaming and temperature + params.pop("streaming", None) + params.pop("temperature", None) + + return OllamaEmbeddings(**params) + elif isinstance(self.llm_model, HuggingFace): + return HuggingFaceHubEmbeddings(model=self.llm_model.model) + elif isinstance(self.llm_model, Bedrock): + return BedrockEmbeddings(client=None, model_id=self.llm_model.model_id) + else: + raise ValueError("Embedding Model missing or not supported") + + def _create_embedder(self, embedder_config: dict) -> object: + """ + Create an embedding model instance based on the configuration provided. + + Args: + embedder_config (dict): Configuration parameters for the embedding model. + + Returns: + object: An instance of the embedding model client. + + Raises: + KeyError: If the model is not supported. + """ + + # Instantiate the embedding model based on the model name + if "openai" in embedder_config["model"]: + return OpenAIEmbeddings(api_key=embedder_config["api_key"]) + + elif "azure" in embedder_config["model"]: + return AzureOpenAIEmbeddings() + + elif "ollama" in embedder_config["model"]: + embedder_config["model"] = embedder_config["model"].split("/")[-1] + try: + models_tokens["ollama"][embedder_config["model"]] + except KeyError: + raise KeyError("Model not supported") + return OllamaEmbeddings(**embedder_config) + + elif "hugging_face" in embedder_config["model"]: + try: + models_tokens["hugging_face"][embedder_config["model"]] + except KeyError: + raise KeyError("Model not supported") + return HuggingFaceHubEmbeddings(model=embedder_config["model"]) + + elif "bedrock" in embedder_config["model"]: + embedder_config["model"] = embedder_config["model"].split("/")[-1] + try: + models_tokens["bedrock"][embedder_config["model"]] + except KeyError: + raise KeyError("Model not supported") + return BedrockEmbeddings(client=None, model_id=embedder_config["model"]) + else: + raise ValueError( + "Model provided by the configuration not supported") def get_state(self, key=None) -> dict: """"" diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py index 92e7011f..4108a56c 100644 --- a/scrapegraphai/nodes/rag_node.py +++ b/scrapegraphai/nodes/rag_node.py @@ -87,31 +87,7 @@ def execute(self, state: dict) -> dict: if self.verbose: print("--- (updated chunks metadata) ---") - # check if embedder_model is provided, if not use llm_model - embedding_model = self.embedder_model if self.embedder_model else self.llm_model - - if isinstance(embedding_model, OpenAI): - embeddings = OpenAIEmbeddings( - api_key=embedding_model.openai_api_key) - elif isinstance(embedding_model, AzureOpenAIEmbeddings): - embeddings = embedding_model - elif isinstance(embedding_model, AzureOpenAI): - embeddings = AzureOpenAIEmbeddings() - elif 
isinstance(embedding_model, Ollama): - # unwrap the kwargs from the model whihc is a dict - params = embedding_model._lc_kwargs - # remove streaming and temperature - params.pop("streaming", None) - params.pop("temperature", None) - - embeddings = OllamaEmbeddings(**params) - elif isinstance(embedding_model, HuggingFace): - embeddings = HuggingFaceHubEmbeddings(model=embedding_model.model) - elif isinstance(embedding_model, Bedrock): - embeddings = BedrockEmbeddings( - client=None, model_id=embedding_model.model_id) - else: - raise ValueError("Embedding Model missing or not supported") + embeddings = self.embedder_model retriever = FAISS.from_documents( chunked_docs, embeddings).as_retriever()
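
Both new Azure example scripts (PATCH 1/4) call load_dotenv() and read their credentials from the environment, so running them requires a .env file next to the examples defining the variables listed in their header comments. A minimal sketch, where every value is a placeholder rather than a real key, endpoint, or deployment name:

AZURE_OPENAI_API_KEY=<your-azure-openai-key>
AZURE_OPENAI_ENDPOINT=https://<your-resource>.openai.azure.com/
AZURE_OPENAI_API_VERSION=<api-version, e.g. 2023-05-15>
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<your-chat-deployment>
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME=<your-embeddings-deployment>
MODEL_NAME=<deployed-model-name>
OPENAI_API_TYPE=azure

AzureChatOpenAI and AzureOpenAIEmbeddings pick up AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT from the environment on their own, which is why the scripts only pass the deployment name and API version explicitly.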
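
The PATCH 4/4 refactor decouples the chat model from the embedder: AbstractGraph now builds the embedder through _create_embedder(), which dispatches on the provider name in the "embeddings" config, so any supported LLM can be paired with any supported embedding model. That is what the updated examples/groq/smart_scraper_groq_openai.py does by setting the embeddings model to "openai". A minimal sketch of the pattern, assuming keys loaded from .env; the Groq model name, environment-variable names, prompt, and source URL below are illustrative placeholders, not taken from the diff:

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph

load_dotenv()

graph_config = {
    "llm": {
        # placeholder Groq model name; any LLM supported by _create_llm works here
        "model": "groq/llama3-70b-8192",
        "api_key": os.getenv("GROQ_APIKEY"),
    },
    # "openai" in the model name routes _create_embedder() to OpenAIEmbeddings
    # built with the given api_key
    "embeddings": {
        "api_key": os.getenv("OPENAI_APIKEY"),
        "model": "openai",
    },
    "headless": False,
}

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their descriptions",  # placeholder prompt
    source="https://example.com/projects",                      # placeholder URL
    config=graph_config,
)

print(smart_scraper_graph.run())

If the "embeddings" key is omitted, AbstractGraph falls back to _create_default_embedder(), which derives the embedder from the chat model's provider, so existing single-provider configs keep working unchanged.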