From 2ae19aee56bed32f5ae34715c29f464bfa329f8e Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 22 Jul 2024 16:50:58 +0200 Subject: [PATCH 01/51] update function --- pyproject.toml | 3 +- requirements-dev.lock | 585 -------------------- requirements-dev.txt | 4 - requirements.lock | 366 ------------ scrapegraphai/nodes/generate_answer_node.py | 68 +-- scrapegraphai/utils/__init__.py | 1 + scrapegraphai/utils/merge_results.py | 30 + 7 files changed, 70 insertions(+), 987 deletions(-) delete mode 100644 requirements-dev.lock delete mode 100644 requirements-dev.txt delete mode 100644 requirements.lock create mode 100644 scrapegraphai/utils/merge_results.py diff --git a/pyproject.toml b/pyproject.toml index e5b997ba..30725709 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,8 @@ dependencies = [ "undetected-playwright==0.3.0", "semchunk==1.0.1", "html2text==2024.2.26", - "langchain-fireworks==0.1.3" + "langchain-fireworks==0.1.3", + "langchain-community==0.2.9" ] license = "MIT" diff --git a/requirements-dev.lock b/requirements-dev.lock deleted file mode 100644 index b0bcaaa0..00000000 --- a/requirements-dev.lock +++ /dev/null @@ -1,585 +0,0 @@ -# generated by rye -# use `rye lock` or `rye sync` to update this lockfile -# -# last locked with the following flags: -# pre: false -# features: [] -# all-features: false -# with-sources: false - --e file:. 
-aiofiles==23.2.1 - # via burr -aiohttp==3.9.5 - # via langchain - # via langchain-community - # via langchain-fireworks -aiosignal==1.3.1 - # via aiohttp -alabaster==0.7.16 - # via sphinx -altair==5.3.0 - # via streamlit -annotated-types==0.7.0 - # via pydantic -anthropic==0.26.1 - # via langchain-anthropic -anyio==4.3.0 - # via anthropic - # via groq - # via httpx - # via openai - # via starlette - # via watchfiles -astroid==3.2.2 - # via pylint -async-timeout==4.0.3 - # via aiohttp - # via langchain -attrs==23.2.0 - # via aiohttp - # via jsonschema - # via referencing -babel==2.15.0 - # via sphinx -beautifulsoup4==4.12.3 - # via furo - # via google - # via scrapegraphai -blinker==1.8.2 - # via streamlit -boto3==1.34.113 - # via langchain-aws -botocore==1.34.113 - # via boto3 - # via s3transfer -burr==0.22.1 - # via scrapegraphai -cachetools==5.3.3 - # via google-auth - # via streamlit -certifi==2024.2.2 - # via httpcore - # via httpx - # via requests -charset-normalizer==3.3.2 - # via requests -click==8.1.7 - # via burr - # via streamlit - # via typer - # via uvicorn -contourpy==1.2.1 - # via matplotlib -cycler==0.12.1 - # via matplotlib -dataclasses-json==0.6.6 - # via langchain - # via langchain-community -defusedxml==0.7.1 - # via langchain-anthropic -dill==0.3.8 - # via pylint -distro==1.9.0 - # via anthropic - # via groq - # via openai -dnspython==2.6.1 - # via email-validator -docstring-parser==0.16 - # via google-cloud-aiplatform -docutils==0.19 - # via sphinx -email-validator==2.1.1 - # via fastapi -exceptiongroup==1.2.1 - # via anyio - # via pytest -faiss-cpu==1.8.0 - # via scrapegraphai -fastapi==0.111.0 - # via burr - # via fastapi-pagination -fastapi-cli==0.0.4 - # via fastapi -fastapi-pagination==0.12.24 - # via burr -filelock==3.14.0 - # via huggingface-hub -fireworks-ai==0.14.0 - # via langchain-fireworks -fonttools==4.52.1 - # via matplotlib -free-proxy==1.1.1 - # via scrapegraphai -frozenlist==1.4.1 - # via aiohttp - # via aiosignal 
-fsspec==2024.5.0 - # via huggingface-hub -furo==2024.5.6 - # via scrapegraphai -gitdb==4.0.11 - # via gitpython -gitpython==3.1.43 - # via streamlit -google==3.0.0 - # via scrapegraphai -google-ai-generativelanguage==0.6.4 - # via google-generativeai -google-api-core==2.19.0 - # via google-ai-generativelanguage - # via google-api-python-client - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage - # via google-generativeai -google-api-python-client==2.130.0 - # via google-generativeai -google-auth==2.29.0 - # via google-ai-generativelanguage - # via google-api-core - # via google-api-python-client - # via google-auth-httplib2 - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage - # via google-generativeai -google-auth-httplib2==0.2.0 - # via google-api-python-client -google-cloud-aiplatform==1.58.0 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via google-cloud-bigquery - # via google-cloud-storage -google-cloud-resource-manager==1.12.3 - # via google-cloud-aiplatform -google-cloud-storage==2.17.0 - # via google-cloud-aiplatform - # via langchain-google-vertexai -google-crc32c==1.5.0 - # via google-cloud-storage - # via google-resumable-media -google-generativeai==0.5.4 - # via langchain-google-genai -google-resumable-media==2.7.1 - # via google-cloud-bigquery - # via google-cloud-storage -googleapis-common-protos==1.63.0 - # via google-api-core - # via grpc-google-iam-v1 - # via grpcio-status -graphviz==0.20.3 - # via burr - # via scrapegraphai -greenlet==3.0.3 - # via playwright -groq==0.8.0 - # via langchain-groq -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager -grpcio==1.64.0 - # via google-api-core - # via googleapis-common-protos - # via grpc-google-iam-v1 - # 
via grpcio-status -grpcio-status==1.62.2 - # via google-api-core -h11==0.14.0 - # via httpcore - # via uvicorn -html2text==2024.2.26 - # via scrapegraphai -httpcore==1.0.5 - # via httpx -httplib2==0.22.0 - # via google-api-python-client - # via google-auth-httplib2 -httptools==0.6.1 - # via uvicorn -httpx==0.27.0 - # via anthropic - # via fastapi - # via fireworks-ai - # via groq - # via openai -httpx-sse==0.4.0 - # via fireworks-ai -huggingface-hub==0.23.1 - # via tokenizers -idna==3.7 - # via anyio - # via email-validator - # via httpx - # via requests - # via yarl -imagesize==1.4.1 - # via sphinx -importlib-metadata==8.0.0 - # via sphinx -importlib-resources==6.4.0 - # via matplotlib -iniconfig==2.0.0 - # via pytest -isort==5.13.2 - # via pylint -jinja2==3.1.4 - # via altair - # via burr - # via fastapi - # via pydeck - # via sphinx -jiter==0.4.0 - # via anthropic -jmespath==1.0.1 - # via boto3 - # via botocore -jsonpatch==1.33 - # via langchain - # via langchain-core -jsonpointer==2.4 - # via jsonpatch -jsonschema==4.22.0 - # via altair -jsonschema-specifications==2023.12.1 - # via jsonschema -kiwisolver==1.4.5 - # via matplotlib -langchain==0.1.15 - # via scrapegraphai -langchain-anthropic==0.1.11 - # via scrapegraphai -langchain-aws==0.1.3 - # via scrapegraphai -langchain-community==0.0.38 - # via langchain -langchain-core==0.1.52 - # via langchain - # via langchain-anthropic - # via langchain-aws - # via langchain-community - # via langchain-fireworks - # via langchain-google-genai - # via langchain-google-vertexai - # via langchain-groq - # via langchain-openai - # via langchain-text-splitters -langchain-fireworks==0.1.3 - # via scrapegraphai -langchain-google-genai==1.0.3 - # via scrapegraphai -langchain-google-vertexai==1.0.4 - # via scrapegraphai -langchain-groq==0.1.3 - # via scrapegraphai -langchain-openai==0.1.6 - # via scrapegraphai -langchain-text-splitters==0.0.2 - # via langchain -langsmith==0.1.63 - # via langchain - # via langchain-community - # 
via langchain-core -loguru==0.7.2 - # via burr -lxml==5.2.2 - # via free-proxy -markdown-it-py==3.0.0 - # via rich -markupsafe==2.1.5 - # via jinja2 -marshmallow==3.21.2 - # via dataclasses-json -matplotlib==3.9.0 - # via burr -mccabe==0.7.0 - # via pylint -mdurl==0.1.2 - # via markdown-it-py -minify-html==0.15.0 - # via scrapegraphai -multidict==6.0.5 - # via aiohttp - # via yarl -mypy-extensions==1.0.0 - # via typing-inspect -numpy==1.26.4 - # via altair - # via contourpy - # via faiss-cpu - # via langchain - # via langchain-aws - # via langchain-community - # via matplotlib - # via pandas - # via pyarrow - # via pydeck - # via sf-hamilton - # via shapely - # via streamlit -openai==1.30.3 - # via burr - # via langchain-fireworks - # via langchain-openai -orjson==3.10.3 - # via fastapi - # via langsmith -packaging==23.2 - # via altair - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via huggingface-hub - # via langchain-core - # via marshmallow - # via matplotlib - # via pytest - # via sphinx - # via streamlit -pandas==2.2.2 - # via altair - # via scrapegraphai - # via sf-hamilton - # via streamlit -pillow==10.3.0 - # via fireworks-ai - # via matplotlib - # via streamlit -platformdirs==4.2.2 - # via pylint -playwright==1.43.0 - # via scrapegraphai - # via undetected-playwright -pluggy==1.5.0 - # via pytest -proto-plus==1.23.0 - # via google-ai-generativelanguage - # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager -protobuf==4.25.3 - # via google-ai-generativelanguage - # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager - # via google-generativeai - # via googleapis-common-protos - # via grpc-google-iam-v1 - # via grpcio-status - # via proto-plus - # via streamlit -pyarrow==16.1.0 - # via streamlit -pyasn1==0.6.0 - # via pyasn1-modules - # via rsa -pyasn1-modules==0.4.0 - # via google-auth -pydantic==2.7.1 - # via anthropic - # via burr - # via fastapi - # via 
fastapi-pagination - # via fireworks-ai - # via google-cloud-aiplatform - # via google-generativeai - # via groq - # via langchain - # via langchain-core - # via langsmith - # via openai -pydantic-core==2.18.2 - # via pydantic -pydeck==0.9.1 - # via streamlit -pyee==11.1.0 - # via playwright -pygments==2.18.0 - # via furo - # via rich - # via sphinx -pylint==3.2.5 -pyparsing==3.1.2 - # via httplib2 - # via matplotlib -pytest==8.0.0 - # via pytest-mock -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 - # via botocore - # via google-cloud-bigquery - # via matplotlib - # via pandas -python-dotenv==1.0.1 - # via scrapegraphai - # via uvicorn -python-multipart==0.0.9 - # via fastapi -pytz==2024.1 - # via pandas -pyyaml==6.0.1 - # via huggingface-hub - # via langchain - # via langchain-community - # via langchain-core - # via uvicorn -referencing==0.35.1 - # via jsonschema - # via jsonschema-specifications -regex==2024.5.15 - # via tiktoken -requests==2.32.2 - # via burr - # via free-proxy - # via google-api-core - # via google-cloud-bigquery - # via google-cloud-storage - # via huggingface-hub - # via langchain - # via langchain-community - # via langchain-fireworks - # via langsmith - # via sphinx - # via streamlit - # via tiktoken -rich==13.7.1 - # via streamlit - # via typer -rpds-py==0.18.1 - # via jsonschema - # via referencing -rsa==4.9 - # via google-auth -s3transfer==0.10.1 - # via boto3 -semchunk==1.0.1 - # via scrapegraphai -sf-hamilton==1.63.0 - # via burr -shapely==2.0.4 - # via google-cloud-aiplatform -shellingham==1.5.4 - # via typer -six==1.16.0 - # via python-dateutil -smmap==5.0.1 - # via gitdb -sniffio==1.3.1 - # via anthropic - # via anyio - # via groq - # via httpx - # via openai -snowballstemmer==2.2.0 - # via sphinx -soupsieve==2.5 - # via beautifulsoup4 -sphinx==6.0.0 - # via furo - # via scrapegraphai - # via sphinx-basic-ng -sphinx-basic-ng==1.0.0b2 - # via furo -sphinxcontrib-applehelp==1.0.8 - # via sphinx -sphinxcontrib-devhelp==1.0.6 - # 
via sphinx -sphinxcontrib-htmlhelp==2.0.5 - # via sphinx -sphinxcontrib-jsmath==1.0.1 - # via sphinx -sphinxcontrib-qthelp==1.0.7 - # via sphinx -sphinxcontrib-serializinghtml==1.1.10 - # via sphinx -sqlalchemy==2.0.30 - # via langchain - # via langchain-community -starlette==0.37.2 - # via fastapi -streamlit==1.35.0 - # via burr -tenacity==8.3.0 - # via langchain - # via langchain-community - # via langchain-core - # via streamlit -tiktoken==0.7.0 - # via langchain-openai - # via scrapegraphai -tokenizers==0.19.1 - # via anthropic -toml==0.10.2 - # via streamlit -tomli==2.0.1 - # via pylint - # via pytest -tomlkit==0.12.5 - # via pylint -toolz==0.12.1 - # via altair -tornado==6.4 - # via streamlit -tqdm==4.66.4 - # via google-generativeai - # via huggingface-hub - # via openai - # via scrapegraphai - # via semchunk -typer==0.12.3 - # via fastapi-cli -typing-extensions==4.12.0 - # via altair - # via anthropic - # via anyio - # via astroid - # via fastapi - # via fastapi-pagination - # via google-generativeai - # via groq - # via huggingface-hub - # via openai - # via pydantic - # via pydantic-core - # via pyee - # via pylint - # via sf-hamilton - # via sqlalchemy - # via starlette - # via streamlit - # via typer - # via typing-inspect - # via uvicorn -typing-inspect==0.9.0 - # via dataclasses-json - # via sf-hamilton -tzdata==2024.1 - # via pandas -ujson==5.10.0 - # via fastapi -undetected-playwright==0.3.0 - # via scrapegraphai -uritemplate==4.1.1 - # via google-api-python-client -urllib3==1.26.18 - # via botocore - # via requests -uvicorn==0.29.0 - # via burr - # via fastapi -uvloop==0.19.0 - # via uvicorn -watchfiles==0.21.0 - # via uvicorn -websockets==12.0 - # via uvicorn -yarl==1.9.4 - # via aiohttp -zipp==3.19.2 - # via importlib-metadata - # via importlib-resources diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index d33296d5..00000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,4 +0,0 @@ -sphinx==7.1.2 -furo==2024.5.6 
-pytest==8.0.0 -burr[start]==0.22.1 \ No newline at end of file diff --git a/requirements.lock b/requirements.lock deleted file mode 100644 index 7a8bb455..00000000 --- a/requirements.lock +++ /dev/null @@ -1,366 +0,0 @@ -# generated by rye -# use `rye lock` or `rye sync` to update this lockfile -# -# last locked with the following flags: -# pre: false -# features: [] -# all-features: false -# with-sources: false - --e file:. -aiohttp==3.9.5 - # via langchain - # via langchain-community - # via langchain-fireworks -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.7.0 - # via pydantic -anthropic==0.26.1 - # via langchain-anthropic -anyio==4.3.0 - # via anthropic - # via groq - # via httpx - # via openai -async-timeout==4.0.3 - # via aiohttp - # via langchain -attrs==23.2.0 - # via aiohttp -beautifulsoup4==4.12.3 - # via google - # via scrapegraphai -boto3==1.34.113 - # via langchain-aws -botocore==1.34.113 - # via boto3 - # via s3transfer -cachetools==5.3.3 - # via google-auth -certifi==2024.2.2 - # via httpcore - # via httpx - # via requests -charset-normalizer==3.3.2 - # via requests -dataclasses-json==0.6.6 - # via langchain - # via langchain-community -defusedxml==0.7.1 - # via langchain-anthropic -distro==1.9.0 - # via anthropic - # via groq - # via openai -docstring-parser==0.16 - # via google-cloud-aiplatform -exceptiongroup==1.2.1 - # via anyio -faiss-cpu==1.8.0 - # via scrapegraphai -filelock==3.14.0 - # via huggingface-hub -fireworks-ai==0.14.0 - # via langchain-fireworks -free-proxy==1.1.1 - # via scrapegraphai -frozenlist==1.4.1 - # via aiohttp - # via aiosignal -fsspec==2024.5.0 - # via huggingface-hub -google==3.0.0 - # via scrapegraphai -google-ai-generativelanguage==0.6.4 - # via google-generativeai -google-api-core==2.19.0 - # via google-ai-generativelanguage - # via google-api-python-client - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage - 
# via google-generativeai -google-api-python-client==2.130.0 - # via google-generativeai -google-auth==2.29.0 - # via google-ai-generativelanguage - # via google-api-core - # via google-api-python-client - # via google-auth-httplib2 - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage - # via google-generativeai -google-auth-httplib2==0.2.0 - # via google-api-python-client -google-cloud-aiplatform==1.58.0 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via google-cloud-bigquery - # via google-cloud-storage -google-cloud-resource-manager==1.12.3 - # via google-cloud-aiplatform -google-cloud-storage==2.17.0 - # via google-cloud-aiplatform - # via langchain-google-vertexai -google-crc32c==1.5.0 - # via google-cloud-storage - # via google-resumable-media -google-generativeai==0.5.4 - # via langchain-google-genai -google-resumable-media==2.7.1 - # via google-cloud-bigquery - # via google-cloud-storage -googleapis-common-protos==1.63.0 - # via google-api-core - # via grpc-google-iam-v1 - # via grpcio-status -graphviz==0.20.3 - # via scrapegraphai -greenlet==3.0.3 - # via playwright -groq==0.8.0 - # via langchain-groq -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager -grpcio==1.64.0 - # via google-api-core - # via googleapis-common-protos - # via grpc-google-iam-v1 - # via grpcio-status -grpcio-status==1.62.2 - # via google-api-core -h11==0.14.0 - # via httpcore -html2text==2024.2.26 - # via scrapegraphai -httpcore==1.0.5 - # via httpx -httplib2==0.22.0 - # via google-api-python-client - # via google-auth-httplib2 -httpx==0.27.0 - # via anthropic - # via fireworks-ai - # via groq - # via openai -httpx-sse==0.4.0 - # via fireworks-ai -huggingface-hub==0.23.1 - # via tokenizers -idna==3.7 - # via anyio - # via httpx - # via requests - # via yarl -jiter==0.4.0 - # via anthropic 
-jmespath==1.0.1 - # via boto3 - # via botocore -jsonpatch==1.33 - # via langchain - # via langchain-core -jsonpointer==2.4 - # via jsonpatch -langchain==0.1.15 - # via scrapegraphai -langchain-anthropic==0.1.11 - # via scrapegraphai -langchain-aws==0.1.3 - # via scrapegraphai -langchain-community==0.0.38 - # via langchain -langchain-core==0.1.52 - # via langchain - # via langchain-anthropic - # via langchain-aws - # via langchain-community - # via langchain-fireworks - # via langchain-google-genai - # via langchain-google-vertexai - # via langchain-groq - # via langchain-openai - # via langchain-text-splitters -langchain-fireworks==0.1.3 - # via scrapegraphai -langchain-google-genai==1.0.3 - # via scrapegraphai -langchain-google-vertexai==1.0.4 - # via scrapegraphai -langchain-groq==0.1.3 - # via scrapegraphai -langchain-openai==0.1.6 - # via scrapegraphai -langchain-text-splitters==0.0.2 - # via langchain -langsmith==0.1.63 - # via langchain - # via langchain-community - # via langchain-core -lxml==5.2.2 - # via free-proxy -marshmallow==3.21.2 - # via dataclasses-json -minify-html==0.15.0 - # via scrapegraphai -multidict==6.0.5 - # via aiohttp - # via yarl -mypy-extensions==1.0.0 - # via typing-inspect -numpy==1.26.4 - # via faiss-cpu - # via langchain - # via langchain-aws - # via langchain-community - # via pandas - # via shapely -openai==1.30.3 - # via langchain-fireworks - # via langchain-openai -orjson==3.10.3 - # via langsmith -packaging==23.2 - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via huggingface-hub - # via langchain-core - # via marshmallow -pandas==2.2.2 - # via scrapegraphai -pillow==10.3.0 - # via fireworks-ai -playwright==1.43.0 - # via scrapegraphai - # via undetected-playwright -proto-plus==1.23.0 - # via google-ai-generativelanguage - # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager -protobuf==4.25.3 - # via google-ai-generativelanguage - # via google-api-core - # via 
google-cloud-aiplatform - # via google-cloud-resource-manager - # via google-generativeai - # via googleapis-common-protos - # via grpc-google-iam-v1 - # via grpcio-status - # via proto-plus -pyasn1==0.6.0 - # via pyasn1-modules - # via rsa -pyasn1-modules==0.4.0 - # via google-auth -pydantic==2.7.1 - # via anthropic - # via fireworks-ai - # via google-cloud-aiplatform - # via google-generativeai - # via groq - # via langchain - # via langchain-core - # via langsmith - # via openai -pydantic-core==2.18.2 - # via pydantic -pyee==11.1.0 - # via playwright -pyparsing==3.1.2 - # via httplib2 -python-dateutil==2.9.0.post0 - # via botocore - # via google-cloud-bigquery - # via pandas -python-dotenv==1.0.1 - # via scrapegraphai -pytz==2024.1 - # via pandas -pyyaml==6.0.1 - # via huggingface-hub - # via langchain - # via langchain-community - # via langchain-core -regex==2024.5.15 - # via tiktoken -requests==2.32.2 - # via free-proxy - # via google-api-core - # via google-cloud-bigquery - # via google-cloud-storage - # via huggingface-hub - # via langchain - # via langchain-community - # via langchain-fireworks - # via langsmith - # via tiktoken -rsa==4.9 - # via google-auth -s3transfer==0.10.1 - # via boto3 -semchunk==1.0.1 - # via scrapegraphai -shapely==2.0.4 - # via google-cloud-aiplatform -six==1.16.0 - # via python-dateutil -sniffio==1.3.1 - # via anthropic - # via anyio - # via groq - # via httpx - # via openai -soupsieve==2.5 - # via beautifulsoup4 -sqlalchemy==2.0.30 - # via langchain - # via langchain-community -tenacity==8.3.0 - # via langchain - # via langchain-community - # via langchain-core -tiktoken==0.7.0 - # via langchain-openai - # via scrapegraphai -tokenizers==0.19.1 - # via anthropic -tqdm==4.66.4 - # via google-generativeai - # via huggingface-hub - # via openai - # via scrapegraphai - # via semchunk -typing-extensions==4.12.0 - # via anthropic - # via anyio - # via google-generativeai - # via groq - # via huggingface-hub - # via openai - # via 
pydantic - # via pydantic-core - # via pyee - # via sqlalchemy - # via typing-inspect -typing-inspect==0.9.0 - # via dataclasses-json -tzdata==2024.1 - # via pandas -undetected-playwright==0.3.0 - # via scrapegraphai -uritemplate==4.1.1 - # via google-api-python-client -urllib3==1.26.18 - # via botocore - # via requests -yarl==1.9.4 - # via aiohttp diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index f764e58b..eb440a75 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -7,6 +7,8 @@ from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel from tqdm import tqdm +import asyncio +from ..utils.merge_results import merge_results from ..utils.logging import get_logger from ..models import Ollama, OpenAI from .base_node import BaseNode @@ -109,42 +111,46 @@ def execute(self, state: dict) -> dict: chains_dict = {} + if len(doc) == 1: + prompt = PromptTemplate( + template=template_no_chunks_prompt, + input_variables=["question"], + partial_variables={"context": doc, + "format_instructions": format_instructions}) + chain = prompt | self.llm_model | output_parser + answer = chain.invoke({"question": user_prompt}) + + state.update({self.output[0]: answer}) + return state + # Use tqdm to add progress bar for i, chunk in enumerate(tqdm(doc, desc="Processing chunks", disable=not self.verbose)): - if len(doc) == 1: - prompt = PromptTemplate( - template=template_no_chunks_prompt, - input_variables=["question"], - partial_variables={"context": chunk, - "format_instructions": format_instructions}) - chain = prompt | self.llm_model | output_parser - answer = chain.invoke({"question": user_prompt}) - break prompt = PromptTemplate( - template=template_chunks_prompt, - input_variables=["question"], - partial_variables={"context": chunk, - "chunk_id": i + 1, - "format_instructions": format_instructions}) - # Dynamically 
name the chains based on their index + template=template_chunks, + input_variables=["question"], + partial_variables={"context": chunk, + "chunk_id": i + 1, + "format_instructions": format_instructions}) + # Add chain to dictionary with dynamic name chain_name = f"chunk{i+1}" chains_dict[chain_name] = prompt | self.llm_model | output_parser - if len(chains_dict) > 1: - # Use dictionary unpacking to pass the dynamically named chains to RunnableParallel - map_chain = RunnableParallel(**chains_dict) - # Chain - answer = map_chain.invoke({"question": user_prompt}) - # Merge the answers from the chunks - merge_prompt = PromptTemplate( - template = template_merge_prompt, - input_variables=["context", "question"], - partial_variables={"format_instructions": format_instructions}, - ) - merge_chain = merge_prompt | self.llm_model | output_parser - answer = merge_chain.invoke({"context": answer, "question": user_prompt}) - - # Update the state with the generated answer - state.update({self.output[0]: answer}) + + async def process_chains(): + async_runner = RunnableParallel() + for chain_name, chain in chains_dict.items(): + async_runner.add(chain.ainvoke([{"question": user_prompt}] * len(doc))) + + batch_results = await async_runner.run() + return batch_results + + loop = asyncio.get_event_loop() + batch_answers = loop.run_until_complete(process_chains()) + + # Merge batch results (assuming same structure) + merged_answer = merge_results(batch_answers) + answers = merged_answer + + state.update({self.output[0]: answers}) return state diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py index 707d2b18..15fd6886 100644 --- a/scrapegraphai/utils/__init__.py +++ b/scrapegraphai/utils/__init__.py @@ -11,3 +11,4 @@ from .cleanup_html import cleanup_html from .logging import * from .convert_to_md import convert_to_md +from .merge_results import merge_results diff --git a/scrapegraphai/utils/merge_results.py b/scrapegraphai/utils/merge_results.py new file 
mode 100644 index 00000000..ff5728fa --- /dev/null +++ b/scrapegraphai/utils/merge_results.py @@ -0,0 +1,30 @@ +def merge_results(batch_answers): + """ + Merges the results from single-chunk processing and batch processing, and adds separators between the chunks. + Parameters: + ----------- + answers : list of str + A list of strings containing the results from single-chunk processing. + + batch_answers : list of dict + A list of dictionaries, where each dictionary contains a key "text" with the batch processing result as a string. + + Returns: + -------- + str + A single string containing all merged results, with each result separated by a newline character. + + Example: + -------- + >>> answers = ["Result from single-chunk 1", "Result from single-chunk 2"] + >>> batch_answers = [{"text": "Result from batch 1"}, {"text": "Result from batch 2"}] + >>> merge_results(answers, batch_answers) + 'Result from single-chunk 1\nResult from single-chunk 2\nResult from batch 1\nResult from batch 2' + """ + # Combine answers from single-chunk processing and batch processing + merged_answers = [answer["text"] for answer in batch_answers] + + # Add separators between chunks + merged_answers = "\n".join(merged_answers) + + return merged_answers \ No newline at end of file From 0c4b2908d98efbb2b0a6faf68618a801d726bb5f Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 22 Jul 2024 19:58:33 +0200 Subject: [PATCH 02/51] feat: add generate_answer node parallelization Co-Authored-By: Federico Minutoli <40361744+DiTo97@users.noreply.github.com> --- scrapegraphai/nodes/generate_answer_node.py | 26 +++++++++------------ 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index eb440a75..9cd5dce5 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -1,13 +1,12 @@ """ GenerateAnswerNode Module """ - +import asyncio from typing
import List, Optional from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel from tqdm import tqdm -import asyncio from ..utils.merge_results import merge_results from ..utils.logging import get_logger from ..models import Ollama, OpenAI @@ -136,21 +135,18 @@ def execute(self, state: dict) -> dict: chain_name = f"chunk{i+1}" chains_dict[chain_name] = prompt | self.llm_model | output_parser + async_runner = RunnableParallel(**chains_dict) - async def process_chains(): - async_runner = RunnableParallel() - for chain_name, chain in chains_dict.items(): - async_runner.add(chain.ainvoke([{"question": user_prompt}] * len(doc))) - - batch_results = await async_runner.run() - return batch_results + batch_results = async_runner.invoke({"question": user_prompt}) - loop = asyncio.get_event_loop() - batch_answers = loop.run_until_complete(process_chains()) + merge_prompt = PromptTemplate( + template = template_merge_prompt, + input_variables=["context", "question"], + partial_variables={"format_instructions": format_instructions}, + ) - # Merge batch results (assuming same structure) - merged_answer = merge_results(batch_answers) - answers = merged_answer + merge_chain = merge_prompt | self.llm_model | output_parser + answer = merge_chain.invoke({"context": batch_results, "question": user_prompt}) - state.update({self.output[0]: answers}) + state.update({self.output[0]: answer}) return state From cf2734392cda6ef6eda50517671d44d4b06e26c7 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Tue, 23 Jul 2024 13:05:50 +0200 Subject: [PATCH 03/51] removed unused function --- scrapegraphai/nodes/generate_answer_node.py | 1 - scrapegraphai/utils/__init__.py | 1 - scrapegraphai/utils/merge_results.py | 30 --------------------- 3 files changed, 32 deletions(-) delete mode 100644 scrapegraphai/utils/merge_results.py diff --git a/scrapegraphai/nodes/generate_answer_node.py 
b/scrapegraphai/nodes/generate_answer_node.py index 9cd5dce5..d864124e 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -7,7 +7,6 @@ from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel from tqdm import tqdm -from ..utils.merge_results import merge_results from ..utils.logging import get_logger from ..models import Ollama, OpenAI from .base_node import BaseNode diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py index 15fd6886..707d2b18 100644 --- a/scrapegraphai/utils/__init__.py +++ b/scrapegraphai/utils/__init__.py @@ -11,4 +11,3 @@ from .cleanup_html import cleanup_html from .logging import * from .convert_to_md import convert_to_md -from .merge_results import merge_results diff --git a/scrapegraphai/utils/merge_results.py b/scrapegraphai/utils/merge_results.py deleted file mode 100644 index ff5728fa..00000000 --- a/scrapegraphai/utils/merge_results.py +++ /dev/null @@ -1,30 +0,0 @@ -def merge_results(batch_answers): - """ - Merges the results from single-chunk processing and batch processing, and adds separators between the chunks. - Parameters: - ----------- - answers : list of str - A list of strings containing the results from single-chunk processing. - - batch_answers : list of dict - A list of dictionaries, where each dictionary contains a key "text" with the batch processing result as a string. - - Returns: - -------- - str - A single string containing all merged results, with each result separated by a newline character. 
- - Example: - -------- - >>> answers = ["Result from single-chunk 1", "Result from single-chunk 2"] - >>> batch_answers = [{"text": "Result from batch 1"}, {"text": "Result from batch 2"}] - >>> merge_results(answers, batch_answers) - 'Result from single-chunk 1\nResult from single-chunk 2\nResult from batch 1\nResult from batch 2' - """ - # Combine answers from single-chunk processing and batch processing - merged_answers = [answer["text"] for answer in batch_answers] - - # Add separators between chunks - merged_answers = "\n".join(merged_answers) - - return merged_answers \ No newline at end of file From df1ecc00192a48abc6bbbe16444507c4bdf6362c Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Tue, 23 Jul 2024 13:06:59 +0200 Subject: [PATCH 04/51] Update generate_answer_node.py --- scrapegraphai/nodes/generate_answer_node.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index d864124e..81812598 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -107,8 +107,6 @@ def execute(self, state: dict) -> dict: template_chunks_prompt = self.additional_info + template_chunks_prompt template_merge_prompt = self.additional_info + template_merge_prompt - chains_dict = {} - if len(doc) == 1: prompt = PromptTemplate( template=template_no_chunks_prompt, @@ -121,7 +119,7 @@ def execute(self, state: dict) -> dict: state.update({self.output[0]: answer}) return state - # Use tqdm to add progress bar + chains_dict = {} for i, chunk in enumerate(tqdm(doc, desc="Processing chunks", disable=not self.verbose)): prompt = PromptTemplate( From 7080a0afd527a34ada33ee2d3ace8e724d879df7 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 23 Jul 2024 14:15:37 +0000 Subject: [PATCH 05/51] ci(release): 1.11.0-beta.1 [skip ci] ## 
[1.11.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.10.4...v1.11.0-beta.1) (2024-07-23) ### Features * add new toml ([fcb3220](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fcb3220868e7ef1127a7a47f40d0379be282e6eb)) * add nvidia connection ([fc0dadb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fc0dadb8f812dfd636dec856921a971b58695ce3)) ### Bug Fixes * **md_conversion:** add absolute links md, added missing dependency ([12b5ead](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/12b5eada6ea783770afd630ede69b8cf867a7ded)) ### chore * **dependecies:** add script to auto-update requirements ([3289c7b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3289c7bf5ec58ac3d04e9e5e8e654af9abcee228)) * **ci:** set up workflow for requirements auto-update ([295fc28](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/295fc28ceb02c78198f7fbe678352503b3259b6b)) * update requirements.txt ([c7bac98](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c7bac98d2e79e5ab98fa65d7efa858a2cdda1622)) * upgrade dependencies and scripts ([74d142e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/74d142eaae724b087eada9c0c876b40a2ccc7cae)) * **pyproject:** upgrade dependencies ([0425124](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0425124c570f765b98fcf67ba6649f4f9fe76b15)) ### Docs * add hero image ([4182e23](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4182e23e3b8d8f141b119b6014ae3ff20b3892e3)) * updated readme ([c377ae0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c377ae0544a78ebdc0d15f8d23b3846c26876c8c)) ### CI * **release:** 1.10.0-beta.6 [skip ci] ([254bde7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/254bde7008b41ffa434925e3ae84340c53a565bd)) * **release:** 1.10.0-beta.7 [skip ci] ([1756e85](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/1756e8522f3874de8afbef9ac327f9b3f1a49d07)) * **release:** 1.10.0-beta.8 [skip ci] 
([255e569](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/255e569172b1029bc2a723b2ec66bcf3d3ee3791)) --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43b7b08e..d59af3cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,37 @@ +## [1.11.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.10.4...v1.11.0-beta.1) (2024-07-23) + + +### Features + +* add new toml ([fcb3220](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fcb3220868e7ef1127a7a47f40d0379be282e6eb)) +* add nvidia connection ([fc0dadb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fc0dadb8f812dfd636dec856921a971b58695ce3)) + + +### Bug Fixes + +* **md_conversion:** add absolute links md, added missing dependency ([12b5ead](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/12b5eada6ea783770afd630ede69b8cf867a7ded)) + + +### chore + +* **dependecies:** add script to auto-update requirements ([3289c7b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/3289c7bf5ec58ac3d04e9e5e8e654af9abcee228)) +* **ci:** set up workflow for requirements auto-update ([295fc28](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/295fc28ceb02c78198f7fbe678352503b3259b6b)) +* update requirements.txt ([c7bac98](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c7bac98d2e79e5ab98fa65d7efa858a2cdda1622)) +* upgrade dependencies and scripts ([74d142e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/74d142eaae724b087eada9c0c876b40a2ccc7cae)) +* **pyproject:** upgrade dependencies ([0425124](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0425124c570f765b98fcf67ba6649f4f9fe76b15)) + + +### Docs + +* add hero image ([4182e23](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4182e23e3b8d8f141b119b6014ae3ff20b3892e3)) +* updated readme 
([c377ae0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c377ae0544a78ebdc0d15f8d23b3846c26876c8c)) + + +### CI + +* **release:** 1.10.0-beta.6 [skip ci] ([254bde7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/254bde7008b41ffa434925e3ae84340c53a565bd)) +* **release:** 1.10.0-beta.7 [skip ci] ([1756e85](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/1756e8522f3874de8afbef9ac327f9b3f1a49d07)) +* **release:** 1.10.0-beta.8 [skip ci] ([255e569](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/255e569172b1029bc2a723b2ec66bcf3d3ee3791)) ## [1.10.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.10.0-beta.7...v1.10.0-beta.8) (2024-07-23) diff --git a/pyproject.toml b/pyproject.toml index 989e32be..9fbc763d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.10.4" +version = "1.11.0b1" From 2edad66788cbd92f197e3b37db13c44bfa39e36a Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 23 Jul 2024 20:51:49 +0200 Subject: [PATCH 06/51] chore: rebuild requirements --- requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 124840e5..440bf78a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,5 @@ langchain>=0.2.10 -langchain_community>=0.2.9 langchain-google-genai>=1.0.7 -langchain-fireworks>=0.1.3 langchain-google-vertexai langchain-openai>=0.1.17 langchain-groq>=0.1.3 @@ -22,4 +20,5 @@ playwright>=1.43.0 google>=3.0.0 undetected-playwright>=0.3.0 semchunk>=1.0.1 - +langchain-fireworks>=0.1.3 +langchain-community>=0.2.9 From 377d679eecd62611c0c9a3cba8202c6f0719ed31 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra <88108002+VinciGit00@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:17:48 +0200 Subject: [PATCH 07/51] feat: pdate models_tokens.py --- scrapegraphai/helpers/models_tokens.py | 4 ++++ 1 file changed, 4 insertions(+) 
diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index b3d61065..0724ee95 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -51,6 +51,10 @@ "falcon": 2048, "llama2": 4096, "llama3": 8192, + "llama3:70b": 8192, + "llama3.1":128000, + "llama3.1:70b": 128000, + "llama3.1:405b": 128000, "scrapegraph": 8192, "llava": 4096, "mixtral:8x22b-instruct": 65536, From bf6d487bbb26187b32f5985433b54025f6437af5 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 24 Jul 2024 13:19:41 +0000 Subject: [PATCH 08/51] ci(release): 1.11.0-beta.2 [skip ci] ## [1.11.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.1...v1.11.0-beta.2) (2024-07-24) ### Features * pdate models_tokens.py ([377d679](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/377d679eecd62611c0c9a3cba8202c6f0719ed31)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d59af3cb..c770f2f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.11.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.1...v1.11.0-beta.2) (2024-07-24) + + +### Features + +* pdate models_tokens.py ([377d679](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/377d679eecd62611c0c9a3cba8202c6f0719ed31)) + ## [1.11.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.10.4...v1.11.0-beta.1) (2024-07-23) diff --git a/pyproject.toml b/pyproject.toml index 9fbc763d..83c0d1ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.11.0b1" +version = "1.11.0b2" From f336c95c2d1833d1f829d61ae7fa415ac2caf250 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 25 Jul 2024 09:17:00 +0200 Subject: [PATCH 09/51] fix: add llama 3.1 From 66f9421fc216f0984d5a393101d1c109b08eaa33 Mon Sep 17 00:00:00 2001 From:
semantic-release-bot Date: Thu, 25 Jul 2024 07:18:11 +0000 Subject: [PATCH 10/51] ci(release): 1.11.0-beta.3 [skip ci] ## [1.11.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.2...v1.11.0-beta.3) (2024-07-25) ### Bug Fixes * add llama 3.1 ([f336c95](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f336c95c2d1833d1f829d61ae7fa415ac2caf250)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c770f2f1..9d43c83b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.11.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.2...v1.11.0-beta.3) (2024-07-25) + + +### Bug Fixes + +* add llama 3.1 ([f336c95](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f336c95c2d1833d1f829d61ae7fa415ac2caf250)) + ## [1.11.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.1...v1.11.0-beta.2) (2024-07-24) diff --git a/pyproject.toml b/pyproject.toml index 83c0d1ac..ccb549c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.11.0b2" +version = "1.11.0b3" From 51db43a129ef05c050b6de017598a664119594ba Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 25 Jul 2024 09:50:57 +0000 Subject: [PATCH 11/51] ci(release): 1.11.0-beta.4 [skip ci] ## [1.11.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.3...v1.11.0-beta.4) (2024-07-25) ### Features * add generate_answer node paralellization ([0c4b290](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0c4b2908d98efbb2b0a6faf68618a801d726bb5f)) ### chore * rebuild requirements ([2edad66](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2edad66788cbd92f197e3b37db13c44bfa39e36a)) --- CHANGELOG.md | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
9d43c83b..ea0c578f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## [1.11.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.3...v1.11.0-beta.4) (2024-07-25) + + +### Features + +* add generate_answer node paralellization ([0c4b290](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0c4b2908d98efbb2b0a6faf68618a801d726bb5f)) + + +### chore + +* rebuild requirements ([2edad66](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2edad66788cbd92f197e3b37db13c44bfa39e36a)) + ## [1.11.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.2...v1.11.0-beta.3) (2024-07-25) diff --git a/pyproject.toml b/pyproject.toml index 9f9f6ad8..b7b0d55d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.11.0b3" +version = "1.11.0b4" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From 5137b8aa5bafdb04a6cdc53e7d3fe5c43572f293 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Fri, 26 Jul 2024 10:18:08 +0200 Subject: [PATCH 12/51] Update models_tokens.py --- scrapegraphai/helpers/models_tokens.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 0724ee95..cb00435d 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -196,6 +196,11 @@ "fireworks": { "llama-v2-7b": 4096, "mixtral-8x7b-instruct": 4096, - "nomic-ai/nomic-embed-text-v1.5": 8192 + "nomic-ai/nomic-embed-text-v1.5": 8192, + "llama-3.1-405B-instruct": 131072, + "llama-3.1-70B-instruct": 131072, + "llama-3.1-8B-instruct": 131072, + "mixtral-moe-8x22B-instruct": 65536, + "mixtral-moe-8x7B-instruct": 65536 }, } From 05cf9adaa7bed093c5700cd2feb47b01ab88402f Mon Sep 17 00:00:00 2001 From: amosdinh Date: Fri, 26 Jul 2024 12:10:14 +0200 Subject: [PATCH 
13/51] Ollama: Use no json format when creating the search query --- scrapegraphai/nodes/search_internet_node.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 97fed67b..2474ab60 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -10,6 +10,7 @@ from ..utils.logging import get_logger from ..utils.research_web import search_on_web from .base_node import BaseNode +from ..models import Ollama class SearchInternetNode(BaseNode): @@ -94,7 +95,14 @@ def execute(self, state: dict) -> dict: # Execute the chain to get the search query search_answer = search_prompt | self.llm_model | output_parser - search_query = search_answer.invoke({"user_prompt": user_prompt})[0] + + # Ollama: Use no json format when creating the search query + if isinstance(self.llm_model, Ollama) and self.llm_model.format == 'json': + self.llm_model.format = None + search_query = search_answer.invoke({"user_prompt": user_prompt})[0] + self.llm_model.format = 'json' + else: + search_query = search_answer.invoke({"user_prompt": user_prompt})[0] self.logger.info(f"Search Query: {search_query}") From d177afb68be036465ede1f567d2562b145d77d36 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:17:47 +0200 Subject: [PATCH 14/51] refactor(Ollama): integrate new LangChain chat init --- requirements-dev.lock | 11 +++++++++++ requirements.lock | 12 ++++++++++++ scrapegraphai/graphs/abstract_graph.py | 16 +++++++++++----- scrapegraphai/models/ollama.py | 17 ----------------- 4 files changed, 34 insertions(+), 22 deletions(-) delete mode 100644 scrapegraphai/models/ollama.py diff --git a/requirements-dev.lock b/requirements-dev.lock index 405395c4..bce18810 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -12,6 +12,7 @@ aiofiles==24.1.0 # via burr 
aiohttp==3.9.5 # via langchain + # via langchain-community # via langchain-fireworks # via langchain-nvidia-ai-endpoints aiosignal==1.3.1 @@ -179,6 +180,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 @@ -262,6 +264,7 @@ langchain-core==0.2.22 # via langchain # via langchain-anthropic # via langchain-aws + # via langchain-community # via langchain-fireworks # via langchain-google-genai # via langchain-google-vertexai @@ -285,6 +288,7 @@ langchain-text-splitters==0.2.2 # via langchain langsmith==0.1.93 # via langchain + # via langchain-community # via langchain-core loguru==0.7.2 # via burr @@ -319,6 +323,7 @@ numpy==1.26.4 # via faiss-cpu # via langchain # via langchain-aws + # via langchain-community # via matplotlib # via pandas # via pyarrow @@ -339,6 +344,7 @@ packaging==24.1 # via google-cloud-bigquery # via huggingface-hub # via langchain-core + # via marshmallow # via matplotlib # via pytest # via sphinx @@ -429,6 +435,7 @@ pytz==2024.1 pyyaml==6.0.1 # via huggingface-hub # via langchain + # via langchain-community # via langchain-core # via uvicorn referencing==0.35.1 @@ -444,6 +451,7 @@ requests==2.32.3 # via google-cloud-storage # via huggingface-hub # via langchain + # via langchain-community # via langchain-fireworks # via langsmith # via sphinx @@ -501,12 +509,14 @@ sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy==2.0.31 # via langchain + # via langchain-community starlette==0.37.2 # via fastapi streamlit==1.36.0 # via burr tenacity==8.5.0 # via langchain + # via langchain-community # via langchain-core # via streamlit tiktoken==0.7.0 @@ -557,6 +567,7 @@ typing-extensions==4.12.2 # via typing-inspect # via uvicorn typing-inspect==0.9.0 + # via dataclasses-json # via sf-hamilton tzdata==2024.1 # via pandas diff --git a/requirements.lock b/requirements.lock index 9d0602db..aa03fd14 100644 --- a/requirements.lock +++ b/requirements.lock 
@@ -10,6 +10,7 @@ -e file:. aiohttp==3.9.5 # via langchain + # via langchain-community # via langchain-fireworks # via langchain-nvidia-ai-endpoints aiosignal==1.3.1 @@ -127,6 +128,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 @@ -183,6 +185,7 @@ langchain-core==0.2.22 # via langchain # via langchain-anthropic # via langchain-aws + # via langchain-community # via langchain-fireworks # via langchain-google-genai # via langchain-google-vertexai @@ -206,6 +209,7 @@ langchain-text-splitters==0.2.2 # via langchain langsmith==0.1.93 # via langchain + # via langchain-community # via langchain-core lxml==5.2.2 # via free-proxy @@ -226,6 +230,7 @@ numpy==1.26.4 # via faiss-cpu # via langchain # via langchain-aws + # via langchain-community # via pandas # via shapely openai==1.37.0 @@ -239,6 +244,7 @@ packaging==24.1 # via google-cloud-bigquery # via huggingface-hub # via langchain-core + # via marshmallow pandas==2.2.2 # via scrapegraphai pillow==10.4.0 @@ -296,6 +302,7 @@ pytz==2024.1 pyyaml==6.0.1 # via huggingface-hub # via langchain + # via langchain-community # via langchain-core regex==2024.5.15 # via tiktoken @@ -306,6 +313,7 @@ requests==2.32.3 # via google-cloud-storage # via huggingface-hub # via langchain + # via langchain-community # via langchain-fireworks # via langsmith # via tiktoken @@ -332,6 +340,7 @@ sqlalchemy==2.0.31 # via langchain-community tenacity==8.5.0 # via langchain + # via langchain-community # via langchain-core tiktoken==0.7.0 # via langchain-openai @@ -356,6 +365,9 @@ typing-extensions==4.12.2 # via pydantic-core # via pyee # via sqlalchemy + # via typing-inspect +typing-inspect==0.9.0 + # via dataclasses-json tzdata==2024.1 # via pandas undetected-playwright==0.3.0 diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 91396ae0..f1c9ff92 100644 --- a/scrapegraphai/graphs/abstract_graph.py 
+++ b/scrapegraphai/graphs/abstract_graph.py @@ -7,6 +7,8 @@ import uuid from pydantic import BaseModel +from langchain_community.chat_models import ChatOllama + from langchain_aws import BedrockEmbeddings from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings from langchain_google_genai import GoogleGenerativeAIEmbeddings @@ -19,22 +21,23 @@ from ..models import ( Anthropic, AzureOpenAI, + OpenAI, Bedrock, Gemini, Groq, HuggingFace, - Ollama, - OpenAI, OneApi, Fireworks, VertexAI, Nvidia ) from ..models.ernie import Ernie +from langchain.chat_models import init_chat_model + from ..utils.logging import set_verbosity_debug, set_verbosity_warning, set_verbosity_info from ..helpers import models_tokens -from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic, DeepSeek +from ..models import AzureOpenAI, OpenAI, Bedrock, Gemini, Groq, HuggingFace, Anthropic, DeepSeek class AbstractGraph(ABC): @@ -213,8 +216,10 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: except KeyError as exc: raise KeyError("Model not supported") from exc return VertexAI(llm_params) + elif "ollama" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("ollama/")[-1] + llm_params["model_provider"] = "ollama" # allow user to set model_tokens in config try: @@ -231,7 +236,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: except AttributeError: self.model_token = 8192 - return Ollama(llm_params) + return init_chat_model(**llm_params) + elif "hugging_face" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("/")[-1] try: @@ -320,7 +326,7 @@ def _create_default_embedder(self, llm_config=None) -> object: return FireworksEmbeddings(model=self.llm_model.model_name) elif isinstance(self.llm_model, Nvidia): return NVIDIAEmbeddings(model=self.llm_model.model_name) - elif isinstance(self.llm_model, Ollama): + elif isinstance(self.llm_model, ChatOllama): # unwrap the 
kwargs from the model whihc is a dict params = self.llm_model._lc_kwargs # remove streaming and temperature diff --git a/scrapegraphai/models/ollama.py b/scrapegraphai/models/ollama.py deleted file mode 100644 index 4bf48178..00000000 --- a/scrapegraphai/models/ollama.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -Ollama Module -""" -from langchain_community.chat_models import ChatOllama - - -class Ollama(ChatOllama): - """ - A wrapper for the ChatOllama class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. - """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) From 5e3eb6e43df4bd4c452d34b49f254235e9ff1b22 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:26:20 +0200 Subject: [PATCH 15/51] refactor(OpenAI): integrate new LangChain chat init --- scrapegraphai/graphs/abstract_graph.py | 6 +++--- scrapegraphai/models/openai.py | 17 ----------------- 2 files changed, 3 insertions(+), 20 deletions(-) delete mode 100644 scrapegraphai/models/openai.py diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index f1c9ff92..a0d0c52c 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -8,6 +8,7 @@ from pydantic import BaseModel from langchain_community.chat_models import ChatOllama +from langchain_openai import ChatOpenAI from langchain_aws import BedrockEmbeddings from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings @@ -21,7 +22,6 @@ from ..models import ( Anthropic, AzureOpenAI, - OpenAI, Bedrock, Gemini, Groq, @@ -37,7 +37,7 @@ from ..utils.logging import set_verbosity_debug, set_verbosity_warning, set_verbosity_info from ..helpers import models_tokens -from ..models import AzureOpenAI, OpenAI, Bedrock, Gemini, Groq, HuggingFace, Anthropic, 
DeepSeek +from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Anthropic, DeepSeek class AbstractGraph(ABC): @@ -311,7 +311,7 @@ def _create_default_embedder(self, llm_config=None) -> object: return GoogleGenerativeAIEmbeddings( google_api_key=llm_config["api_key"], model="models/embedding-001" ) - if isinstance(self.llm_model, OpenAI): + if isinstance(self.llm_model, ChatOpenAI): return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key, base_url=self.llm_model.openai_api_base) elif isinstance(self.llm_model, DeepSeek): diff --git a/scrapegraphai/models/openai.py b/scrapegraphai/models/openai.py deleted file mode 100644 index bfd9d74c..00000000 --- a/scrapegraphai/models/openai.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -OpenAI Module -""" -from langchain_openai import ChatOpenAI - - -class OpenAI(ChatOpenAI): - """ - A wrapper for the ChatOpenAI class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. 
- """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) From 9e795f4e35efa91de850d976f8f6b51232f9073e Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 29 Jul 2024 17:35:10 +0200 Subject: [PATCH 16/51] removed unused init files Co-Authored-By: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> --- requirements-dev.lock | 1 - requirements.lock | 1 - scrapegraphai/graphs/abstract_graph.py | 5 +++-- scrapegraphai/models/__init__.py | 2 -- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index bce18810..2c56f3db 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -180,7 +180,6 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright - # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 diff --git a/requirements.lock b/requirements.lock index aa03fd14..a943dff1 100644 --- a/requirements.lock +++ b/requirements.lock @@ -128,7 +128,6 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright - # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index a0d0c52c..e1ce18f0 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -163,9 +163,10 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: if "gpt-" in llm_params["model"]: try: self.model_token = models_tokens["openai"][llm_params["model"]] + llm_params["model_provider"] = "openai" except KeyError as exc: raise KeyError("Model not supported") from exc - return OpenAI(llm_params) + return init_chat_model(**llm_params) elif "oneapi" in llm_params["model"]: # take the model after the last dash llm_params["model"] = llm_params["model"].split("/")[-1] @@ -455,4 +456,4 @@ def run(self) -> str: """ Abstract method to execute the graph and return the result. 
""" - pass + pass \ No newline at end of file diff --git a/scrapegraphai/models/__init__.py b/scrapegraphai/models/__init__.py index bfcb84d6..81bceeb8 100644 --- a/scrapegraphai/models/__init__.py +++ b/scrapegraphai/models/__init__.py @@ -2,12 +2,10 @@ __init__.py file for models folder """ -from .openai import OpenAI from .azure_openai import AzureOpenAI from .openai_itt import OpenAIImageToText from .openai_tts import OpenAITextToSpeech from .gemini import Gemini -from .ollama import Ollama from .hugging_face import HuggingFace from .groq import Groq from .bedrock import Bedrock From 2c5f934f101e319ec4e61009d4c464ca4626c1ff Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 29 Jul 2024 17:46:17 +0200 Subject: [PATCH 17/51] refactor: remove LangChain wrappers --- scrapegraphai/nodes/generate_answer_node.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index 81812598..5022b16f 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -6,9 +6,10 @@ from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel +from langchain_openai import ChatOpenAI +from langchain_community.chat_models import ChatOllama from tqdm import tqdm from ..utils.logging import get_logger -from ..models import Ollama, OpenAI from .base_node import BaseNode from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_md, template_no_chunks_md, template_merge_md @@ -41,7 +42,7 @@ def __init__( self.llm_model = node_config["llm_model"] - if isinstance(node_config["llm_model"], Ollama): + if isinstance(node_config["llm_model"], ChatOllama): self.llm_model.format="json" self.verbose = ( @@ -93,7 +94,7 @@ def execute(self, state: dict) -> dict: 
format_instructions = output_parser.get_format_instructions() - if isinstance(self.llm_model, OpenAI) and not self.script_creator or self.force and not self.script_creator or self.is_md_scraper: + if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator or self.is_md_scraper: template_no_chunks_prompt = template_no_chunks_md template_chunks_prompt = template_chunks_md template_merge_prompt = template_merge_md From 25066b2bc51517e50058231664230b8edef365b9 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 29 Jul 2024 17:49:21 +0200 Subject: [PATCH 18/51] refactor: remove LangChain wrappers for Ollama --- scrapegraphai/nodes/generate_answer_omni_node.py | 4 ++-- scrapegraphai/nodes/generate_answer_pdf_node.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scrapegraphai/nodes/generate_answer_omni_node.py b/scrapegraphai/nodes/generate_answer_omni_node.py index c2f2b65d..98be26dd 100644 --- a/scrapegraphai/nodes/generate_answer_omni_node.py +++ b/scrapegraphai/nodes/generate_answer_omni_node.py @@ -10,7 +10,7 @@ from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel from tqdm import tqdm -from ..models import Ollama +from langchain_community.chat_models import ChatOllama # Imports from the library from .base_node import BaseNode from ..helpers.generate_answer_node_omni_prompts import template_no_chunk_omni, template_chunks_omni, template_merge_omni @@ -44,7 +44,7 @@ def __init__( super().__init__(node_name, "node", input, output, 3, node_config) self.llm_model = node_config["llm_model"] - if isinstance(node_config["llm_model"], Ollama): + if isinstance(node_config["llm_model"], ChatOllama): self.llm_model.format="json" self.verbose = ( diff --git a/scrapegraphai/nodes/generate_answer_pdf_node.py b/scrapegraphai/nodes/generate_answer_pdf_node.py index 7add7948..47f14e86 100644 --- 
a/scrapegraphai/nodes/generate_answer_pdf_node.py +++ b/scrapegraphai/nodes/generate_answer_pdf_node.py @@ -10,7 +10,7 @@ from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel from tqdm import tqdm -from ..models import Ollama +from langchain_community.chat_models import ChatOllama from ..utils.logging import get_logger # Imports from the library @@ -59,7 +59,7 @@ def __init__( super().__init__(node_name, "node", input, output, 2, node_config) self.llm_model = node_config["llm_model"] - if isinstance(node_config["llm_model"], Ollama): + if isinstance(node_config["llm_model"], ChatOllama): self.llm_model.format="json" self.verbose = ( From f6b7c6a4309d7a7460d46f75a76a926711a99f3c Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 29 Jul 2024 17:56:32 +0200 Subject: [PATCH 19/51] refactoring Co-Authored-By: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> --- scrapegraphai/nodes/fetch_node.py | 9 ++++----- scrapegraphai/nodes/generate_answer_node.py | 1 + scrapegraphai/nodes/search_internet_node.py | 6 ++---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 56366677..64a80cfe 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -4,7 +4,7 @@ import json from typing import List, Optional - +from langchain_openai import ChatOpenAI import pandas as pd import requests from langchain_community.document_loaders import PyPDFLoader @@ -14,7 +14,6 @@ from ..utils.convert_to_md import convert_to_md from ..utils.logging import get_logger from .base_node import BaseNode -from ..models import OpenAI class FetchNode(BaseNode): @@ -165,7 +164,7 @@ def execute(self, state): parsed_content = source - if isinstance(self.llm_model, OpenAI) and not self.script_creator or self.force and not self.script_creator: + if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or 
self.force and not self.script_creator: parsed_content = convert_to_md(source) compressed_document = [ @@ -184,7 +183,7 @@ def execute(self, state): if not self.cut: parsed_content = cleanup_html(response, source) - if (isinstance(self.llm_model, OpenAI) and not self.script_creator) or (self.force and not self.script_creator): + if (isinstance(self.llm_model, ChatOpenAI) and not self.script_creator) or (self.force and not self.script_creator): parsed_content = convert_to_md(source, input_data[0]) compressed_document = [Document(page_content=parsed_content)] else: @@ -206,7 +205,7 @@ def execute(self, state): raise ValueError("No HTML body content found in the document fetched by ChromiumLoader.") parsed_content = document[0].page_content - if isinstance(self.llm_model, OpenAI) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled: + if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled: parsed_content = convert_to_md(document[0].page_content, input_data[0]) diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index 5022b16f..12ae6f0f 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -9,6 +9,7 @@ from langchain_openai import ChatOpenAI from langchain_community.chat_models import ChatOllama from tqdm import tqdm +from langchain_openai import ChatOpenAI from ..utils.logging import get_logger from .base_node import BaseNode from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_md, template_no_chunks_md, template_merge_md diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 2474ab60..7588b995 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -6,12 +6,10 @@ from 
langchain.output_parsers import CommaSeparatedListOutputParser from langchain.prompts import PromptTemplate - +from langchain_community.chat_models import ChatOllama from ..utils.logging import get_logger from ..utils.research_web import search_on_web from .base_node import BaseNode -from ..models import Ollama - class SearchInternetNode(BaseNode): """ @@ -97,7 +95,7 @@ def execute(self, state: dict) -> dict: search_answer = search_prompt | self.llm_model | output_parser # Ollama: Use no json format when creating the search query - if isinstance(self.llm_model, Ollama) and self.llm_model.format == 'json': + if isinstance(self.llm_model, ChatOllama) and self.llm_model.format == 'json': self.llm_model.format = None search_query = search_answer.invoke({"user_prompt": user_prompt})[0] self.llm_model.format = 'json' From 5007167af1cb9a8a4f8ed9925ae765bff06017e1 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 29 Jul 2024 17:59:16 +0200 Subject: [PATCH 20/51] removed unused models Co-Authored-By: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> --- scrapegraphai/models/__init__.py | 9 -------- scrapegraphai/models/anthropic.py | 17 -------------- scrapegraphai/models/azure_openai.py | 17 -------------- scrapegraphai/models/bedrock.py | 19 ---------------- scrapegraphai/models/fireworks.py | 33 ---------------------------- scrapegraphai/models/gemini.py | 20 ----------------- scrapegraphai/models/groq.py | 17 -------------- scrapegraphai/models/hugging_face.py | 17 -------------- scrapegraphai/models/vertex.py | 16 -------------- 9 files changed, 165 deletions(-) delete mode 100644 scrapegraphai/models/anthropic.py delete mode 100644 scrapegraphai/models/azure_openai.py delete mode 100644 scrapegraphai/models/bedrock.py delete mode 100644 scrapegraphai/models/fireworks.py delete mode 100644 scrapegraphai/models/gemini.py delete mode 100644 scrapegraphai/models/groq.py delete mode 100644 scrapegraphai/models/hugging_face.py delete mode 100644 
scrapegraphai/models/vertex.py diff --git a/scrapegraphai/models/__init__.py b/scrapegraphai/models/__init__.py index 81bceeb8..9d27884b 100644 --- a/scrapegraphai/models/__init__.py +++ b/scrapegraphai/models/__init__.py @@ -1,17 +1,8 @@ """ __init__.py file for models folder """ - -from .azure_openai import AzureOpenAI from .openai_itt import OpenAIImageToText from .openai_tts import OpenAITextToSpeech -from .gemini import Gemini -from .hugging_face import HuggingFace -from .groq import Groq -from .bedrock import Bedrock -from .anthropic import Anthropic from .deepseek import DeepSeek from .oneapi import OneApi -from .fireworks import Fireworks -from .vertex import VertexAI from .nvidia import Nvidia diff --git a/scrapegraphai/models/anthropic.py b/scrapegraphai/models/anthropic.py deleted file mode 100644 index 3a7480d0..00000000 --- a/scrapegraphai/models/anthropic.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -Anthropic Module -""" -from langchain_anthropic import ChatAnthropic - - -class Anthropic(ChatAnthropic): - """ - A wrapper for the ChatAnthropic class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. - """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) \ No newline at end of file diff --git a/scrapegraphai/models/azure_openai.py b/scrapegraphai/models/azure_openai.py deleted file mode 100644 index ae47d4e6..00000000 --- a/scrapegraphai/models/azure_openai.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -AzureOpenAI Module -""" -from langchain_openai import AzureChatOpenAI - - -class AzureOpenAI(AzureChatOpenAI): - """ - A wrapper for the AzureChatOpenAI class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. 
- """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) diff --git a/scrapegraphai/models/bedrock.py b/scrapegraphai/models/bedrock.py deleted file mode 100644 index 06299075..00000000 --- a/scrapegraphai/models/bedrock.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Bedrock Module -""" -from langchain_aws import ChatBedrock - - -class Bedrock(ChatBedrock): - """Class for wrapping bedrock module""" - - def __init__(self, llm_config: dict): - """ - A wrapper for the ChatBedrock class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. - """ - # Initialize the superclass (ChatBedrock) with provided config parameters - super().__init__(**llm_config) diff --git a/scrapegraphai/models/fireworks.py b/scrapegraphai/models/fireworks.py deleted file mode 100644 index 445c4846..00000000 --- a/scrapegraphai/models/fireworks.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -Fireworks Module -""" -from langchain_fireworks import ChatFireworks - - -class Fireworks(ChatFireworks): - """ - Initializes the Fireworks class. - - Args: - llm_config (dict): A dictionary containing configuration parameters for the LLM (required). - The specific keys and values will depend on the LLM implementation - used by the underlying `ChatFireworks` class. Consult its documentation - for details. - - Raises: - ValueError: If required keys are missing from the llm_config dictionary. - """ - - def __init__(self, llm_config: dict): - """ - Initializes the Fireworks class. - - Args: - llm_config (dict): A dictionary containing configuration parameters for the LLM. - The specific keys and values will depend on the LLM implementation. - - Raises: - ValueError: If required keys are missing from the llm_config dictionary. 
- """ - - super().__init__(**llm_config) diff --git a/scrapegraphai/models/gemini.py b/scrapegraphai/models/gemini.py deleted file mode 100644 index 1c939c6c..00000000 --- a/scrapegraphai/models/gemini.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -Gemini Module -""" -from langchain_google_genai import ChatGoogleGenerativeAI - - -class Gemini(ChatGoogleGenerativeAI): - """ - A wrapper for the Gemini class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model - (e.g., model="gemini-pro") - """ - - def __init__(self, llm_config: dict): - # replace "api_key" to "google_api_key" - llm_config["google_api_key"] = llm_config.pop("api_key", None) - super().__init__(**llm_config) diff --git a/scrapegraphai/models/groq.py b/scrapegraphai/models/groq.py deleted file mode 100644 index 755f50aa..00000000 --- a/scrapegraphai/models/groq.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -Groq Module -""" - -from langchain_groq import ChatGroq - -class Groq(ChatGroq): - """ - A wrapper for the Groq class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model (e.g., model="llama3-70b-8192") - """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) \ No newline at end of file diff --git a/scrapegraphai/models/hugging_face.py b/scrapegraphai/models/hugging_face.py deleted file mode 100644 index 9696db1e..00000000 --- a/scrapegraphai/models/hugging_face.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -HuggingFace Module -""" -from langchain_community.chat_models.huggingface import ChatHuggingFace - - -class HuggingFace(ChatHuggingFace): - """ - A wrapper for the HuggingFace class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. 
- """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) diff --git a/scrapegraphai/models/vertex.py b/scrapegraphai/models/vertex.py deleted file mode 100644 index eb4676fc..00000000 --- a/scrapegraphai/models/vertex.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -VertexAI Module -""" -from langchain_google_vertexai import ChatVertexAI - -class VertexAI(ChatVertexAI): - """ - A wrapper for the ChatVertexAI class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. - """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) From 927548624034b3c30eca60011d216720102d1815 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 29 Jul 2024 21:57:37 +0200 Subject: [PATCH 21/51] refactor: remove redundant LangChain wrappers --- pyproject.toml | 3 +- requirements-dev.lock | 49 ++++++++++++++++ requirements.lock | 52 +++++++++++++++++ requirements.txt | 1 + scrapegraphai/graphs/abstract_graph.py | 77 ++++++++++++-------------- 5 files changed, 140 insertions(+), 42 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b7b0d55d..bee7b61d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,8 @@ dependencies = [ "undetected-playwright>=0.3.0", "semchunk>=1.0.1", "langchain-fireworks>=0.1.3", - "langchain-community>=0.2.9" + "langchain-community>=0.2.9", + "langchain-huggingface>=0.0.3", ] license = "MIT" diff --git a/requirements-dev.lock b/requirements-dev.lock index 2c56f3db..0b3ef491 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -106,6 +106,8 @@ fastapi-pagination==0.12.26 # via burr filelock==3.15.4 # via huggingface-hub + # via torch + # via transformers fireworks-ai==0.14.0 # via langchain-fireworks fonttools==4.53.1 @@ -117,6 +119,7 @@ frozenlist==1.4.1 # via aiosignal fsspec==2024.6.1 # via huggingface-hub + # via torch 
furo==2024.5.6 # via scrapegraphai gitdb==4.0.11 @@ -180,6 +183,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 @@ -212,7 +216,10 @@ httpx==0.27.0 httpx-sse==0.4.0 # via fireworks-ai huggingface-hub==0.24.0 + # via langchain-huggingface + # via sentence-transformers # via tokenizers + # via transformers idna==3.7 # via anyio # via email-validator @@ -235,11 +242,14 @@ jinja2==3.1.4 # via fastapi # via pydeck # via sphinx + # via torch jiter==0.5.0 # via anthropic jmespath==1.0.1 # via boto3 # via botocore +joblib==1.4.2 + # via scikit-learn jsonpatch==1.33 # via langchain-core jsonpointer==3.0.0 @@ -268,6 +278,7 @@ langchain-core==0.2.22 # via langchain-google-genai # via langchain-google-vertexai # via langchain-groq + # via langchain-huggingface # via langchain-nvidia-ai-endpoints # via langchain-openai # via langchain-text-splitters @@ -279,6 +290,8 @@ langchain-google-vertexai==1.0.7 # via scrapegraphai langchain-groq==0.1.6 # via scrapegraphai +langchain-huggingface==0.0.3 + # via scrapegraphai langchain-nvidia-ai-endpoints==0.1.6 # via scrapegraphai langchain-openai==0.1.17 @@ -309,6 +322,8 @@ minify-html==0.15.0 # via scrapegraphai mpire==2.10.2 # via semchunk +mpmath==1.3.0 + # via sympy multidict==6.0.5 # via aiohttp # via yarl @@ -316,6 +331,8 @@ multiprocess==0.70.16 # via mpire mypy-extensions==1.0.0 # via typing-inspect +networkx==3.2.1 + # via torch numpy==1.26.4 # via altair # via contourpy @@ -327,9 +344,13 @@ numpy==1.26.4 # via pandas # via pyarrow # via pydeck + # via scikit-learn + # via scipy + # via sentence-transformers # via sf-hamilton # via shapely # via streamlit + # via transformers openai==1.37.0 # via burr # via langchain-fireworks @@ -348,6 +369,7 @@ packaging==24.1 # via pytest # via sphinx # via streamlit + # via transformers pandas==2.2.2 # via altair # via scrapegraphai @@ -357,6 +379,7 @@ pillow==10.4.0 # via fireworks-ai # 
via langchain-nvidia-ai-endpoints # via matplotlib + # via sentence-transformers # via streamlit platformdirs==4.2.2 # via pylint @@ -436,12 +459,14 @@ pyyaml==6.0.1 # via langchain # via langchain-community # via langchain-core + # via transformers # via uvicorn referencing==0.35.1 # via jsonschema # via jsonschema-specifications regex==2024.5.15 # via tiktoken + # via transformers requests==2.32.3 # via burr # via free-proxy @@ -456,6 +481,7 @@ requests==2.32.3 # via sphinx # via streamlit # via tiktoken + # via transformers rich==13.7.1 # via streamlit # via typer @@ -466,8 +492,17 @@ rsa==4.9 # via google-auth s3transfer==0.10.2 # via boto3 +safetensors==0.4.3 + # via transformers +scikit-learn==1.5.1 + # via sentence-transformers +scipy==1.13.1 + # via scikit-learn + # via sentence-transformers semchunk==2.2.0 # via scrapegraphai +sentence-transformers==3.0.1 + # via langchain-huggingface sf-hamilton==1.72.1 # via burr shapely==2.0.5 @@ -513,16 +548,22 @@ starlette==0.37.2 # via fastapi streamlit==1.36.0 # via burr +sympy==1.13.1 + # via torch tenacity==8.5.0 # via langchain # via langchain-community # via langchain-core # via streamlit +threadpoolctl==3.5.0 + # via scikit-learn tiktoken==0.7.0 # via langchain-openai # via scrapegraphai tokenizers==0.19.1 # via anthropic + # via langchain-huggingface + # via transformers toml==0.10.2 # via streamlit tomli==2.0.1 @@ -532,6 +573,8 @@ tomlkit==0.13.0 # via pylint toolz==0.12.1 # via altair +torch==2.2.2 + # via sentence-transformers tornado==6.4.1 # via streamlit tqdm==4.66.4 @@ -541,6 +584,11 @@ tqdm==4.66.4 # via openai # via scrapegraphai # via semchunk + # via sentence-transformers + # via transformers +transformers==4.43.3 + # via langchain-huggingface + # via sentence-transformers typer==0.12.3 # via fastapi-cli typing-extensions==4.12.2 @@ -562,6 +610,7 @@ typing-extensions==4.12.2 # via sqlalchemy # via starlette # via streamlit + # via torch # via typer # via typing-inspect # via uvicorn diff --git 
a/requirements.lock b/requirements.lock index a943dff1..a9df041e 100644 --- a/requirements.lock +++ b/requirements.lock @@ -63,6 +63,8 @@ faiss-cpu==1.8.0.post1 # via scrapegraphai filelock==3.15.4 # via huggingface-hub + # via torch + # via transformers fireworks-ai==0.14.0 # via langchain-fireworks free-proxy==1.1.1 @@ -72,6 +74,7 @@ frozenlist==1.4.1 # via aiosignal fsspec==2024.6.1 # via huggingface-hub + # via torch google==3.0.0 # via scrapegraphai google-ai-generativelanguage==0.6.6 @@ -128,6 +131,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 @@ -156,17 +160,24 @@ httpx==0.27.0 httpx-sse==0.4.0 # via fireworks-ai huggingface-hub==0.24.0 + # via langchain-huggingface + # via sentence-transformers # via tokenizers + # via transformers idna==3.7 # via anyio # via httpx # via requests # via yarl +jinja2==3.1.4 + # via torch jiter==0.5.0 # via anthropic jmespath==1.0.1 # via boto3 # via botocore +joblib==1.4.2 + # via scikit-learn jsonpatch==1.33 # via langchain-core jsonpointer==3.0.0 @@ -189,6 +200,7 @@ langchain-core==0.2.22 # via langchain-google-genai # via langchain-google-vertexai # via langchain-groq + # via langchain-huggingface # via langchain-nvidia-ai-endpoints # via langchain-openai # via langchain-text-splitters @@ -200,6 +212,8 @@ langchain-google-vertexai==1.0.7 # via scrapegraphai langchain-groq==0.1.6 # via scrapegraphai +langchain-huggingface==0.0.3 + # via scrapegraphai langchain-nvidia-ai-endpoints==0.1.6 # via scrapegraphai langchain-openai==0.1.17 @@ -212,12 +226,16 @@ langsmith==0.1.93 # via langchain-core lxml==5.2.2 # via free-proxy +markupsafe==2.1.5 + # via jinja2 marshmallow==3.21.3 # via dataclasses-json minify-html==0.15.0 # via scrapegraphai mpire==2.10.2 # via semchunk +mpmath==1.3.0 + # via sympy multidict==6.0.5 # via aiohttp # via yarl @@ -225,13 +243,19 @@ multiprocess==0.70.16 # via mpire mypy-extensions==1.0.0 # via 
typing-inspect +networkx==3.2.1 + # via torch numpy==1.26.4 # via faiss-cpu # via langchain # via langchain-aws # via langchain-community # via pandas + # via scikit-learn + # via scipy + # via sentence-transformers # via shapely + # via transformers openai==1.37.0 # via langchain-fireworks # via langchain-openai @@ -244,11 +268,13 @@ packaging==24.1 # via huggingface-hub # via langchain-core # via marshmallow + # via transformers pandas==2.2.2 # via scrapegraphai pillow==10.4.0 # via fireworks-ai # via langchain-nvidia-ai-endpoints + # via sentence-transformers playwright==1.45.0 # via scrapegraphai # via undetected-playwright @@ -303,8 +329,10 @@ pyyaml==6.0.1 # via langchain # via langchain-community # via langchain-core + # via transformers regex==2024.5.15 # via tiktoken + # via transformers requests==2.32.3 # via free-proxy # via google-api-core @@ -316,12 +344,22 @@ requests==2.32.3 # via langchain-fireworks # via langsmith # via tiktoken + # via transformers rsa==4.9 # via google-auth s3transfer==0.10.2 # via boto3 +safetensors==0.4.3 + # via transformers +scikit-learn==1.5.1 + # via sentence-transformers +scipy==1.13.1 + # via scikit-learn + # via sentence-transformers semchunk==2.2.0 # via scrapegraphai +sentence-transformers==3.0.1 + # via langchain-huggingface shapely==2.0.5 # via google-cloud-aiplatform six==1.16.0 @@ -337,15 +375,23 @@ soupsieve==2.5 sqlalchemy==2.0.31 # via langchain # via langchain-community +sympy==1.13.1 + # via torch tenacity==8.5.0 # via langchain # via langchain-community # via langchain-core +threadpoolctl==3.5.0 + # via scikit-learn tiktoken==0.7.0 # via langchain-openai # via scrapegraphai tokenizers==0.19.1 # via anthropic + # via langchain-huggingface + # via transformers +torch==2.2.2 + # via sentence-transformers tqdm==4.66.4 # via google-generativeai # via huggingface-hub @@ -353,6 +399,11 @@ tqdm==4.66.4 # via openai # via scrapegraphai # via semchunk + # via sentence-transformers + # via transformers 
+transformers==4.43.3 + # via langchain-huggingface + # via sentence-transformers typing-extensions==4.12.2 # via anthropic # via anyio @@ -364,6 +415,7 @@ typing-extensions==4.12.2 # via pydantic-core # via pyee # via sqlalchemy + # via torch # via typing-inspect typing-inspect==0.9.0 # via dataclasses-json diff --git a/requirements.txt b/requirements.txt index 440bf78a..8f3f5da5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ undetected-playwright>=0.3.0 semchunk>=1.0.1 langchain-fireworks>=0.1.3 langchain-community>=0.2.9 +langchain-huggingface>=0.0.3 diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index e1ce18f0..f27d1aee 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -3,33 +3,28 @@ """ from abc import ABC, abstractmethod -from typing import Optional, Union +from typing import Optional import uuid from pydantic import BaseModel from langchain_community.chat_models import ChatOllama from langchain_openai import ChatOpenAI -from langchain_aws import BedrockEmbeddings -from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings +from langchain_aws import BedrockEmbeddings, ChatBedrock +from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings +from langchain_community.embeddings import OllamaEmbeddings from langchain_google_genai import GoogleGenerativeAIEmbeddings -from langchain_google_vertexai import VertexAIEmbeddings +from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings +from langchain_google_genai import ChatGoogleGenerativeAI from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings -from langchain_fireworks import FireworksEmbeddings -from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings +from langchain_fireworks import FireworksEmbeddings, ChatFireworks +from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings, ChatOpenAI, 
AzureChatOpenAI from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings from ..helpers import models_tokens from ..models import ( - Anthropic, - AzureOpenAI, - Bedrock, - Gemini, - Groq, - HuggingFace, OneApi, - Fireworks, - VertexAI, - Nvidia + Nvidia, + DeepSeek ) from ..models.ernie import Ernie from langchain.chat_models import init_chat_model @@ -37,7 +32,6 @@ from ..utils.logging import set_verbosity_debug, set_verbosity_warning, set_verbosity_info from ..helpers import models_tokens -from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Anthropic, DeepSeek class AbstractGraph(ABC): @@ -181,7 +175,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) except KeyError as exc: raise KeyError("Model not supported") from exc - return Fireworks(llm_params) + llm_params["model_provider"] = "fireworks" + return init_chat_model(**llm_params) elif "azure" in llm_params["model"]: # take the model after the last dash llm_params["model"] = llm_params["model"].split("/")[-1] @@ -189,7 +184,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: self.model_token = models_tokens["azure"][llm_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc - return AzureOpenAI(llm_params) + llm_params["model_provider"] = "azure_openai" + return init_chat_model(**llm_params) elif "nvidia" in llm_params["model"]: try: self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]] @@ -203,20 +199,23 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: self.model_token = models_tokens["gemini"][llm_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc - return Gemini(llm_params) + llm_params["model_provider"] = "google_genai " + return init_chat_model(**llm_params) elif llm_params["model"].startswith("claude"): llm_params["model"] = llm_params["model"].split("/")[-1] try: 
self.model_token = models_tokens["claude"][llm_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc - return Anthropic(llm_params) + llm_params["model_provider"] = "anthropic" + return init_chat_model(**llm_params) elif llm_params["model"].startswith("vertexai"): try: self.model_token = models_tokens["vertexai"][llm_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc - return VertexAI(llm_params) + llm_params["model_provider"] = "google_vertexai" + return init_chat_model(**llm_params) elif "ollama" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("ollama/")[-1] @@ -246,7 +245,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: except KeyError: print("model not found, using default token size (8192)") self.model_token = 8192 - return HuggingFace(llm_params) + llm_params["model_provider"] = "hugging_face" + return init_chat_model(**llm_params) elif "groq" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("/")[-1] @@ -255,7 +255,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: except KeyError: print("model not found, using default token size (8192)") self.model_token = 8192 - return Groq(llm_params) + llm_params["model_provider"] = "groq" + return init_chat_model(**llm_params) elif "bedrock" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("/")[-1] model_id = llm_params["model"] @@ -265,22 +266,16 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: except KeyError: print("model not found, using default token size (8192)") self.model_token = 8192 - return Bedrock( - { - "client": client, - "model_id": model_id, - "model_kwargs": { - "temperature": llm_params["temperature"], - }, - } - ) + llm_params["model_provider"] = "bedrock" + return init_chat_model(**llm_params) elif "claude-3-" in llm_params["model"]: try: self.model_token = models_tokens["claude"]["claude3"] except KeyError: 
print("model not found, using default token size (8192)") self.model_token = 8192 - return Anthropic(llm_params) + llm_params["model_provider"] = "anthropic" + return init_chat_model(**llm_params) elif "deepseek" in llm_params["model"]: try: self.model_token = models_tokens["deepseek"][llm_params["model"]] @@ -308,7 +303,7 @@ def _create_default_embedder(self, llm_config=None) -> object: Raises: ValueError: If the model is not supported. """ - if isinstance(self.llm_model, Gemini): + if isinstance(self.llm_model, ChatGoogleGenerativeAI): return GoogleGenerativeAIEmbeddings( google_api_key=llm_config["api_key"], model="models/embedding-001" ) @@ -317,13 +312,13 @@ def _create_default_embedder(self, llm_config=None) -> object: base_url=self.llm_model.openai_api_base) elif isinstance(self.llm_model, DeepSeek): return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key) - elif isinstance(self.llm_model, VertexAI): + elif isinstance(self.llm_model, ChatVertexAI): return VertexAIEmbeddings() elif isinstance(self.llm_model, AzureOpenAIEmbeddings): return self.llm_model - elif isinstance(self.llm_model, AzureOpenAI): + elif isinstance(self.llm_model, AzureChatOpenAI): return AzureOpenAIEmbeddings() - elif isinstance(self.llm_model, Fireworks): + elif isinstance(self.llm_model, ChatFireworks): return FireworksEmbeddings(model=self.llm_model.model_name) elif isinstance(self.llm_model, Nvidia): return NVIDIAEmbeddings(model=self.llm_model.model_name) @@ -335,9 +330,9 @@ def _create_default_embedder(self, llm_config=None) -> object: params.pop("temperature", None) return OllamaEmbeddings(**params) - elif isinstance(self.llm_model, HuggingFace): - return HuggingFaceHubEmbeddings(model=self.llm_model.model) - elif isinstance(self.llm_model, Bedrock): + elif isinstance(self.llm_model, ChatHuggingFace): + return HuggingFaceEmbeddings(model=self.llm_model.model) + elif isinstance(self.llm_model, ChatBedrock): return BedrockEmbeddings(client=None, 
model_id=self.llm_model.model_id) else: raise ValueError("Embedding Model missing or not supported") @@ -384,7 +379,7 @@ def _create_embedder(self, embedder_config: dict) -> object: models_tokens["hugging_face"][embedder_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc - return HuggingFaceHubEmbeddings(model=embedder_params["model"]) + return HuggingFaceEmbeddings(model=embedder_params["model"]) elif "fireworks" in embedder_params["model"]: embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) try: From bc2c9967d2f13ade6eeb7b23e9b423f6e79aa890 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:59:12 +0200 Subject: [PATCH 22/51] refactor: remove redundant wrappers for Ernie and Nvidia --- scrapegraphai/graphs/abstract_graph.py | 12 ++++++------ scrapegraphai/models/ernie.py | 17 ----------------- scrapegraphai/models/nvidia.py | 25 ------------------------- 3 files changed, 6 insertions(+), 48 deletions(-) delete mode 100644 scrapegraphai/models/ernie.py delete mode 100644 scrapegraphai/models/nvidia.py diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index f27d1aee..50de0a94 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -19,14 +19,14 @@ from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings from langchain_fireworks import FireworksEmbeddings, ChatFireworks from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings, ChatOpenAI, AzureChatOpenAI -from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings +from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA +from langchain_community.chat_models import ErnieBotChat from ..helpers import models_tokens from ..models import ( OneApi, - Nvidia, DeepSeek ) -from ..models.ernie import Ernie + from langchain.chat_models import init_chat_model from 
..utils.logging import set_verbosity_debug, set_verbosity_warning, set_verbosity_info @@ -192,7 +192,7 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) except KeyError as exc: raise KeyError("Model not supported") from exc - return Nvidia(llm_params) + return ChatNVIDIA(llm_params) elif "gemini" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("/")[-1] try: @@ -289,7 +289,7 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: except KeyError: print("model not found, using default token size (8192)") self.model_token = 8192 - return Ernie(llm_params) + return ErnieBotChat(llm_params) else: raise ValueError("Model provided by the configuration not supported") @@ -320,7 +320,7 @@ def _create_default_embedder(self, llm_config=None) -> object: return AzureOpenAIEmbeddings() elif isinstance(self.llm_model, ChatFireworks): return FireworksEmbeddings(model=self.llm_model.model_name) - elif isinstance(self.llm_model, Nvidia): + elif isinstance(self.llm_model, ChatNVIDIA): return NVIDIAEmbeddings(model=self.llm_model.model_name) elif isinstance(self.llm_model, ChatOllama): # unwrap the kwargs from the model whihc is a dict diff --git a/scrapegraphai/models/ernie.py b/scrapegraphai/models/ernie.py deleted file mode 100644 index 75e2a261..00000000 --- a/scrapegraphai/models/ernie.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -Ernie Module -""" -from langchain_community.chat_models import ErnieBotChat - - -class Ernie(ErnieBotChat): - """ - A wrapper for the ErnieBotChat class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. 
- """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) diff --git a/scrapegraphai/models/nvidia.py b/scrapegraphai/models/nvidia.py deleted file mode 100644 index 48ce3c0f..00000000 --- a/scrapegraphai/models/nvidia.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -This is a Python wrapper class for ChatNVIDIA. -It provides default configuration and could be extended with additional methods if needed. -The purpose of this wrapper is to simplify the creation of instances of ChatNVIDIA by providing -default configurations for certain parameters, -allowing users to focus on specifying other important parameters without having -to understand all the details of the underlying class's constructor. -It inherits from the base class ChatNVIDIA and overrides -its init method to provide a more user-friendly interface. -The constructor takes one argument: llm_config, which is used to initialize the superclass -with default configuration. -""" - -from langchain_nvidia_ai_endpoints import ChatNVIDIA - -class Nvidia(ChatNVIDIA): - """ A wrapper for the Nvidia class that provides default configuration - and could be extended with additional methods if needed. - - Args: - llm_config (dict): Configuration parameters for the language model. 
- """ - - def __init__(self, llm_config: dict): - super().__init__(**llm_config) From 07ef383ab94318d070a71685aa80af0c0d48d129 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Tue, 30 Jul 2024 11:11:31 +0200 Subject: [PATCH 23/51] add rye packages Co-Authored-By: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> --- requirements-dev.lock | 1 - requirements.lock | 1 - 2 files changed, 2 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index 0b3ef491..6bbbd4b9 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -183,7 +183,6 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright - # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 diff --git a/requirements.lock b/requirements.lock index a9df041e..b4d1015d 100644 --- a/requirements.lock +++ b/requirements.lock @@ -131,7 +131,6 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright - # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 From 88710f1a7c7d50f57108456112da30d1a12a1ba1 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 30 Jul 2024 15:57:08 +0200 Subject: [PATCH 24/51] chore: remove unused import --- scrapegraphai/models/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scrapegraphai/models/__init__.py b/scrapegraphai/models/__init__.py index 9d27884b..ce798ad8 100644 --- a/scrapegraphai/models/__init__.py +++ b/scrapegraphai/models/__init__.py @@ -5,4 +5,3 @@ from .openai_tts import OpenAITextToSpeech from .deepseek import DeepSeek from .oneapi import OneApi -from .nvidia import Nvidia From 1db164e9e682eefbc1414989a043fefa2e9009c2 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Tue, 30 Jul 2024 16:12:31 +0200 Subject: [PATCH 25/51] feat: fix tests --- examples/single_node/robot_node.py | 4 ++-- tests/nodes/robot_node_test.py | 3 +-- tests/nodes/search_internet_node_test.py | 4 ++-- 
tests/nodes/search_link_node_test.py | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/examples/single_node/robot_node.py b/examples/single_node/robot_node.py index f51f8649..c2bcbbd1 100644 --- a/examples/single_node/robot_node.py +++ b/examples/single_node/robot_node.py @@ -2,7 +2,7 @@ Example of custom graph using existing nodes """ -from scrapegraphai.models import Ollama +from langchain_community.chat_models import ChatOllama from scrapegraphai.nodes import RobotsNode # ************************************************ @@ -26,7 +26,7 @@ # Define the node # ************************************************ -llm_model = Ollama(graph_config["llm"]) +llm_model = ChatOllama(graph_config["llm"]) robots_node = RobotsNode( input="url", diff --git a/tests/nodes/robot_node_test.py b/tests/nodes/robot_node_test.py index 00a45b05..62527dda 100644 --- a/tests/nodes/robot_node_test.py +++ b/tests/nodes/robot_node_test.py @@ -1,7 +1,6 @@ import pytest from unittest.mock import MagicMock - -from scrapegraphai.models import Ollama +from langchain_community.chat_models import ChatOllama from scrapegraphai.nodes import RobotsNode @pytest.fixture diff --git a/tests/nodes/search_internet_node_test.py b/tests/nodes/search_internet_node_test.py index db2cbdee..8e198448 100644 --- a/tests/nodes/search_internet_node_test.py +++ b/tests/nodes/search_internet_node_test.py @@ -1,5 +1,5 @@ import unittest -from scrapegraphai.models import Ollama +from langchain_community.chat_models import ChatOllama from scrapegraphai.nodes import SearchInternetNode class TestSearchInternetNode(unittest.TestCase): @@ -18,7 +18,7 @@ def setUp(self): } # Define the model - self.llm_model = Ollama(self.graph_config["llm"]) + self.llm_model = ChatOllama(self.graph_config["llm"]) # Initialize the SearchInternetNode self.search_node = SearchInternetNode( diff --git a/tests/nodes/search_link_node_test.py b/tests/nodes/search_link_node_test.py index 648db4ee..1f8c5a58 100644 --- 
a/tests/nodes/search_link_node_test.py +++ b/tests/nodes/search_link_node_test.py @@ -1,5 +1,5 @@ import pytest -from scrapegraphai.models import Ollama +from langchain_community.chat_models import ChatOllama from scrapegraphai.nodes import SearchLinkNode from unittest.mock import patch, MagicMock @@ -18,7 +18,7 @@ def setup(): } # Instantiate the LLM model with the configuration - llm_model = Ollama(graph_config["llm"]) + llm_model = ChatOllama(graph_config["llm"]) # Define the SearchLinkNode with necessary configurations search_link_node = SearchLinkNode( From b15fd9f4dc3643c9904a2cbaa5f392a6805c9762 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 30 Jul 2024 14:19:46 +0000 Subject: [PATCH 26/51] ci(release): 1.11.0-beta.5 [skip ci] ## [1.11.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.4...v1.11.0-beta.5) (2024-07-30) ### Features * fix tests ([1db164e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/1db164e9e682eefbc1414989a043fefa2e9009c2)) ### chore * remove unused import ([88710f1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/88710f1a7c7d50f57108456112da30d1a12a1ba1)) ### Refactor * **Ollama:** integrate new LangChain chat init ([d177afb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/d177afb68be036465ede1f567d2562b145d77d36)) * **OpenAI:** integrate new LangChain chat init ([5e3eb6e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5e3eb6e43df4bd4c452d34b49f254235e9ff1b22)) * remove LangChain wrappers ([2c5f934](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2c5f934f101e319ec4e61009d4c464ca4626c1ff)) * remove LangChain wrappers for Ollama ([25066b2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/25066b2bc51517e50058231664230b8edef365b9)) * remove redundant LangChain wrappers ([9275486](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/927548624034b3c30eca60011d216720102d1815)) * remove redundant wrappers for Ernie and Nvidia 
([bc2c996](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bc2c9967d2f13ade6eeb7b23e9b423f6e79aa890)) --- CHANGELOG.md | 22 ++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea0c578f..1d7b4c62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,25 @@ +## [1.11.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.4...v1.11.0-beta.5) (2024-07-30) + + +### Features + +* fix tests ([1db164e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/1db164e9e682eefbc1414989a043fefa2e9009c2)) + + +### chore + +* remove unused import ([88710f1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/88710f1a7c7d50f57108456112da30d1a12a1ba1)) + + +### Refactor + +* **Ollama:** integrate new LangChain chat init ([d177afb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/d177afb68be036465ede1f567d2562b145d77d36)) +* **OpenAI:** integrate new LangChain chat init ([5e3eb6e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5e3eb6e43df4bd4c452d34b49f254235e9ff1b22)) +* remove LangChain wrappers ([2c5f934](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2c5f934f101e319ec4e61009d4c464ca4626c1ff)) +* remove LangChain wrappers for Ollama ([25066b2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/25066b2bc51517e50058231664230b8edef365b9)) +* remove redundant LangChain wrappers ([9275486](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/927548624034b3c30eca60011d216720102d1815)) +* remove redundant wrappers for Ernie and Nvidia ([bc2c996](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bc2c9967d2f13ade6eeb7b23e9b423f6e79aa890)) + ## [1.11.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.3...v1.11.0-beta.4) (2024-07-25) diff --git a/pyproject.toml b/pyproject.toml index bee7b61d..77d48e36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = 
"1.11.0b4" +version = "1.11.0b5" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From b17756d934e0a26791bb51aa60a8c79b3f8b82a4 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:07:19 +0200 Subject: [PATCH 27/51] style: enforce pylint styling --- scrapegraphai/graphs/abstract_graph.py | 95 +++++++++++++------------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 50de0a94..b022607c 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -7,31 +7,24 @@ import uuid from pydantic import BaseModel -from langchain_community.chat_models import ChatOllama -from langchain_openai import ChatOpenAI - +from langchain_community.chat_models import ChatOllama, ErnieBotChat from langchain_aws import BedrockEmbeddings, ChatBedrock from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings from langchain_community.embeddings import OllamaEmbeddings -from langchain_google_genai import GoogleGenerativeAIEmbeddings +from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings -from langchain_google_genai import ChatGoogleGenerativeAI -from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings from langchain_fireworks import FireworksEmbeddings, ChatFireworks from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings, ChatOpenAI, AzureChatOpenAI from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA -from langchain_community.chat_models import ErnieBotChat +from langchain.chat_models import init_chat_model + from ..helpers import models_tokens from ..models import ( OneApi, DeepSeek ) +from ..utils.logging import set_verbosity_warning, 
set_verbosity_info -from langchain.chat_models import init_chat_model - -from ..utils.logging import set_verbosity_debug, set_verbosity_warning, set_verbosity_info - -from ..helpers import models_tokens class AbstractGraph(ABC): @@ -65,14 +58,14 @@ class AbstractGraph(ABC): >>> result = my_graph.run() """ - def __init__(self, prompt: str, config: dict, + def __init__(self, prompt: str, config: dict, source: Optional[str] = None, schema: Optional[BaseModel] = None): self.prompt = prompt self.source = source self.config = config self.schema = schema - self.llm_model = self._create_llm(config["llm"], chat=True) + self.llm_model = self._create_llm(config["llm"]) self.embedder_model = self._create_default_embedder(llm_config=config["llm"]) if "embeddings" not in config else self._create_embedder( config["embeddings"]) self.verbose = False if config is None else config.get( @@ -128,7 +121,7 @@ def set_common_params(self, params: dict, overwrite=False): for node in self.graph.nodes: node.update_config(params, overwrite) - def _create_llm(self, llm_config: dict, chat=False) -> object: + def _create_llm(self, llm_config: dict) -> object: """ Create a large language model instance based on the configuration provided. 
@@ -148,9 +141,9 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: # If model instance is passed directly instead of the model details if "model_instance" in llm_params: try: - self.model_token = llm_params["model_tokens"] + self.model_token = llm_params["model_tokens"] except KeyError as exc: - raise KeyError("model_tokens not specified") from exc + raise KeyError("model_tokens not specified") from exc return llm_params["model_instance"] # Instantiate the language model based on the model name @@ -161,7 +154,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: except KeyError as exc: raise KeyError("Model not supported") from exc return init_chat_model(**llm_params) - elif "oneapi" in llm_params["model"]: + + if "oneapi" in llm_params["model"]: # take the model after the last dash llm_params["model"] = llm_params["model"].split("/")[-1] try: @@ -169,7 +163,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: except KeyError as exc: raise KeyError("Model not supported") from exc return OneApi(llm_params) - elif "fireworks" in llm_params["model"]: + + if "fireworks" in llm_params["model"]: try: self.model_token = models_tokens["fireworks"][llm_params["model"].split("/")[-1]] llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) @@ -177,7 +172,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: raise KeyError("Model not supported") from exc llm_params["model_provider"] = "fireworks" return init_chat_model(**llm_params) - elif "azure" in llm_params["model"]: + + if "azure" in llm_params["model"]: # take the model after the last dash llm_params["model"] = llm_params["model"].split("/")[-1] try: @@ -186,14 +182,16 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: raise KeyError("Model not supported") from exc llm_params["model_provider"] = "azure_openai" return init_chat_model(**llm_params) - elif "nvidia" in llm_params["model"]: + + if "nvidia" in llm_params["model"]: try: 
self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]] llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) except KeyError as exc: raise KeyError("Model not supported") from exc return ChatNVIDIA(llm_params) - elif "gemini" in llm_params["model"]: + + if "gemini" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("/")[-1] try: self.model_token = models_tokens["gemini"][llm_params["model"]] @@ -201,7 +199,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: raise KeyError("Model not supported") from exc llm_params["model_provider"] = "google_genai " return init_chat_model(**llm_params) - elif llm_params["model"].startswith("claude"): + + if llm_params["model"].startswith("claude"): llm_params["model"] = llm_params["model"].split("/")[-1] try: self.model_token = models_tokens["claude"][llm_params["model"]] @@ -209,7 +208,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: raise KeyError("Model not supported") from exc llm_params["model_provider"] = "anthropic" return init_chat_model(**llm_params) - elif llm_params["model"].startswith("vertexai"): + + if llm_params["model"].startswith("vertexai"): try: self.model_token = models_tokens["vertexai"][llm_params["model"]] except KeyError as exc: @@ -217,7 +217,7 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: llm_params["model_provider"] = "google_vertexai" return init_chat_model(**llm_params) - elif "ollama" in llm_params["model"]: + if "ollama" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("ollama/")[-1] llm_params["model_provider"] = "ollama" @@ -238,7 +238,7 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: return init_chat_model(**llm_params) - elif "hugging_face" in llm_params["model"]: + if "hugging_face" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("/")[-1] try: self.model_token = models_tokens["hugging_face"][llm_params["model"]] @@ -247,7 
+247,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: self.model_token = 8192 llm_params["model_provider"] = "hugging_face" return init_chat_model(**llm_params) - elif "groq" in llm_params["model"]: + + if "groq" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("/")[-1] try: @@ -257,10 +258,9 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: self.model_token = 8192 llm_params["model_provider"] = "groq" return init_chat_model(**llm_params) - elif "bedrock" in llm_params["model"]: + + if "bedrock" in llm_params["model"]: llm_params["model"] = llm_params["model"].split("/")[-1] - model_id = llm_params["model"] - client = llm_params.get("client", None) try: self.model_token = models_tokens["bedrock"][llm_params["model"]] except KeyError: @@ -268,7 +268,8 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: self.model_token = 8192 llm_params["model_provider"] = "bedrock" return init_chat_model(**llm_params) - elif "claude-3-" in llm_params["model"]: + + if "claude-3-" in llm_params["model"]: try: self.model_token = models_tokens["claude"]["claude3"] except KeyError: @@ -276,22 +277,24 @@ def _create_llm(self, llm_config: dict, chat=False) -> object: self.model_token = 8192 llm_params["model_provider"] = "anthropic" return init_chat_model(**llm_params) - elif "deepseek" in llm_params["model"]: + + if "deepseek" in llm_params["model"]: try: self.model_token = models_tokens["deepseek"][llm_params["model"]] except KeyError: print("model not found, using default token size (8192)") self.model_token = 8192 return DeepSeek(llm_params) - elif "ernie" in llm_params["model"]: + + if "ernie" in llm_params["model"]: try: self.model_token = models_tokens["ernie"][llm_params["model"]] except KeyError: print("model not found, using default token size (8192)") self.model_token = 8192 return ErnieBotChat(llm_params) - else: - raise ValueError("Model provided by the configuration not supported") + + raise 
ValueError("Model provided by the configuration not supported") def _create_default_embedder(self, llm_config=None) -> object: """ @@ -308,7 +311,7 @@ def _create_default_embedder(self, llm_config=None) -> object: google_api_key=llm_config["api_key"], model="models/embedding-001" ) if isinstance(self.llm_model, ChatOpenAI): - return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key, + return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key, base_url=self.llm_model.openai_api_base) elif isinstance(self.llm_model, DeepSeek): return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key) @@ -356,7 +359,7 @@ def _create_embedder(self, embedder_config: dict) -> object: # Instantiate the embedding model based on the model name if "openai" in embedder_params["model"]: return OpenAIEmbeddings(api_key=embedder_params["api_key"]) - elif "azure" in embedder_params["model"]: + if "azure" in embedder_params["model"]: return AzureOpenAIEmbeddings() if "nvidia" in embedder_params["model"]: embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) @@ -364,36 +367,36 @@ def _create_embedder(self, embedder_config: dict) -> object: models_tokens["nvidia"][embedder_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc - return NVIDIAEmbeddings(model=embedder_params["model"], + return NVIDIAEmbeddings(model=embedder_params["model"], nvidia_api_key=embedder_params["api_key"]) - elif "ollama" in embedder_params["model"]: + if "ollama" in embedder_params["model"]: embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) try: models_tokens["ollama"][embedder_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc return OllamaEmbeddings(**embedder_params) - elif "hugging_face" in embedder_params["model"]: + if "hugging_face" in embedder_params["model"]: embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) try: 
models_tokens["hugging_face"][embedder_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc return HuggingFaceEmbeddings(model=embedder_params["model"]) - elif "fireworks" in embedder_params["model"]: + if "fireworks" in embedder_params["model"]: embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) try: models_tokens["fireworks"][embedder_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc return FireworksEmbeddings(model=embedder_params["model"]) - elif "gemini" in embedder_params["model"]: + if "gemini" in embedder_params["model"]: try: models_tokens["gemini"][embedder_params["model"]] except KeyError as exc: raise KeyError("Model not supported") from exc return GoogleGenerativeAIEmbeddings(model=embedder_params["model"]) - elif "bedrock" in embedder_params["model"]: + if "bedrock" in embedder_params["model"]: embedder_params["model"] = embedder_params["model"].split("/")[-1] client = embedder_params.get("client", None) try: @@ -401,8 +404,8 @@ def _create_embedder(self, embedder_config: dict) -> object: except KeyError as exc: raise KeyError("Model not supported") from exc return BedrockEmbeddings(client=client, model_id=embedder_params["model"]) - else: - raise ValueError("Model provided by the configuration not supported") + + raise ValueError("Model provided by the configuration not supported") def get_state(self, key=None) -> dict: """ "" @@ -444,11 +447,9 @@ def _create_graph(self): """ Abstract method to create a graph representation. """ - pass @abstractmethod def run(self) -> str: """ Abstract method to execute the graph and return the result. 
""" - pass \ No newline at end of file From bb73d916a1a7b378438038ec928eeda6d8f6ac9d Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:41:09 +0200 Subject: [PATCH 28/51] refactor: reuse code for common interface models --- scrapegraphai/graphs/abstract_graph.py | 157 ++++++++----------------- 1 file changed, 49 insertions(+), 108 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index b022607c..306901e8 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -146,138 +146,61 @@ def _create_llm(self, llm_config: dict) -> object: raise KeyError("model_tokens not specified") from exc return llm_params["model_instance"] - # Instantiate the language model based on the model name - if "gpt-" in llm_params["model"]: + # Instantiate the language model based on the model name (models that use the common interface) + def handle_model(model_name, provider, token_key, default_token=8192): try: - self.model_token = models_tokens["openai"][llm_params["model"]] - llm_params["model_provider"] = "openai" - except KeyError as exc: - raise KeyError("Model not supported") from exc + self.model_token = models_tokens[provider][token_key] + except KeyError: + print(f"Model not found, using default token size ({default_token})") + self.model_token = default_token + llm_params["model_provider"] = provider + llm_params["model"] = model_name return init_chat_model(**llm_params) - if "oneapi" in llm_params["model"]: - # take the model after the last dash - llm_params["model"] = llm_params["model"].split("/")[-1] - try: - self.model_token = models_tokens["oneapi"][llm_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return OneApi(llm_params) + if "gpt-" in llm_params["model"]: + return handle_model(llm_params["model"], "openai", llm_params["model"]) if "fireworks" in 
llm_params["model"]: - try: - self.model_token = models_tokens["fireworks"][llm_params["model"].split("/")[-1]] - llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) - except KeyError as exc: - raise KeyError("Model not supported") from exc - llm_params["model_provider"] = "fireworks" - return init_chat_model(**llm_params) + model_name = "/".join(llm_params["model"].split("/")[1:]) + token_key = llm_params["model"].split("/")[-1] + return handle_model(model_name, "fireworks", token_key) if "azure" in llm_params["model"]: - # take the model after the last dash - llm_params["model"] = llm_params["model"].split("/")[-1] - try: - self.model_token = models_tokens["azure"][llm_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - llm_params["model_provider"] = "azure_openai" - return init_chat_model(**llm_params) - - if "nvidia" in llm_params["model"]: - try: - self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]] - llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) - except KeyError as exc: - raise KeyError("Model not supported") from exc - return ChatNVIDIA(llm_params) + model_name = llm_params["model"].split("/")[-1] + return handle_model(model_name, "azure_openai", model_name) if "gemini" in llm_params["model"]: - llm_params["model"] = llm_params["model"].split("/")[-1] - try: - self.model_token = models_tokens["gemini"][llm_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - llm_params["model_provider"] = "google_genai " - return init_chat_model(**llm_params) + model_name = llm_params["model"].split("/")[-1] + return handle_model(model_name, "google_genai", model_name) if llm_params["model"].startswith("claude"): - llm_params["model"] = llm_params["model"].split("/")[-1] - try: - self.model_token = models_tokens["claude"][llm_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - 
llm_params["model_provider"] = "anthropic" - return init_chat_model(**llm_params) + model_name = llm_params["model"].split("/")[-1] + return handle_model(model_name, "anthropic", model_name) if llm_params["model"].startswith("vertexai"): - try: - self.model_token = models_tokens["vertexai"][llm_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - llm_params["model_provider"] = "google_vertexai" - return init_chat_model(**llm_params) + return handle_model(llm_params["model"], "google_vertexai", llm_params["model"]) if "ollama" in llm_params["model"]: - llm_params["model"] = llm_params["model"].split("ollama/")[-1] - llm_params["model_provider"] = "ollama" - - # allow user to set model_tokens in config - try: - if "model_tokens" in llm_params: - self.model_token = llm_params["model_tokens"] - elif llm_params["model"] in models_tokens["ollama"]: - try: - self.model_token = models_tokens["ollama"][llm_params["model"]] - except KeyError as exc: - print("model not found, using default token size (8192)") - self.model_token = 8192 - else: - self.model_token = 8192 - except AttributeError: - self.model_token = 8192 - - return init_chat_model(**llm_params) + model_name = llm_params["model"].split("ollama/")[-1] + token_key = model_name if "model_tokens" not in llm_params else llm_params["model_tokens"] + return handle_model(model_name, "ollama", token_key) if "hugging_face" in llm_params["model"]: - llm_params["model"] = llm_params["model"].split("/")[-1] - try: - self.model_token = models_tokens["hugging_face"][llm_params["model"]] - except KeyError: - print("model not found, using default token size (8192)") - self.model_token = 8192 - llm_params["model_provider"] = "hugging_face" - return init_chat_model(**llm_params) + model_name = llm_params["model"].split("/")[-1] + return handle_model(model_name, "hugging_face", model_name) if "groq" in llm_params["model"]: - llm_params["model"] = llm_params["model"].split("/")[-1] - - try: - 
self.model_token = models_tokens["groq"][llm_params["model"]] - except KeyError: - print("model not found, using default token size (8192)") - self.model_token = 8192 - llm_params["model_provider"] = "groq" - return init_chat_model(**llm_params) + model_name = llm_params["model"].split("/")[-1] + return handle_model(model_name, "groq", model_name) if "bedrock" in llm_params["model"]: - llm_params["model"] = llm_params["model"].split("/")[-1] - try: - self.model_token = models_tokens["bedrock"][llm_params["model"]] - except KeyError: - print("model not found, using default token size (8192)") - self.model_token = 8192 - llm_params["model_provider"] = "bedrock" - return init_chat_model(**llm_params) + model_name = llm_params["model"].split("/")[-1] + return handle_model(model_name, "bedrock", model_name) if "claude-3-" in llm_params["model"]: - try: - self.model_token = models_tokens["claude"]["claude3"] - except KeyError: - print("model not found, using default token size (8192)") - self.model_token = 8192 - llm_params["model_provider"] = "anthropic" - return init_chat_model(**llm_params) + return handle_model(llm_params["model"], "anthropic", "claude3") + # Instantiate the language model based on the model name (models that do not use the common interface) if "deepseek" in llm_params["model"]: try: self.model_token = models_tokens["deepseek"][llm_params["model"]] @@ -293,7 +216,25 @@ def _create_llm(self, llm_config: dict) -> object: print("model not found, using default token size (8192)") self.model_token = 8192 return ErnieBotChat(llm_params) + + if "oneapi" in llm_params["model"]: + # take the model after the last dash + llm_params["model"] = llm_params["model"].split("/")[-1] + try: + self.model_token = models_tokens["oneapi"][llm_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return OneApi(llm_params) + + if "nvidia" in llm_params["model"]: + try: + self.model_token = 
models_tokens["nvidia"][llm_params["model"].split("/")[-1]] + llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) + except KeyError as exc: + raise KeyError("Model not supported") from exc + return ChatNVIDIA(llm_params) + # Raise an error if the model did not match any of the previous cases raise ValueError("Model provided by the configuration not supported") def _create_default_embedder(self, llm_config=None) -> object: From 4caed545e5030460b2d5e46f9ad90546ce36f0ee Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 31 Jul 2024 19:49:59 +0200 Subject: [PATCH 29/51] feat: intregration of firebase --- pyproject.toml | 1 + scrapegraphai/docloaders/__init__.py | 1 + scrapegraphai/docloaders/broswer_base.py | 46 ++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 scrapegraphai/docloaders/broswer_base.py diff --git a/pyproject.toml b/pyproject.toml index 77d48e36..2738bfd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "langchain-fireworks>=0.1.3", "langchain-community>=0.2.9", "langchain-huggingface>=0.0.3", + "browserbase==0.3.0" ] license = "MIT" diff --git a/scrapegraphai/docloaders/__init__.py b/scrapegraphai/docloaders/__init__.py index a9e45407..0efdc879 100644 --- a/scrapegraphai/docloaders/__init__.py +++ b/scrapegraphai/docloaders/__init__.py @@ -1,3 +1,4 @@ """__init__.py file for docloaders folder""" from .chromium import ChromiumLoader +from .broswer_base import browser_base_fetch \ No newline at end of file diff --git a/scrapegraphai/docloaders/broswer_base.py b/scrapegraphai/docloaders/broswer_base.py new file mode 100644 index 00000000..6127c097 --- /dev/null +++ b/scrapegraphai/docloaders/broswer_base.py @@ -0,0 +1,46 @@ +""" +browserbase integration module +""" +from browserbase import Browserbase + +def browser_base_fetch(api_key: str, project_id: str, link: str) -> object: + """ + BrowserBase Fetch + + This module provides an interface to the BrowserBase API. 
+ + The `browser_base_fetch` function takes three arguments: + - `api_key`: The API key provided by BrowserBase. + - `project_id`: The ID of the project on BrowserBase where you want to fetch data from. + - `link`: The URL or link that you want to fetch data from. + + It initializes a Browserbase object with the given API key and project ID, + then uses this object to load the specified link. It returns the result of the loading operation. + + Example usage: + + ``` + from browser_base_fetch import browser_base_fetch + + result = browser_base_fetch(api_key="your_api_key", + project_id="your_project_id", link="https://example.com") + print(result) + ``` + + Please note that you need to replace "your_api_key" and "your_project_id" + with your actual BrowserBase API key and project ID. + + Args: + api_key (str): The API key provided by BrowserBase. + project_id (str): The ID of the project on BrowserBase where you want to fetch data from. + link (str): The URL or link that you want to fetch data from. + + Returns: + object: The result of the loading operation. 
+ """ + + browserbase = Browserbase(api_key=api_key, project_id=project_id) + + result = browserbase.load(link) + + return result From 74ed8d06c5db4f9734521c2f84f4379b18b7308f Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 31 Jul 2024 17:51:22 +0000 Subject: [PATCH 30/51] ci(release): 1.11.0-beta.6 [skip ci] ## [1.11.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.5...v1.11.0-beta.6) (2024-07-31) ### Features * intregration of firebase ([4caed54](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4caed545e5030460b2d5e46f9ad90546ce36f0ee)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d7b4c62..53e36c8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.11.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.5...v1.11.0-beta.6) (2024-07-31) + + +### Features + +* intregration of firebase ([4caed54](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4caed545e5030460b2d5e46f9ad90546ce36f0ee)) + ## [1.11.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.4...v1.11.0-beta.5) (2024-07-30) diff --git a/pyproject.toml b/pyproject.toml index 2738bfd6..4a7fe29f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.11.0b5" +version = "1.11.0b6" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
authors = [ From fb87d01ced72c0912be86ae01d93ceefa5d2df08 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 11:27:10 +0200 Subject: [PATCH 31/51] Create browser_base.py --- examples/extras/browser_base.py | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 examples/extras/browser_base.py diff --git a/examples/extras/browser_base.py b/examples/extras/browser_base.py new file mode 100644 index 00000000..465c80ba --- /dev/null +++ b/examples/extras/browser_base.py @@ -0,0 +1,47 @@ +""" +Basic example of scraping pipeline using SmartScraper +""" + +import os, json +from scrapegraphai.graphs import SmartScraperGraph +from scrapegraphai.utils import prettify_exec_info +from dotenv import load_dotenv +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + + +graph_config = { + "llm": { + "api_key": os.getenv("OPENAI_API_KEY"), + "model": "gpt-3.5-turbo", + }, + "browser_base": { + "api_key": os.getenv("BROWSER_BASE_API_KEY"), + "project_id": os.getenv("BROWSER_BASE_API_KEY"), + }, + "verbose": True, + "headless": False, +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = SmartScraperGraph( + prompt="List me what does the company do, the name and a contact email.", + source="https://scrapegraphai.com/", + config=graph_config +) + +result = smart_scraper_graph.run() +print(json.dumps(result, indent=4)) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) From 7076ab12d3e07d02a96ca00375454385303ae004 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 11:31:27 +0200 Subject: 
[PATCH 32/51] allignment --- pyproject.toml | 1 + requirements-dev.lock | 5 ++++ requirements.lock | 5 ++++ scrapegraphai/docloaders/__init__.py | 1 + scrapegraphai/docloaders/browser_base.py | 38 ++++++++++++++++++++++++ 5 files changed, 50 insertions(+) create mode 100644 scrapegraphai/docloaders/browser_base.py diff --git a/pyproject.toml b/pyproject.toml index 77d48e36..2738bfd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "langchain-fireworks>=0.1.3", "langchain-community>=0.2.9", "langchain-huggingface>=0.0.3", + "browserbase==0.3.0" ] license = "MIT" diff --git a/requirements-dev.lock b/requirements-dev.lock index 6bbbd4b9..24b7156d 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -54,6 +54,8 @@ boto3==1.34.146 botocore==1.34.146 # via boto3 # via s3transfer +browserbase==0.3.0 + # via scrapegraphai burr==0.22.1 # via scrapegraphai cachetools==5.4.0 @@ -208,6 +210,7 @@ httptools==0.6.1 # via uvicorn httpx==0.27.0 # via anthropic + # via browserbase # via fastapi # via fireworks-ai # via groq @@ -383,6 +386,7 @@ pillow==10.4.0 platformdirs==4.2.2 # via pylint playwright==1.45.0 + # via browserbase # via scrapegraphai # via undetected-playwright pluggy==1.5.0 @@ -412,6 +416,7 @@ pyasn1-modules==0.4.0 # via google-auth pydantic==2.8.2 # via anthropic + # via browserbase # via burr # via fastapi # via fastapi-pagination diff --git a/requirements.lock b/requirements.lock index b4d1015d..0e8bb930 100644 --- a/requirements.lock +++ b/requirements.lock @@ -37,6 +37,8 @@ boto3==1.34.146 botocore==1.34.146 # via boto3 # via s3transfer +browserbase==0.3.0 + # via scrapegraphai cachetools==5.4.0 # via google-auth certifi==2024.7.4 @@ -153,6 +155,7 @@ httplib2==0.22.0 # via google-auth-httplib2 httpx==0.27.0 # via anthropic + # via browserbase # via fireworks-ai # via groq # via openai @@ -275,6 +278,7 @@ pillow==10.4.0 # via langchain-nvidia-ai-endpoints # via sentence-transformers playwright==1.45.0 + # via 
browserbase # via scrapegraphai # via undetected-playwright proto-plus==1.24.0 @@ -299,6 +303,7 @@ pyasn1-modules==0.4.0 # via google-auth pydantic==2.8.2 # via anthropic + # via browserbase # via fireworks-ai # via google-cloud-aiplatform # via google-generativeai diff --git a/scrapegraphai/docloaders/__init__.py b/scrapegraphai/docloaders/__init__.py index a9e45407..51561a42 100644 --- a/scrapegraphai/docloaders/__init__.py +++ b/scrapegraphai/docloaders/__init__.py @@ -1,3 +1,4 @@ """__init__.py file for docloaders folder""" from .chromium import ChromiumLoader +from .broswer_base import browser_base_fetch diff --git a/scrapegraphai/docloaders/browser_base.py b/scrapegraphai/docloaders/browser_base.py new file mode 100644 index 00000000..8f2a0b8e --- /dev/null +++ b/scrapegraphai/docloaders/browser_base.py @@ -0,0 +1,38 @@ +""" +browserbase integration module +""" +from browserbase import Browserbase + +def browser_base_fetch(api_key: str, project_id: str, link: str) -> object: + """ + BrowserBase Fetch + This module provides an interface to the BrowserBase API. + The `browser_base_fetch` function takes three arguments: + - `api_key`: The API key provided by BrowserBase. + - `project_id`: The ID of the project on BrowserBase where you want to fetch data from. + - `link`: The URL or link that you want to fetch data from. + It initializes a Browserbase object with the given API key and project ID, + then uses this object to load the specified link. + It returns the result of the loading operation. + Example usage: + ``` + from browser_base_fetch import browser_base_fetch + result = browser_base_fetch(api_key="your_api_key", + project_id="your_project_id", link="https://example.com") + print(result) + ``` + Please note that you need to replace "your_api_key" and "your_project_id" + with your actual BrowserBase API key and project ID. + Args: + api_key (str): The API key provided by BrowserBase. 
+ project_id (str): The ID of the project on BrowserBase where you want to fetch data from. + link (str): The URL or link that you want to fetch data from. + Returns: + object: The result of the loading operation. + """ + + browserbase = Browserbase(api_key=api_key, project_id=project_id) + + result = browserbase.load(link) + + return result From a94ebcde0078d66d33e67f7e0a87850efb92d408 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Thu, 1 Aug 2024 11:53:17 +0200 Subject: [PATCH 33/51] refactor: move embeddings code from AbstractGraph to RAGNode --- scrapegraphai/graphs/abstract_graph.py | 123 +-------------------- scrapegraphai/nodes/rag_node.py | 144 ++++++++++++++++++++++++- 2 files changed, 142 insertions(+), 125 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 306901e8..4ed08057 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -7,15 +7,8 @@ import uuid from pydantic import BaseModel -from langchain_community.chat_models import ChatOllama, ErnieBotChat -from langchain_aws import BedrockEmbeddings, ChatBedrock -from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings -from langchain_community.embeddings import OllamaEmbeddings -from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI -from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings -from langchain_fireworks import FireworksEmbeddings, ChatFireworks -from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings, ChatOpenAI, AzureChatOpenAI -from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA +from langchain_community.chat_models import ErnieBotChat +from langchain_nvidia_ai_endpoints import ChatNVIDIA from langchain.chat_models import init_chat_model from ..helpers import models_tokens @@ -66,8 +59,6 @@ def __init__(self, prompt: str, config: dict, 
self.config = config self.schema = schema self.llm_model = self._create_llm(config["llm"]) - self.embedder_model = self._create_default_embedder(llm_config=config["llm"]) if "embeddings" not in config else self._create_embedder( - config["embeddings"]) self.verbose = False if config is None else config.get( "verbose", False) self.headless = True if config is None else config.get( @@ -237,116 +228,6 @@ def handle_model(model_name, provider, token_key, default_token=8192): # Raise an error if the model did not match any of the previous cases raise ValueError("Model provided by the configuration not supported") - def _create_default_embedder(self, llm_config=None) -> object: - """ - Create an embedding model instance based on the chosen llm model. - - Returns: - object: An instance of the embedding model client. - - Raises: - ValueError: If the model is not supported. - """ - if isinstance(self.llm_model, ChatGoogleGenerativeAI): - return GoogleGenerativeAIEmbeddings( - google_api_key=llm_config["api_key"], model="models/embedding-001" - ) - if isinstance(self.llm_model, ChatOpenAI): - return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key, - base_url=self.llm_model.openai_api_base) - elif isinstance(self.llm_model, DeepSeek): - return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key) - elif isinstance(self.llm_model, ChatVertexAI): - return VertexAIEmbeddings() - elif isinstance(self.llm_model, AzureOpenAIEmbeddings): - return self.llm_model - elif isinstance(self.llm_model, AzureChatOpenAI): - return AzureOpenAIEmbeddings() - elif isinstance(self.llm_model, ChatFireworks): - return FireworksEmbeddings(model=self.llm_model.model_name) - elif isinstance(self.llm_model, ChatNVIDIA): - return NVIDIAEmbeddings(model=self.llm_model.model_name) - elif isinstance(self.llm_model, ChatOllama): - # unwrap the kwargs from the model whihc is a dict - params = self.llm_model._lc_kwargs - # remove streaming and temperature - params.pop("streaming", None) - 
params.pop("temperature", None) - - return OllamaEmbeddings(**params) - elif isinstance(self.llm_model, ChatHuggingFace): - return HuggingFaceEmbeddings(model=self.llm_model.model) - elif isinstance(self.llm_model, ChatBedrock): - return BedrockEmbeddings(client=None, model_id=self.llm_model.model_id) - else: - raise ValueError("Embedding Model missing or not supported") - - def _create_embedder(self, embedder_config: dict) -> object: - """ - Create an embedding model instance based on the configuration provided. - - Args: - embedder_config (dict): Configuration parameters for the embedding model. - - Returns: - object: An instance of the embedding model client. - - Raises: - KeyError: If the model is not supported. - """ - embedder_params = {**embedder_config} - if "model_instance" in embedder_config: - return embedder_params["model_instance"] - # Instantiate the embedding model based on the model name - if "openai" in embedder_params["model"]: - return OpenAIEmbeddings(api_key=embedder_params["api_key"]) - if "azure" in embedder_params["model"]: - return AzureOpenAIEmbeddings() - if "nvidia" in embedder_params["model"]: - embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) - try: - models_tokens["nvidia"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return NVIDIAEmbeddings(model=embedder_params["model"], - nvidia_api_key=embedder_params["api_key"]) - if "ollama" in embedder_params["model"]: - embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) - try: - models_tokens["ollama"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return OllamaEmbeddings(**embedder_params) - if "hugging_face" in embedder_params["model"]: - embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) - try: - models_tokens["hugging_face"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model 
not supported") from exc - return HuggingFaceEmbeddings(model=embedder_params["model"]) - if "fireworks" in embedder_params["model"]: - embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) - try: - models_tokens["fireworks"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return FireworksEmbeddings(model=embedder_params["model"]) - if "gemini" in embedder_params["model"]: - try: - models_tokens["gemini"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return GoogleGenerativeAIEmbeddings(model=embedder_params["model"]) - if "bedrock" in embedder_params["model"]: - embedder_params["model"] = embedder_params["model"].split("/")[-1] - client = embedder_params.get("client", None) - try: - models_tokens["bedrock"][embedder_params["model"]] - except KeyError as exc: - raise KeyError("Model not supported") from exc - return BedrockEmbeddings(client=client, model_id=embedder_params["model"]) - - raise ValueError("Model provided by the configuration not supported") def get_state(self, key=None) -> dict: """ "" diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py index a4f58191..952daa6c 100644 --- a/scrapegraphai/nodes/rag_node.py +++ b/scrapegraphai/nodes/rag_node.py @@ -14,8 +14,20 @@ from langchain_community.document_transformers import EmbeddingsRedundantFilter from langchain_community.vectorstores import FAISS +from langchain_community.chat_models import ChatOllama +from langchain_aws import BedrockEmbeddings, ChatBedrock +from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings +from langchain_community.embeddings import OllamaEmbeddings +from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI +from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings +from langchain_fireworks import FireworksEmbeddings, ChatFireworks +from langchain_openai import 
AzureOpenAIEmbeddings, OpenAIEmbeddings, ChatOpenAI, AzureChatOpenAI +from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA + from ..utils.logging import get_logger from .base_node import BaseNode +from ..helpers import models_tokens +from ..models import DeepSeek class RAGNode(BaseNode): @@ -95,10 +107,21 @@ def execute(self, state: dict) -> dict: self.logger.info("--- (updated chunks metadata) ---") # check if embedder_model is provided, if not use llm_model - self.embedder_model = ( - self.embedder_model if self.embedder_model else self.llm_model - ) - embeddings = self.embedder_model + if self.embedder_model is not None: + embeddings = self.embedder_model + elif 'embeddings' in self.node_config: + try: + embeddings = self._create_embedder(self.node_config['embedder_config']) + except Exception: + try: + embeddings = self._create_default_embedder() + self.embedder_model = embeddings + except ValueError: + embeddings = self.llm_model + self.embedder_model = self.llm_model + else: + embeddings = self.llm_model + self.embedder_model = self.llm_model folder_name = self.node_config.get("cache_path", "cache") @@ -141,3 +164,116 @@ def execute(self, state: dict) -> dict: state.update({self.output[0]: compressed_docs}) return state + + + def _create_default_embedder(self, llm_config=None) -> object: + """ + Create an embedding model instance based on the chosen llm model. + + Returns: + object: An instance of the embedding model client. + + Raises: + ValueError: If the model is not supported. 
+ """ + if isinstance(self.llm_model, ChatGoogleGenerativeAI): + return GoogleGenerativeAIEmbeddings( + google_api_key=llm_config["api_key"], model="models/embedding-001" + ) + if isinstance(self.llm_model, ChatOpenAI): + return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key, + base_url=self.llm_model.openai_api_base) + elif isinstance(self.llm_model, DeepSeek): + return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key) + elif isinstance(self.llm_model, ChatVertexAI): + return VertexAIEmbeddings() + elif isinstance(self.llm_model, AzureOpenAIEmbeddings): + return self.llm_model + elif isinstance(self.llm_model, AzureChatOpenAI): + return AzureOpenAIEmbeddings() + elif isinstance(self.llm_model, ChatFireworks): + return FireworksEmbeddings(model=self.llm_model.model_name) + elif isinstance(self.llm_model, ChatNVIDIA): + return NVIDIAEmbeddings(model=self.llm_model.model_name) + elif isinstance(self.llm_model, ChatOllama): + # unwrap the kwargs from the model whihc is a dict + params = self.llm_model._lc_kwargs + # remove streaming and temperature + params.pop("streaming", None) + params.pop("temperature", None) + + return OllamaEmbeddings(**params) + elif isinstance(self.llm_model, ChatHuggingFace): + return HuggingFaceEmbeddings(model=self.llm_model.model) + elif isinstance(self.llm_model, ChatBedrock): + return BedrockEmbeddings(client=None, model_id=self.llm_model.model_id) + else: + raise ValueError("Embedding Model missing or not supported") + + + def _create_embedder(self, embedder_config: dict) -> object: + """ + Create an embedding model instance based on the configuration provided. + + Args: + embedder_config (dict): Configuration parameters for the embedding model. + + Returns: + object: An instance of the embedding model client. + + Raises: + KeyError: If the model is not supported. 
+ """ + embedder_params = {**embedder_config} + if "model_instance" in embedder_config: + return embedder_params["model_instance"] + # Instantiate the embedding model based on the model name + if "openai" in embedder_params["model"]: + return OpenAIEmbeddings(api_key=embedder_params["api_key"]) + if "azure" in embedder_params["model"]: + return AzureOpenAIEmbeddings() + if "nvidia" in embedder_params["model"]: + embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) + try: + models_tokens["nvidia"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return NVIDIAEmbeddings(model=embedder_params["model"], + nvidia_api_key=embedder_params["api_key"]) + if "ollama" in embedder_params["model"]: + embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) + try: + models_tokens["ollama"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return OllamaEmbeddings(**embedder_params) + if "hugging_face" in embedder_params["model"]: + embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) + try: + models_tokens["hugging_face"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return HuggingFaceEmbeddings(model=embedder_params["model"]) + if "fireworks" in embedder_params["model"]: + embedder_params["model"] = "/".join(embedder_params["model"].split("/")[1:]) + try: + models_tokens["fireworks"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return FireworksEmbeddings(model=embedder_params["model"]) + if "gemini" in embedder_params["model"]: + try: + models_tokens["gemini"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return GoogleGenerativeAIEmbeddings(model=embedder_params["model"]) + if "bedrock" in embedder_params["model"]: + 
embedder_params["model"] = embedder_params["model"].split("/")[-1] + client = embedder_params.get("client", None) + try: + models_tokens["bedrock"][embedder_params["model"]] + except KeyError as exc: + raise KeyError("Model not supported") from exc + return BedrockEmbeddings(client=client, model_id=embedder_params["model"]) + + raise ValueError("Model provided by the configuration not supported") From 5ecdbe715f4bb223fa1be834fda07ccea2a51cb9 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 12:51:18 +0200 Subject: [PATCH 34/51] feat: add integration in the abstract grapgh --- ...ser_base.py => browser_base_integration.py} | 6 ++++-- scrapegraphai/docloaders/__init__.py | 2 +- scrapegraphai/graphs/abstract_graph.py | 15 ++++++++------- scrapegraphai/nodes/fetch_node.py | 18 ++++++++++++++---- 4 files changed, 27 insertions(+), 14 deletions(-) rename examples/extras/{browser_base.py => browser_base_integration.py} (98%) diff --git a/examples/extras/browser_base.py b/examples/extras/browser_base_integration.py similarity index 98% rename from examples/extras/browser_base.py rename to examples/extras/browser_base_integration.py index 465c80ba..97529879 100644 --- a/examples/extras/browser_base.py +++ b/examples/extras/browser_base_integration.py @@ -2,10 +2,12 @@ Basic example of scraping pipeline using SmartScraper """ -import os, json +import os +import json +from dotenv import load_dotenv from scrapegraphai.graphs import SmartScraperGraph from scrapegraphai.utils import prettify_exec_info -from dotenv import load_dotenv + load_dotenv() # ************************************************ diff --git a/scrapegraphai/docloaders/__init__.py b/scrapegraphai/docloaders/__init__.py index 51561a42..45a3783d 100644 --- a/scrapegraphai/docloaders/__init__.py +++ b/scrapegraphai/docloaders/__init__.py @@ -1,4 +1,4 @@ """__init__.py file for docloaders folder""" from .chromium import ChromiumLoader -from .broswer_base import browser_base_fetch +from 
.browser_base import browser_base_fetch diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 50de0a94..2ccc988b 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -72,15 +72,16 @@ def __init__(self, prompt: str, config: dict, self.source = source self.config = config self.schema = schema - self.llm_model = self._create_llm(config["llm"], chat=True) - self.embedder_model = self._create_default_embedder(llm_config=config["llm"]) if "embeddings" not in config else self._create_embedder( - config["embeddings"]) - self.verbose = False if config is None else config.get( + self.llm_model = self._create_llm(self.config["llm"], chat=True) + self.embedder_model = self._create_default_embedder(llm_config=self.config["llm"]) if "embeddings" not in self.config else self._create_embedder( + self.config["embeddings"]) + self.verbose = False if self.config is None else self.config.get( "verbose", False) - self.headless = True if config is None else config.get( + self.headless = True if self.config is None else config.get( "headless", True) - self.loader_kwargs = config.get("loader_kwargs", {}) - self.cache_path = config.get("cache_path", False) + self.loader_kwargs = self.config.get("loader_kwargs", {}) + self.cache_path = self.config.get("cache_path", False) + self.browser_base = self.config.get("browser_base") # Create the graph self.graph = self._create_graph() diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 64a80cfe..95561a66 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -11,6 +11,7 @@ from langchain_core.documents import Document from ..utils.cleanup_html import cleanup_html from ..docloaders import ChromiumLoader +from ..docloaders.browser_base import browser_base_fetch from ..utils.convert_to_md import convert_to_md from ..utils.logging import get_logger from .base_node import BaseNode @@ -74,6 
+75,8 @@ def __init__( False if node_config is None else node_config.get("cut", True) ) + self.browser_base = node_config.get("browser_base") + def execute(self, state): """ Executes the node's logic to fetch HTML content from a specified URL and @@ -164,7 +167,7 @@ def execute(self, state): parsed_content = source - if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator: + if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator: parsed_content = convert_to_md(source) compressed_document = [ @@ -177,7 +180,7 @@ def execute(self, state): if response.status_code == 200: if not response.text.strip(): raise ValueError("No HTML body content found in the response.") - + parsed_content = response if not self.cut: @@ -198,8 +201,15 @@ def execute(self, state): if self.node_config is not None: loader_kwargs = self.node_config.get("loader_kwargs", {}) - loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs) - document = loader.load() + if self.browser_base is not None: + document = [ + Document(page_content= browser_base_fetch(self.browser_base.get("api_key"), + self.browser_base.get("project_id"), source), + metadata={}) + ] + else: + loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs) + document = loader.load() if not document or not document[0].page_content.strip(): raise ValueError("No HTML body content found in the document fetched by ChromiumLoader.") From 65f9e3a24c8f192d42fb467c03a33fd4b1f64588 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 12:58:35 +0200 Subject: [PATCH 35/51] Delete browser_base.py --- scrapegraphai/docloaders/browser_base.py | 38 ------------------------ 1 file changed, 38 deletions(-) delete mode 100644 scrapegraphai/docloaders/browser_base.py diff --git a/scrapegraphai/docloaders/browser_base.py b/scrapegraphai/docloaders/browser_base.py deleted file mode 100644 index 
8f2a0b8e..00000000 --- a/scrapegraphai/docloaders/browser_base.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -browserbase integration module -""" -from browserbase import Browserbase - -def browser_base_fetch(api_key: str, project_id: str, link: str) -> object: - """ - BrowserBase Fetch - This module provides an interface to the BrowserBase API. - The `browser_base_fetch` function takes three arguments: - - `api_key`: The API key provided by BrowserBase. - - `project_id`: The ID of the project on BrowserBase where you want to fetch data from. - - `link`: The URL or link that you want to fetch data from. - It initializes a Browserbase object with the given API key and project ID, - then uses this object to load the specified link. - It returns the result of the loading operation. - Example usage: - ``` - from browser_base_fetch import browser_base_fetch - result = browser_base_fetch(api_key="your_api_key", - project_id="your_project_id", link="https://example.com") - print(result) - ``` - Please note that you need to replace "your_api_key" and "your_project_id" - with your actual BrowserBase API key and project ID. - Args: - api_key (str): The API key provided by BrowserBase. - project_id (str): The ID of the project on BrowserBase where you want to fetch data from. - link (str): The URL or link that you want to fetch data from. - Returns: - object: The result of the loading operation. 
- """ - - browserbase = Browserbase(api_key=api_key, project_id=project_id) - - result = browserbase.load(link) - - return result From d03eedccd718379f267fa305165ad61a295112f8 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 13:05:13 +0200 Subject: [PATCH 36/51] Update chromium.py --- scrapegraphai/docloaders/chromium.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scrapegraphai/docloaders/chromium.py b/scrapegraphai/docloaders/chromium.py index 474c22de..cb0cfd9a 100644 --- a/scrapegraphai/docloaders/chromium.py +++ b/scrapegraphai/docloaders/chromium.py @@ -1,3 +1,6 @@ +""" +Chromium module +""" import asyncio from typing import Any, AsyncIterator, Iterator, List, Optional From e21d461710e036eb3f71382a2d0d832bf1863c39 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 13:16:49 +0200 Subject: [PATCH 37/51] push --- .../docloaders/{broswer_base.py => browser_base.py} | 8 +++++--- scrapegraphai/nodes/fetch_node.py | 9 ++++----- 2 files changed, 9 insertions(+), 8 deletions(-) rename scrapegraphai/docloaders/{broswer_base.py => browser_base.py} (83%) diff --git a/scrapegraphai/docloaders/broswer_base.py b/scrapegraphai/docloaders/browser_base.py similarity index 83% rename from scrapegraphai/docloaders/broswer_base.py rename to scrapegraphai/docloaders/browser_base.py index 6127c097..47798e29 100644 --- a/scrapegraphai/docloaders/broswer_base.py +++ b/scrapegraphai/docloaders/browser_base.py @@ -2,8 +2,9 @@ browserbase integration module """ from browserbase import Browserbase +from typing import List -def browser_base_fetch(api_key: str, project_id: str, link: str) -> object: +def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[str]: """ BrowserBase Fetch @@ -15,7 +16,8 @@ def browser_base_fetch(api_key: str, project_id: str, link: str) -> object: - `link`: The URL or link that you want to fetch data from. 
It initializes a Browserbase object with the given API key and project ID, - then uses this object to load the specified link. It returns the result of the loading operation. + then uses this object to load the specified link. + It returns the result of the loading operation. Example usage: @@ -41,6 +43,6 @@ def browser_base_fetch(api_key: str, project_id: str, link: str) -> object: browserbase = Browserbase(api_key=api_key, project_id=project_id) - result = browserbase.load(link) + result = browserbase.load([link]) return result diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 95561a66..741f6a22 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -202,11 +202,10 @@ def execute(self, state): loader_kwargs = self.node_config.get("loader_kwargs", {}) if self.browser_base is not None: - document = [ - Document(page_content= browser_base_fetch(self.browser_base.get("api_key"), - self.browser_base.get("project_id"), source), - metadata={}) - ] + data = browser_base_fetch(self.browser_base.get("api_key"), + self.browser_base.get("project_id"), source) + + document = [Document(page_content= data, metadata={"source": "html file"})] else: loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs) document = loader.load() From 968c69e217d9c180b9b8c2aa52ca59b9a1733525 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 13:23:54 +0200 Subject: [PATCH 38/51] fix: fixed bug on fetch_node Co-Authored-By: Federico Minutoli <40361744+DiTo97@users.noreply.github.com> --- scrapegraphai/docloaders/browser_base.py | 2 +- scrapegraphai/nodes/fetch_node.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scrapegraphai/docloaders/browser_base.py b/scrapegraphai/docloaders/browser_base.py index 47798e29..dd290d2d 100644 --- a/scrapegraphai/docloaders/browser_base.py +++ b/scrapegraphai/docloaders/browser_base.py @@ -43,6 +43,6 @@ def 
browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s browserbase = Browserbase(api_key=api_key, project_id=project_id) - result = browserbase.load([link]) + result = browserbase.load(link) return result diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 741f6a22..86b02bf6 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -202,10 +202,11 @@ def execute(self, state): loader_kwargs = self.node_config.get("loader_kwargs", {}) if self.browser_base is not None: - data = browser_base_fetch(self.browser_base.get("api_key"), - self.browser_base.get("project_id"), source) + if self.browser_base is not None: + data = browser_base_fetch(self.browser_base.get("api_key"), + self.browser_base.get("project_id"), [source]) - document = [Document(page_content= data, metadata={"source": "html file"})] + document = [Document(page_content=content, metadata={"source": source}) for content in data] else: loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs) document = loader.load() From 6d8e02cd62ecf213cfff6e8258b79564db8eeb55 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 13:24:32 +0200 Subject: [PATCH 39/51] Update browser_base.py Co-Authored-By: Federico Minutoli <40361744+DiTo97@users.noreply.github.com> --- scrapegraphai/docloaders/browser_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/docloaders/browser_base.py b/scrapegraphai/docloaders/browser_base.py index dd290d2d..77628bc5 100644 --- a/scrapegraphai/docloaders/browser_base.py +++ b/scrapegraphai/docloaders/browser_base.py @@ -1,8 +1,8 @@ """ browserbase integration module """ -from browserbase import Browserbase from typing import List +from browserbase import Browserbase def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[str]: """ @@ -43,6 +43,6 @@ def browser_base_fetch(api_key: str, project_id: str, link: 
List[str]) -> List[s browserbase = Browserbase(api_key=api_key, project_id=project_id) - result = browserbase.load(link) + result = browserbase.load([link]) return result From be870a43161cb2ed7f0f60553c2f3742c6b939eb Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 13:24:48 +0200 Subject: [PATCH 40/51] Update fetch_node.py Co-Authored-By: Federico Minutoli <40361744+DiTo97@users.noreply.github.com> --- scrapegraphai/nodes/fetch_node.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 86b02bf6..4971ddb3 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -202,11 +202,10 @@ def execute(self, state): loader_kwargs = self.node_config.get("loader_kwargs", {}) if self.browser_base is not None: - if self.browser_base is not None: - data = browser_base_fetch(self.browser_base.get("api_key"), - self.browser_base.get("project_id"), [source]) + data = browser_base_fetch(self.browser_base.get("api_key"), + self.browser_base.get("project_id"), [source]) - document = [Document(page_content=content, metadata={"source": source}) for content in data] + document = [Document(page_content=content, metadata={"source": source}) for content in data] else: loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs) document = loader.load() From 0b4cfd6522dcad0eb418f0badd0f7824a1efd534 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 14:38:50 +0200 Subject: [PATCH 41/51] fix: abstract_graph and removed unused embeddings Co-Authored-By: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> --- examples/bedrock/csv_scraper_bedrock.py | 3 - .../csv_scraper_graph_multi_bedrock.py | 3 - examples/bedrock/custom_graph_bedrock.py | 3 - examples/bedrock/json_scraper_bedrock.py | 3 - .../bedrock/json_scraper_multi_bedrock.py | 3 - examples/bedrock/pdf_scraper_graph_bedrock.py | 3 - 
.../pdf_scraper_graph_multi_bedrock.py | 3 - examples/bedrock/scrape_plain_text_bedrock.py | 3 - examples/bedrock/script_generator_bedrock.py | 5 +- .../bedrock/script_multi_generator_bedrock.py | 5 +- examples/bedrock/search_graph_bedrock.py | 3 - .../bedrock/search_graph_schema_bedrock.py | 3 - examples/bedrock/search_link_graph_bedrock.py | 3 - examples/bedrock/smart_scraper_bedrock.py | 3 - .../bedrock/smart_scraper_multi_bedrock.py | 3 - .../bedrock/smart_scraper_schema_bedrock.py | 3 - examples/bedrock/xml_scraper_bedrock.py | 3 - .../xml_scraper_graph_multi_bedrock.py | 3 - examples/deepseek/csv_scraper_deepseek.py | 5 -- .../csv_scraper_graph_multi_deepseek.py | 5 -- examples/deepseek/custom_graph_deepseek.py | 89 ------------------- examples/deepseek/json_scraper_deepseek.py | 5 -- .../deepseek/json_scraper_multi_deepseek.py | 5 -- .../deepseek/pdf_scraper_graph_deepseek.py | 5 -- .../deepseek/pdf_scraper_multi_deepseek.py | 5 -- .../deepseek/scrape_plain_text_deepseek.py | 5 -- .../deepseek/script_generator_deepseek.py | 5 -- .../script_multi_generator_deepseek.py | 5 -- examples/deepseek/search_graph_deepseek.py | 5 -- .../deepseek/search_graph_schema_deepseek.py | 5 -- .../deepseek/search_link_graph_deepseek.py | 5 -- examples/deepseek/smart_scraper_deepseek.py | 5 -- .../deepseek/smart_scraper_multi_deepseek.py | 5 -- .../deepseek/smart_scraper_schema_deepseek.py | 5 -- examples/deepseek/xml_scraper_deepseek.py | 5 -- .../xml_scraper_graph_multi_deepseek.py | 5 -- examples/fireworks/csv_scraper_fireworks.py | 6 -- .../csv_scraper_graph_multi_fireworks.py | 5 -- examples/fireworks/custom_graph_fireworks.py | 27 +----- examples/fireworks/deep_scraper_fireworks.py | 7 +- .../fireworks/json_scraper_fireworkspy.py | 5 -- .../fireworks/json_scraper_multi_fireworks.py | 5 -- examples/fireworks/pdf_scraper_fireworks.py | 5 -- .../fireworks/pdf_scraper_multi_fireworks.py | 5 -- .../fireworks/scrape_plain_text_fireworks.py | 5 -- 
.../fireworks/script_generator_fireworks.py | 5 -- .../script_generator_schema_fireworks.py | 5 -- .../script_multi_generator_fireworks.py | 5 -- examples/fireworks/search_graph_fireworks.py | 5 -- .../search_graph_schema_fireworks.py | 5 -- .../fireworks/search_link_graph_fireworks.py | 5 -- examples/fireworks/smart_scraper_fireworks.py | 5 -- .../smart_scraper_multi_fireworks.py | 6 +- .../smart_scraper_schema_fireworks.py | 5 -- examples/fireworks/xml_scraper_fireworks.py | 5 -- .../xml_scraper_graph_multi_fireworks.py | 5 -- examples/groq/csv_scraper_graph_multi_groq.py | 5 -- examples/groq/csv_scraper_groq.py | 5 -- examples/groq/custom_graph_groq.py | 22 +---- examples/groq/json_scraper_groq.py | 5 -- examples/groq/json_scraper_multi_groq.py | 5 -- examples/groq/pdf_scraper_graph_groq.py | 5 -- examples/groq/pdf_scraper_multi_groq.py | 5 -- examples/groq/scrape_plain_text_groq.py | 5 -- examples/groq/script_generator_groq.py | 5 -- examples/groq/script_multi_generator_groq.py | 5 -- examples/groq/search_graph_groq.py | 5 -- examples/groq/search_graph_schema_groq.py | 5 -- examples/groq/search_link_graph_groq.py | 5 -- examples/groq/smart_scraper_groq.py | 5 -- examples/groq/smart_scraper_multi_groq.py | 5 -- examples/groq/smart_scraper_schema_groq.py | 5 -- examples/groq/xml_scraper_graph_multi_groq.py | 5 -- examples/groq/xml_scraper_groq.py | 5 -- .../csv_scraper_graph_multi_huggingfacehub.py | 1 - .../csv_scraper_huggingfacehub.py | 1 - .../custom_graph_huggingfacehub.py | 1 - .../json_scraper_huggingfacehub.py | 1 - .../json_scraper_multi_huggingfacehub.py | 1 - .../pdf_scraper_graph_huggingfacehub.py | 1 - .../pdf_scraper_multi_huggingfacehub.py | 1 - .../scrape_plain_text_huggingfacehub.py | 1 - .../script_generator_huggingfacehub.py | 1 - .../script_multi_generator_huggingfacehub.py | 1 - .../search_graph_huggingfacehub.py | 1 - .../search_link_graph_huggingfacehub.py | 1 - .../smart_scraper_huggingfacehub.py | 1 - 
.../smart_scraper_multi_huggingfacehub.py | 1 - .../smart_scraper_schema_huggingfacehub.py | 1 - .../xml_scraper_graph_multi_huggingfacehub.py | 1 - .../xml_scraper_huggingfacehub.py | 1 - examples/local_models/custom_graph_ollama.py | 24 ++--- .../local_models/json_scraper_multi_ollama.py | 4 - examples/local_models/json_scraper_ollama.py | 5 -- .../local_models/pdf_scraper_multi_ollama.py | 4 - examples/local_models/pdf_scraper_ollama.py | 4 - .../local_models/scrape_plain_text_ollama.py | 5 -- .../script_multi_generator_ollama.py | 5 -- examples/local_models/search_graph_ollama.py | 5 -- .../search_graph_schema_ollama.py | 5 -- .../local_models/search_link_graph_ollama.py | 6 +- examples/local_models/smart_scraper_ollama.py | 6 +- .../smart_scraper_schema_ollama.py | 6 +- .../xml_scraper_graph_multi_ollama.py | 6 +- examples/local_models/xml_scraper_ollama.py | 5 -- scrapegraphai/graphs/abstract_graph.py | 3 - 106 files changed, 21 insertions(+), 554 deletions(-) delete mode 100644 examples/deepseek/custom_graph_deepseek.py diff --git a/examples/bedrock/csv_scraper_bedrock.py b/examples/bedrock/csv_scraper_bedrock.py index f015f77b..a69417c0 100644 --- a/examples/bedrock/csv_scraper_bedrock.py +++ b/examples/bedrock/csv_scraper_bedrock.py @@ -33,9 +33,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } # ************************************************ diff --git a/examples/bedrock/csv_scraper_graph_multi_bedrock.py b/examples/bedrock/csv_scraper_graph_multi_bedrock.py index c776c508..b9dd7f6f 100644 --- a/examples/bedrock/csv_scraper_graph_multi_bedrock.py +++ b/examples/bedrock/csv_scraper_graph_multi_bedrock.py @@ -28,9 +28,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git 
a/examples/bedrock/custom_graph_bedrock.py b/examples/bedrock/custom_graph_bedrock.py index 45358555..9002a598 100644 --- a/examples/bedrock/custom_graph_bedrock.py +++ b/examples/bedrock/custom_graph_bedrock.py @@ -28,9 +28,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/json_scraper_bedrock.py b/examples/bedrock/json_scraper_bedrock.py index 0729adfe..dc1bf769 100644 --- a/examples/bedrock/json_scraper_bedrock.py +++ b/examples/bedrock/json_scraper_bedrock.py @@ -32,9 +32,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/json_scraper_multi_bedrock.py b/examples/bedrock/json_scraper_multi_bedrock.py index 5dc666b8..5848ef17 100644 --- a/examples/bedrock/json_scraper_multi_bedrock.py +++ b/examples/bedrock/json_scraper_multi_bedrock.py @@ -10,9 +10,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } FILE_NAME = "inputs/example.json" diff --git a/examples/bedrock/pdf_scraper_graph_bedrock.py b/examples/bedrock/pdf_scraper_graph_bedrock.py index 6ee4b753..dcef848e 100644 --- a/examples/bedrock/pdf_scraper_graph_bedrock.py +++ b/examples/bedrock/pdf_scraper_graph_bedrock.py @@ -18,9 +18,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/pdf_scraper_graph_multi_bedrock.py b/examples/bedrock/pdf_scraper_graph_multi_bedrock.py index 7102c406..37e61c42 100644 --- a/examples/bedrock/pdf_scraper_graph_multi_bedrock.py +++ 
b/examples/bedrock/pdf_scraper_graph_multi_bedrock.py @@ -11,9 +11,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } # *************** diff --git a/examples/bedrock/scrape_plain_text_bedrock.py b/examples/bedrock/scrape_plain_text_bedrock.py index 01bec609..0214a1e3 100644 --- a/examples/bedrock/scrape_plain_text_bedrock.py +++ b/examples/bedrock/scrape_plain_text_bedrock.py @@ -33,9 +33,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/script_generator_bedrock.py b/examples/bedrock/script_generator_bedrock.py index 0d3f7d07..26863193 100644 --- a/examples/bedrock/script_generator_bedrock.py +++ b/examples/bedrock/script_generator_bedrock.py @@ -19,10 +19,7 @@ "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" - }, - "library": "beautifulsoup" + "library": "beautifulsoup" } # ************************************************ diff --git a/examples/bedrock/script_multi_generator_bedrock.py b/examples/bedrock/script_multi_generator_bedrock.py index 2f892546..ecef966d 100644 --- a/examples/bedrock/script_multi_generator_bedrock.py +++ b/examples/bedrock/script_multi_generator_bedrock.py @@ -15,10 +15,7 @@ "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" - }, - "library": "beautifulsoup" + "library": "beautifulsoup" } # ************************************************ diff --git a/examples/bedrock/search_graph_bedrock.py b/examples/bedrock/search_graph_bedrock.py index 9b32d3db..b27f6e5d 100644 --- a/examples/bedrock/search_graph_bedrock.py +++ 
b/examples/bedrock/search_graph_bedrock.py @@ -16,9 +16,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } # ************************************************ diff --git a/examples/bedrock/search_graph_schema_bedrock.py b/examples/bedrock/search_graph_schema_bedrock.py index 90539155..a49ba730 100644 --- a/examples/bedrock/search_graph_schema_bedrock.py +++ b/examples/bedrock/search_graph_schema_bedrock.py @@ -27,9 +27,6 @@ class Dishes(BaseModel): "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/search_link_graph_bedrock.py b/examples/bedrock/search_link_graph_bedrock.py index 116dea01..fc1e6233 100644 --- a/examples/bedrock/search_link_graph_bedrock.py +++ b/examples/bedrock/search_link_graph_bedrock.py @@ -15,9 +15,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/smart_scraper_bedrock.py b/examples/bedrock/smart_scraper_bedrock.py index 03394434..9c747c00 100644 --- a/examples/bedrock/smart_scraper_bedrock.py +++ b/examples/bedrock/smart_scraper_bedrock.py @@ -19,9 +19,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/smart_scraper_multi_bedrock.py b/examples/bedrock/smart_scraper_multi_bedrock.py index 7aeb71cd..b363d6ab 100644 --- a/examples/bedrock/smart_scraper_multi_bedrock.py +++ b/examples/bedrock/smart_scraper_multi_bedrock.py @@ -17,9 +17,6 @@ "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", 
"temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/smart_scraper_schema_bedrock.py b/examples/bedrock/smart_scraper_schema_bedrock.py index 6213ea1f..2829efec 100644 --- a/examples/bedrock/smart_scraper_schema_bedrock.py +++ b/examples/bedrock/smart_scraper_schema_bedrock.py @@ -26,9 +26,6 @@ class Projects(BaseModel): "client": "client_name", "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 - }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" } } diff --git a/examples/bedrock/xml_scraper_bedrock.py b/examples/bedrock/xml_scraper_bedrock.py index 018a8387..5f81fbf6 100644 --- a/examples/bedrock/xml_scraper_bedrock.py +++ b/examples/bedrock/xml_scraper_bedrock.py @@ -32,9 +32,6 @@ "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" - } } # ************************************************ diff --git a/examples/bedrock/xml_scraper_graph_multi_bedrock.py b/examples/bedrock/xml_scraper_graph_multi_bedrock.py index a0ed3560..638ce280 100644 --- a/examples/bedrock/xml_scraper_graph_multi_bedrock.py +++ b/examples/bedrock/xml_scraper_graph_multi_bedrock.py @@ -29,9 +29,6 @@ "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "temperature": 0.0 }, - "embeddings": { - "model": "bedrock/cohere.embed-multilingual-v3" - } } # ************************************************ diff --git a/examples/deepseek/csv_scraper_deepseek.py b/examples/deepseek/csv_scraper_deepseek.py index fd55469d..b734b543 100644 --- a/examples/deepseek/csv_scraper_deepseek.py +++ b/examples/deepseek/csv_scraper_deepseek.py @@ -30,11 +30,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set 
ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/csv_scraper_graph_multi_deepseek.py b/examples/deepseek/csv_scraper_graph_multi_deepseek.py index d665bc31..ea5e9154 100644 --- a/examples/deepseek/csv_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/csv_scraper_graph_multi_deepseek.py @@ -30,11 +30,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/custom_graph_deepseek.py b/examples/deepseek/custom_graph_deepseek.py deleted file mode 100644 index a265db95..00000000 --- a/examples/deepseek/custom_graph_deepseek.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Example of custom graph using Gemini Google model -""" - -import os -from dotenv import load_dotenv -from scrapegraphai.models import Gemini -from scrapegraphai.graphs import BaseGraph -from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -deepseek_key = os.getenv("DEEPSEEK_APIKEY") - -graph_config = { - "llm": { - "model": "deepseek-chat", - "openai_api_key": deepseek_key, - "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, - "verbose": True, -} - -# ************************************************ -# Define the graph nodes -# ************************************************ - -llm_model = Gemini(graph_config["llm"]) - -# define the nodes for the graph -fetch_node = FetchNode( - input="url | local_dir", - output=["doc"], -) -parse_node = ParseNode( - input="doc", - 
output=["parsed_doc"], - node_config={"chunk_size": 4096} -) -rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={"llm": llm_model}, -) -generate_answer_node = GenerateAnswerNode( - input="user_prompt & (relevant_chunks | parsed_doc | doc)", - output=["answer"], - node_config={"llm": llm_model}, -) - -# ************************************************ -# Create the graph by defining the connections -# ************************************************ - -graph = BaseGraph( - nodes={ - fetch_node, - parse_node, - rag_node, - generate_answer_node, - }, - edges={ - (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node) - }, - entry_point=fetch_node -) - -# ************************************************ -# Execute the graph -# ************************************************ - -result, execution_info = graph.execute({ - "user_prompt": "List me the projects with their description", - "url": "https://perinim.github.io/projects/" -}) - -# get the answer from the result -result = result.get("answer", "No answer found.") -print(result) diff --git a/examples/deepseek/json_scraper_deepseek.py b/examples/deepseek/json_scraper_deepseek.py index 696a08d9..dfe6f489 100644 --- a/examples/deepseek/json_scraper_deepseek.py +++ b/examples/deepseek/json_scraper_deepseek.py @@ -29,11 +29,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/json_scraper_multi_deepseek.py b/examples/deepseek/json_scraper_multi_deepseek.py index 17660ddb..b957dde0 100644 --- a/examples/deepseek/json_scraper_multi_deepseek.py +++ b/examples/deepseek/json_scraper_multi_deepseek.py @@ -15,11 +15,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, 
"openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/pdf_scraper_graph_deepseek.py b/examples/deepseek/pdf_scraper_graph_deepseek.py index fe6f2658..d66bbef5 100644 --- a/examples/deepseek/pdf_scraper_graph_deepseek.py +++ b/examples/deepseek/pdf_scraper_graph_deepseek.py @@ -20,11 +20,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/pdf_scraper_multi_deepseek.py b/examples/deepseek/pdf_scraper_multi_deepseek.py index c884b798..211e4635 100644 --- a/examples/deepseek/pdf_scraper_multi_deepseek.py +++ b/examples/deepseek/pdf_scraper_multi_deepseek.py @@ -15,11 +15,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/scrape_plain_text_deepseek.py b/examples/deepseek/scrape_plain_text_deepseek.py index 7076dd39..d7a070d7 100644 --- a/examples/deepseek/scrape_plain_text_deepseek.py +++ b/examples/deepseek/scrape_plain_text_deepseek.py @@ -31,11 +31,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/script_generator_deepseek.py b/examples/deepseek/script_generator_deepseek.py index 
09db0876..fd5fd4dd 100644 --- a/examples/deepseek/script_generator_deepseek.py +++ b/examples/deepseek/script_generator_deepseek.py @@ -20,11 +20,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/deepseek/script_multi_generator_deepseek.py b/examples/deepseek/script_multi_generator_deepseek.py index 41e363b5..2ebfd90a 100644 --- a/examples/deepseek/script_multi_generator_deepseek.py +++ b/examples/deepseek/script_multi_generator_deepseek.py @@ -20,11 +20,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/deepseek/search_graph_deepseek.py b/examples/deepseek/search_graph_deepseek.py index d607e1b1..176d6107 100644 --- a/examples/deepseek/search_graph_deepseek.py +++ b/examples/deepseek/search_graph_deepseek.py @@ -18,11 +18,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "max_results": 2, "verbose": True, diff --git a/examples/deepseek/search_graph_schema_deepseek.py b/examples/deepseek/search_graph_schema_deepseek.py index 8debee2f..f5db278e 100644 --- a/examples/deepseek/search_graph_schema_deepseek.py +++ b/examples/deepseek/search_graph_schema_deepseek.py @@ -34,11 +34,6 @@ class Dishes(BaseModel): "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - 
"embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/search_link_graph_deepseek.py b/examples/deepseek/search_link_graph_deepseek.py index 30e4a9b3..6a35f177 100644 --- a/examples/deepseek/search_link_graph_deepseek.py +++ b/examples/deepseek/search_link_graph_deepseek.py @@ -19,11 +19,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/smart_scraper_deepseek.py b/examples/deepseek/smart_scraper_deepseek.py index 9fe00a2a..ed291b02 100644 --- a/examples/deepseek/smart_scraper_deepseek.py +++ b/examples/deepseek/smart_scraper_deepseek.py @@ -21,11 +21,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/smart_scraper_multi_deepseek.py b/examples/deepseek/smart_scraper_multi_deepseek.py index c88ab525..fafe7261 100644 --- a/examples/deepseek/smart_scraper_multi_deepseek.py +++ b/examples/deepseek/smart_scraper_multi_deepseek.py @@ -19,11 +19,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/smart_scraper_schema_deepseek.py b/examples/deepseek/smart_scraper_schema_deepseek.py index a16ae575..5cbbb702 100644 --- 
a/examples/deepseek/smart_scraper_schema_deepseek.py +++ b/examples/deepseek/smart_scraper_schema_deepseek.py @@ -33,11 +33,6 @@ class Projects(BaseModel): "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/xml_scraper_deepseek.py b/examples/deepseek/xml_scraper_deepseek.py index 3b2af61b..ba401b91 100644 --- a/examples/deepseek/xml_scraper_deepseek.py +++ b/examples/deepseek/xml_scraper_deepseek.py @@ -31,11 +31,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/xml_scraper_graph_multi_deepseek.py b/examples/deepseek/xml_scraper_graph_multi_deepseek.py index 5d3c29d5..0f53a6b2 100644 --- a/examples/deepseek/xml_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/xml_scraper_graph_multi_deepseek.py @@ -30,11 +30,6 @@ "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/fireworks/csv_scraper_fireworks.py b/examples/fireworks/csv_scraper_fireworks.py index b1d7526d..f588c4c5 100644 --- a/examples/fireworks/csv_scraper_fireworks.py +++ b/examples/fireworks/csv_scraper_fireworks.py @@ -29,12 +29,6 @@ "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": 
"http://localhost:11434", # set ollama URL arbitrarily - }, - "verbose": True, "headless": False, } diff --git a/examples/fireworks/csv_scraper_graph_multi_fireworks.py b/examples/fireworks/csv_scraper_graph_multi_fireworks.py index 81393d60..ebc46e61 100644 --- a/examples/fireworks/csv_scraper_graph_multi_fireworks.py +++ b/examples/fireworks/csv_scraper_graph_multi_fireworks.py @@ -28,11 +28,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False, diff --git a/examples/fireworks/custom_graph_fireworks.py b/examples/fireworks/custom_graph_fireworks.py index a02b774e..d0dcd994 100644 --- a/examples/fireworks/custom_graph_fireworks.py +++ b/examples/fireworks/custom_graph_fireworks.py @@ -4,9 +4,7 @@ import os from dotenv import load_dotenv - -from langchain_openai import OpenAIEmbeddings -from scrapegraphai.models import OpenAI +from langchain_openai import ChatOpenAI from scrapegraphai.graphs import BaseGraph from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode load_dotenv() @@ -21,11 +19,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False, @@ -35,8 +28,7 @@ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) -embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) +llm_model = ChatOpenAI(graph_config["llm"]) # define the nodes for the graph robot_node = RobotsNode( @@ -65,15 +57,7 @@ "verbose": True, } ) -rag_node = RAGNode( - input="user_prompt & (parsed_doc | 
doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": llm_model, - "embedder_model": embedder, - "verbose": True, - } -) + generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -92,14 +76,11 @@ robot_node, fetch_node, parse_node, - rag_node, - generate_answer_node, ], edges=[ (robot_node, fetch_node), (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node) + (parse_node, generate_answer_node) ], entry_point=robot_node ) diff --git a/examples/fireworks/deep_scraper_fireworks.py b/examples/fireworks/deep_scraper_fireworks.py index 67a80868..86fb1717 100644 --- a/examples/fireworks/deep_scraper_fireworks.py +++ b/examples/fireworks/deep_scraper_fireworks.py @@ -19,11 +19,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "max_depth": 1 @@ -49,4 +44,4 @@ graph_exec_info = deep_scraper_graph.get_execution_info() print(deep_scraper_graph.get_state("relevant_links")) -print(prettify_exec_info(graph_exec_info)) \ No newline at end of file +print(prettify_exec_info(graph_exec_info)) diff --git a/examples/fireworks/json_scraper_fireworkspy.py b/examples/fireworks/json_scraper_fireworkspy.py index 0dd188fb..a76a89c5 100644 --- a/examples/fireworks/json_scraper_fireworkspy.py +++ b/examples/fireworks/json_scraper_fireworkspy.py @@ -29,11 +29,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False, diff --git a/examples/fireworks/json_scraper_multi_fireworks.py 
b/examples/fireworks/json_scraper_multi_fireworks.py index b4cf4fc7..cd16c525 100644 --- a/examples/fireworks/json_scraper_multi_fireworks.py +++ b/examples/fireworks/json_scraper_multi_fireworks.py @@ -14,11 +14,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False, diff --git a/examples/fireworks/pdf_scraper_fireworks.py b/examples/fireworks/pdf_scraper_fireworks.py index 20db556b..3bb3f3d4 100644 --- a/examples/fireworks/pdf_scraper_fireworks.py +++ b/examples/fireworks/pdf_scraper_fireworks.py @@ -15,11 +15,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/fireworks/pdf_scraper_multi_fireworks.py b/examples/fireworks/pdf_scraper_multi_fireworks.py index 891a4454..c1077061 100644 --- a/examples/fireworks/pdf_scraper_multi_fireworks.py +++ b/examples/fireworks/pdf_scraper_multi_fireworks.py @@ -20,11 +20,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/fireworks/scrape_plain_text_fireworks.py b/examples/fireworks/scrape_plain_text_fireworks.py index a45b2691..331f05e2 100644 --- a/examples/fireworks/scrape_plain_text_fireworks.py +++ b/examples/fireworks/scrape_plain_text_fireworks.py @@ -32,11 +32,6 @@ "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" }, - 
"embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, } diff --git a/examples/fireworks/script_generator_fireworks.py b/examples/fireworks/script_generator_fireworks.py index dea59e12..2ee3294c 100644 --- a/examples/fireworks/script_generator_fireworks.py +++ b/examples/fireworks/script_generator_fireworks.py @@ -19,11 +19,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False, diff --git a/examples/fireworks/script_generator_schema_fireworks.py b/examples/fireworks/script_generator_schema_fireworks.py index f7aa4c83..6355a4e8 100644 --- a/examples/fireworks/script_generator_schema_fireworks.py +++ b/examples/fireworks/script_generator_schema_fireworks.py @@ -32,11 +32,6 @@ class Projects(BaseModel): "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "library": "beautifulsoup", diff --git a/examples/fireworks/script_multi_generator_fireworks.py b/examples/fireworks/script_multi_generator_fireworks.py index 42aff923..98671768 100644 --- a/examples/fireworks/script_multi_generator_fireworks.py +++ b/examples/fireworks/script_multi_generator_fireworks.py @@ -19,11 +19,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "library": "beautifulsoup", diff --git 
a/examples/fireworks/search_graph_fireworks.py b/examples/fireworks/search_graph_fireworks.py index 4d4d33cb..a091190c 100644 --- a/examples/fireworks/search_graph_fireworks.py +++ b/examples/fireworks/search_graph_fireworks.py @@ -18,11 +18,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "max_results": 2, "verbose": True, diff --git a/examples/fireworks/search_graph_schema_fireworks.py b/examples/fireworks/search_graph_schema_fireworks.py index 9180522b..d88d991e 100644 --- a/examples/fireworks/search_graph_schema_fireworks.py +++ b/examples/fireworks/search_graph_schema_fireworks.py @@ -33,11 +33,6 @@ class Dishes(BaseModel): "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "max_results": 2, "verbose": True, diff --git a/examples/fireworks/search_link_graph_fireworks.py b/examples/fireworks/search_link_graph_fireworks.py index a1d3a979..e71e2a4f 100644 --- a/examples/fireworks/search_link_graph_fireworks.py +++ b/examples/fireworks/search_link_graph_fireworks.py @@ -18,11 +18,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "max_results": 2, "verbose": True, diff --git a/examples/fireworks/smart_scraper_fireworks.py b/examples/fireworks/smart_scraper_fireworks.py index 40071d8f..cff9aedb 100644 --- a/examples/fireworks/smart_scraper_fireworks.py +++ b/examples/fireworks/smart_scraper_fireworks.py @@ -20,11 +20,6 @@ "llm": 
{ "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False, diff --git a/examples/fireworks/smart_scraper_multi_fireworks.py b/examples/fireworks/smart_scraper_multi_fireworks.py index 68e28055..09e2c811 100644 --- a/examples/fireworks/smart_scraper_multi_fireworks.py +++ b/examples/fireworks/smart_scraper_multi_fireworks.py @@ -19,11 +19,7 @@ "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, + "verbose": True, "headless": False, } diff --git a/examples/fireworks/smart_scraper_schema_fireworks.py b/examples/fireworks/smart_scraper_schema_fireworks.py index b8685c3e..d71593f3 100644 --- a/examples/fireworks/smart_scraper_schema_fireworks.py +++ b/examples/fireworks/smart_scraper_schema_fireworks.py @@ -31,11 +31,6 @@ class Projects(BaseModel): "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False, diff --git a/examples/fireworks/xml_scraper_fireworks.py b/examples/fireworks/xml_scraper_fireworks.py index efc98bd8..59d9e6a3 100644 --- a/examples/fireworks/xml_scraper_fireworks.py +++ b/examples/fireworks/xml_scraper_fireworks.py @@ -28,11 +28,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL 
arbitrarily }, "verbose": True, "headless": False, diff --git a/examples/fireworks/xml_scraper_graph_multi_fireworks.py b/examples/fireworks/xml_scraper_graph_multi_fireworks.py index d14b8db0..690836a4 100644 --- a/examples/fireworks/xml_scraper_graph_multi_fireworks.py +++ b/examples/fireworks/xml_scraper_graph_multi_fireworks.py @@ -29,11 +29,6 @@ "llm": { "api_key": fireworks_api_key, "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False, diff --git a/examples/groq/csv_scraper_graph_multi_groq.py b/examples/groq/csv_scraper_graph_multi_groq.py index 87e3279c..475b8cac 100644 --- a/examples/groq/csv_scraper_graph_multi_groq.py +++ b/examples/groq/csv_scraper_graph_multi_groq.py @@ -30,11 +30,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/csv_scraper_groq.py b/examples/groq/csv_scraper_groq.py index 20839a75..805ce5fc 100644 --- a/examples/groq/csv_scraper_groq.py +++ b/examples/groq/csv_scraper_groq.py @@ -31,11 +31,6 @@ "api_key": groq_key, "temperature": 0 }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, } # ************************************************ # Create the CSVScraperGraph instance and run it diff --git a/examples/groq/custom_graph_groq.py b/examples/groq/custom_graph_groq.py index d0384ffd..79d2f0c6 100644 --- a/examples/groq/custom_graph_groq.py +++ b/examples/groq/custom_graph_groq.py @@ -4,7 +4,7 @@ import os from dotenv import load_dotenv -from scrapegraphai.models import OpenAI +from langchain_openai import 
ChatOpenAI from scrapegraphai.graphs import BaseGraph from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode load_dotenv() @@ -19,11 +19,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False @@ -33,7 +28,7 @@ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) # define the nodes for the graph robot_node = RobotsNode( @@ -62,14 +57,7 @@ "verbose": True, } ) -rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": llm_model, - "verbose": True, - } -) + generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -88,14 +76,12 @@ robot_node, fetch_node, parse_node, - rag_node, generate_answer_node, ], edges=[ (robot_node, fetch_node), (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node) + (parse_node, generate_answer_node) ], entry_point=robot_node ) diff --git a/examples/groq/json_scraper_groq.py b/examples/groq/json_scraper_groq.py index 3faddae8..a9099069 100644 --- a/examples/groq/json_scraper_groq.py +++ b/examples/groq/json_scraper_groq.py @@ -30,11 +30,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/examples/groq/json_scraper_multi_groq.py b/examples/groq/json_scraper_multi_groq.py index 13b49be6..df3b9276 100644 --- a/examples/groq/json_scraper_multi_groq.py +++ b/examples/groq/json_scraper_multi_groq.py @@ -15,11 +15,6 @@ 
"model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/groq/pdf_scraper_graph_groq.py b/examples/groq/pdf_scraper_graph_groq.py index a9ca57ee..2560c11e 100644 --- a/examples/groq/pdf_scraper_graph_groq.py +++ b/examples/groq/pdf_scraper_graph_groq.py @@ -18,11 +18,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/groq/pdf_scraper_multi_groq.py b/examples/groq/pdf_scraper_multi_groq.py index f1afc058..c43a7087 100644 --- a/examples/groq/pdf_scraper_multi_groq.py +++ b/examples/groq/pdf_scraper_multi_groq.py @@ -14,11 +14,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/groq/scrape_plain_text_groq.py b/examples/groq/scrape_plain_text_groq.py index 73cda250..329df51f 100644 --- a/examples/groq/scrape_plain_text_groq.py +++ b/examples/groq/scrape_plain_text_groq.py @@ -32,11 +32,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/examples/groq/script_generator_groq.py b/examples/groq/script_generator_groq.py index a370eb3c..9e280e2b 100644 --- a/examples/groq/script_generator_groq.py +++ b/examples/groq/script_generator_groq.py @@ -19,11 +19,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, 
"temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/groq/script_multi_generator_groq.py b/examples/groq/script_multi_generator_groq.py index 1757a3de..31f4041e 100644 --- a/examples/groq/script_multi_generator_groq.py +++ b/examples/groq/script_multi_generator_groq.py @@ -20,11 +20,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/groq/search_graph_groq.py b/examples/groq/search_graph_groq.py index e82ffb7c..e3044c0e 100644 --- a/examples/groq/search_graph_groq.py +++ b/examples/groq/search_graph_groq.py @@ -21,11 +21,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/search_graph_schema_groq.py b/examples/groq/search_graph_schema_groq.py index 41f03dc4..4cc2209d 100644 --- a/examples/groq/search_graph_schema_groq.py +++ b/examples/groq/search_graph_schema_groq.py @@ -34,11 +34,6 @@ class Dishes(BaseModel): "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/search_link_graph_groq.py b/examples/groq/search_link_graph_groq.py index f940c2a4..5d82f37f 100644 --- a/examples/groq/search_link_graph_groq.py +++ b/examples/groq/search_link_graph_groq.py @@ -19,11 +19,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - 
"embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/smart_scraper_groq.py b/examples/groq/smart_scraper_groq.py index f828cdec..ab38edc0 100644 --- a/examples/groq/smart_scraper_groq.py +++ b/examples/groq/smart_scraper_groq.py @@ -20,11 +20,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/smart_scraper_multi_groq.py b/examples/groq/smart_scraper_multi_groq.py index 18ba3992..6ead098c 100644 --- a/examples/groq/smart_scraper_multi_groq.py +++ b/examples/groq/smart_scraper_multi_groq.py @@ -19,11 +19,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/examples/groq/smart_scraper_schema_groq.py b/examples/groq/smart_scraper_schema_groq.py index e0c51c98..f9c1a40b 100644 --- a/examples/groq/smart_scraper_schema_groq.py +++ b/examples/groq/smart_scraper_schema_groq.py @@ -33,11 +33,6 @@ class Projects(BaseModel): "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/xml_scraper_graph_multi_groq.py b/examples/groq/xml_scraper_graph_multi_groq.py index 7b102c0f..62540671 100644 --- a/examples/groq/xml_scraper_graph_multi_groq.py +++ b/examples/groq/xml_scraper_graph_multi_groq.py @@ -30,11 +30,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - 
"embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/xml_scraper_groq.py b/examples/groq/xml_scraper_groq.py index 1c086175..2172ea77 100644 --- a/examples/groq/xml_scraper_groq.py +++ b/examples/groq/xml_scraper_groq.py @@ -30,11 +30,6 @@ "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/examples/huggingfacehub/csv_scraper_graph_multi_huggingfacehub.py b/examples/huggingfacehub/csv_scraper_graph_multi_huggingfacehub.py index 4517bbe9..48b04dab 100644 --- a/examples/huggingfacehub/csv_scraper_graph_multi_huggingfacehub.py +++ b/examples/huggingfacehub/csv_scraper_graph_multi_huggingfacehub.py @@ -40,7 +40,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } diff --git a/examples/huggingfacehub/csv_scraper_huggingfacehub.py b/examples/huggingfacehub/csv_scraper_huggingfacehub.py index 9d1dbe0b..18ce1194 100644 --- a/examples/huggingfacehub/csv_scraper_huggingfacehub.py +++ b/examples/huggingfacehub/csv_scraper_huggingfacehub.py @@ -43,7 +43,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # ************************************************ diff --git a/examples/huggingfacehub/custom_graph_huggingfacehub.py b/examples/huggingfacehub/custom_graph_huggingfacehub.py index ad903b5d..0c392cc1 100644 --- a/examples/huggingfacehub/custom_graph_huggingfacehub.py +++ b/examples/huggingfacehub/custom_graph_huggingfacehub.py @@ -33,7 +33,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } 
# ************************************************ diff --git a/examples/huggingfacehub/json_scraper_huggingfacehub.py b/examples/huggingfacehub/json_scraper_huggingfacehub.py index 3a9a163d..d709cc0d 100644 --- a/examples/huggingfacehub/json_scraper_huggingfacehub.py +++ b/examples/huggingfacehub/json_scraper_huggingfacehub.py @@ -43,7 +43,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # ************************************************ diff --git a/examples/huggingfacehub/json_scraper_multi_huggingfacehub.py b/examples/huggingfacehub/json_scraper_multi_huggingfacehub.py index 8ca3ba51..c029431e 100644 --- a/examples/huggingfacehub/json_scraper_multi_huggingfacehub.py +++ b/examples/huggingfacehub/json_scraper_multi_huggingfacehub.py @@ -24,7 +24,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } FILE_NAME = "inputs/example.json" curr_dir = os.path.dirname(os.path.realpath(__file__)) diff --git a/examples/huggingfacehub/pdf_scraper_graph_huggingfacehub.py b/examples/huggingfacehub/pdf_scraper_graph_huggingfacehub.py index bb2724fe..eb0b1895 100644 --- a/examples/huggingfacehub/pdf_scraper_graph_huggingfacehub.py +++ b/examples/huggingfacehub/pdf_scraper_graph_huggingfacehub.py @@ -25,7 +25,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } source = """ diff --git a/examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py b/examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py index d24d522c..4db809b2 100644 --- a/examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py +++ b/examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py @@ -23,7 +23,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # Covert to list diff --git 
a/examples/huggingfacehub/scrape_plain_text_huggingfacehub.py b/examples/huggingfacehub/scrape_plain_text_huggingfacehub.py index f07e5666..76d32cda 100644 --- a/examples/huggingfacehub/scrape_plain_text_huggingfacehub.py +++ b/examples/huggingfacehub/scrape_plain_text_huggingfacehub.py @@ -45,7 +45,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # ************************************************ diff --git a/examples/huggingfacehub/script_generator_huggingfacehub.py b/examples/huggingfacehub/script_generator_huggingfacehub.py index 4804db93..a3fcaaf4 100644 --- a/examples/huggingfacehub/script_generator_huggingfacehub.py +++ b/examples/huggingfacehub/script_generator_huggingfacehub.py @@ -36,7 +36,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # ************************************************ # Create the ScriptCreatorGraph instance and run it diff --git a/examples/huggingfacehub/script_multi_generator_huggingfacehub.py b/examples/huggingfacehub/script_multi_generator_huggingfacehub.py index 5afeff0d..0ee89189 100644 --- a/examples/huggingfacehub/script_multi_generator_huggingfacehub.py +++ b/examples/huggingfacehub/script_multi_generator_huggingfacehub.py @@ -33,7 +33,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # ************************************************ diff --git a/examples/huggingfacehub/search_graph_huggingfacehub.py b/examples/huggingfacehub/search_graph_huggingfacehub.py index b3c58ce5..7c4a0c43 100644 --- a/examples/huggingfacehub/search_graph_huggingfacehub.py +++ b/examples/huggingfacehub/search_graph_huggingfacehub.py @@ -29,7 +29,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # 
************************************************ diff --git a/examples/huggingfacehub/search_link_graph_huggingfacehub.py b/examples/huggingfacehub/search_link_graph_huggingfacehub.py index a49fb3b9..75b41282 100644 --- a/examples/huggingfacehub/search_link_graph_huggingfacehub.py +++ b/examples/huggingfacehub/search_link_graph_huggingfacehub.py @@ -26,7 +26,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } diff --git a/examples/huggingfacehub/smart_scraper_huggingfacehub.py b/examples/huggingfacehub/smart_scraper_huggingfacehub.py index bd415d41..6f9a863f 100644 --- a/examples/huggingfacehub/smart_scraper_huggingfacehub.py +++ b/examples/huggingfacehub/smart_scraper_huggingfacehub.py @@ -38,7 +38,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } smart_scraper_graph = SmartScraperGraph( diff --git a/examples/huggingfacehub/smart_scraper_multi_huggingfacehub.py b/examples/huggingfacehub/smart_scraper_multi_huggingfacehub.py index e1a332f9..046883a2 100644 --- a/examples/huggingfacehub/smart_scraper_multi_huggingfacehub.py +++ b/examples/huggingfacehub/smart_scraper_multi_huggingfacehub.py @@ -28,7 +28,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # ******************************************************* diff --git a/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py b/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py index 784079e4..31719697 100644 --- a/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py +++ b/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py @@ -48,7 +48,6 @@ class Projects(BaseModel): graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } smart_scraper_graph = SmartScraperGraph( diff 
--git a/examples/huggingfacehub/xml_scraper_graph_multi_huggingfacehub.py b/examples/huggingfacehub/xml_scraper_graph_multi_huggingfacehub.py index 24d6babd..1a244b86 100644 --- a/examples/huggingfacehub/xml_scraper_graph_multi_huggingfacehub.py +++ b/examples/huggingfacehub/xml_scraper_graph_multi_huggingfacehub.py @@ -40,7 +40,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # ************************************************ diff --git a/examples/huggingfacehub/xml_scraper_huggingfacehub.py b/examples/huggingfacehub/xml_scraper_huggingfacehub.py index cc8a4425..ddd73b5f 100644 --- a/examples/huggingfacehub/xml_scraper_huggingfacehub.py +++ b/examples/huggingfacehub/xml_scraper_huggingfacehub.py @@ -40,7 +40,6 @@ graph_config = { "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} } # ************************************************ diff --git a/examples/local_models/custom_graph_ollama.py b/examples/local_models/custom_graph_ollama.py index b9a42949..66dd59b6 100644 --- a/examples/local_models/custom_graph_ollama.py +++ b/examples/local_models/custom_graph_ollama.py @@ -4,7 +4,7 @@ import os from langchain_openai import OpenAIEmbeddings -from scrapegraphai.models import OpenAI +from langchain_openai import ChatOpenAI from scrapegraphai.graphs import BaseGraph from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode @@ -20,11 +20,7 @@ # "model_tokens": 2000, # set context length arbitrarily "base_url": "http://localhost:11434", }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434", - }, + "verbose": True, } @@ -32,7 +28,7 @@ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) embedder = 
OpenAIEmbeddings(api_key=llm_model.openai_api_key) # define the nodes for the graph @@ -62,15 +58,7 @@ "verbose": True, } ) -rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": llm_model, - "embedder_model": embedder, - "verbose": True, - } -) + generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -89,14 +77,12 @@ robot_node, fetch_node, parse_node, - rag_node, generate_answer_node, ], edges=[ (robot_node, fetch_node), (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node) + (parse_node, generate_answer_node) ], entry_point=robot_node ) diff --git a/examples/local_models/json_scraper_multi_ollama.py b/examples/local_models/json_scraper_multi_ollama.py index 91f4fab4..6e9c3da3 100644 --- a/examples/local_models/json_scraper_multi_ollama.py +++ b/examples/local_models/json_scraper_multi_ollama.py @@ -12,10 +12,6 @@ "format": "json", # Ollama needs the format to be specified explicitly "model_tokens": 4000, }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - }, "verbose": True, "headless": False, } diff --git a/examples/local_models/json_scraper_ollama.py b/examples/local_models/json_scraper_ollama.py index 2dd072ac..ca4eb32e 100644 --- a/examples/local_models/json_scraper_ollama.py +++ b/examples/local_models/json_scraper_ollama.py @@ -31,11 +31,6 @@ # "model_tokens": 2000, # set context length arbitrarily "base_url": "http://localhost:11434", }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434", - }, "verbose": True, } diff --git a/examples/local_models/pdf_scraper_multi_ollama.py b/examples/local_models/pdf_scraper_multi_ollama.py index c0b65a63..ce258bf6 100644 --- a/examples/local_models/pdf_scraper_multi_ollama.py +++ b/examples/local_models/pdf_scraper_multi_ollama.py @@ -11,10 +11,6 @@ 
"format": "json", # Ollama needs the format to be specified explicitly "model_tokens": 4000, }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - }, "verbose": True, } diff --git a/examples/local_models/pdf_scraper_ollama.py b/examples/local_models/pdf_scraper_ollama.py index d79afb3a..84eb40f9 100644 --- a/examples/local_models/pdf_scraper_ollama.py +++ b/examples/local_models/pdf_scraper_ollama.py @@ -10,10 +10,6 @@ "format": "json", # Ollama needs the format to be specified explicitly "model_tokens": 4000, }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - }, "verbose": True, "headless": False, } diff --git a/examples/local_models/scrape_plain_text_ollama.py b/examples/local_models/scrape_plain_text_ollama.py index 9700d713..fe24c2a9 100644 --- a/examples/local_models/scrape_plain_text_ollama.py +++ b/examples/local_models/scrape_plain_text_ollama.py @@ -30,11 +30,6 @@ # "model_tokens": 2000, # set context length arbitrarily "base_url": "http://localhost:11434", }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434", - }, "verbose": True, } diff --git a/examples/local_models/script_multi_generator_ollama.py b/examples/local_models/script_multi_generator_ollama.py index dc34c910..d94faba6 100644 --- a/examples/local_models/script_multi_generator_ollama.py +++ b/examples/local_models/script_multi_generator_ollama.py @@ -20,11 +20,6 @@ # "model_tokens": 2000, # set context length arbitrarily, "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, "library": "beautifoulsoup", "verbose": True, } diff --git a/examples/local_models/search_graph_ollama.py b/examples/local_models/search_graph_ollama.py index 8ecb60c1..039ca00e 100644 --- a/examples/local_models/search_graph_ollama.py +++ 
b/examples/local_models/search_graph_ollama.py @@ -16,11 +16,6 @@ # "format": "json", # Ollama needs the format to be specified explicitly # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, "max_results": 5, "verbose": True, } diff --git a/examples/local_models/search_graph_schema_ollama.py b/examples/local_models/search_graph_schema_ollama.py index ae7c0632..fb87954f 100644 --- a/examples/local_models/search_graph_schema_ollama.py +++ b/examples/local_models/search_graph_schema_ollama.py @@ -29,11 +29,6 @@ class Dishes(BaseModel): "format": "json", # Ollama needs the format to be specified explicitly # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, "verbose": True, "headless": False } diff --git a/examples/local_models/search_link_graph_ollama.py b/examples/local_models/search_link_graph_ollama.py index 5c594270..a05067dd 100644 --- a/examples/local_models/search_link_graph_ollama.py +++ b/examples/local_models/search_link_graph_ollama.py @@ -14,11 +14,7 @@ "format": "json", # Ollama needs the format to be specified explicitly # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, + "verbose": True, "headless": False } diff --git a/examples/local_models/smart_scraper_ollama.py b/examples/local_models/smart_scraper_ollama.py index c3d60559..01c9f964 100644 --- a/examples/local_models/smart_scraper_ollama.py +++ b/examples/local_models/smart_scraper_ollama.py @@ -14,11 +14,7 @@ "format": "json", # Ollama needs the format to be specified explicitly # 
"base_url": "http://localhost:11434", # set ollama URL arbitrarily }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, + "verbose": True, "headless": False } diff --git a/examples/local_models/smart_scraper_schema_ollama.py b/examples/local_models/smart_scraper_schema_ollama.py index 7168d513..5fcff433 100644 --- a/examples/local_models/smart_scraper_schema_ollama.py +++ b/examples/local_models/smart_scraper_schema_ollama.py @@ -24,11 +24,7 @@ class Projects(BaseModel): "format": "json", # Ollama needs the format to be specified explicitly # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - # "base_url": "http://localhost:11434", # set ollama URL arbitrarily - }, + "verbose": True, "headless": False } diff --git a/examples/local_models/xml_scraper_graph_multi_ollama.py b/examples/local_models/xml_scraper_graph_multi_ollama.py index d84c6c9f..0494ff2c 100644 --- a/examples/local_models/xml_scraper_graph_multi_ollama.py +++ b/examples/local_models/xml_scraper_graph_multi_ollama.py @@ -29,11 +29,7 @@ # "model_tokens": 2000, # set context length arbitrarily "base_url": "http://localhost:11434", }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434", - }, + "verbose": True, } diff --git a/examples/local_models/xml_scraper_ollama.py b/examples/local_models/xml_scraper_ollama.py index cc8c3ad9..50c4f8e7 100644 --- a/examples/local_models/xml_scraper_ollama.py +++ b/examples/local_models/xml_scraper_ollama.py @@ -30,11 +30,6 @@ # "model_tokens": 2000, # set context length arbitrarily "base_url": "http://localhost:11434", }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434", - }, "verbose": True, } diff --git a/scrapegraphai/graphs/abstract_graph.py 
b/scrapegraphai/graphs/abstract_graph.py index 4ed08057..0348b3cc 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -29,8 +29,6 @@ class AbstractGraph(ABC): config (dict): Configuration parameters for the graph. schema (BaseModel): The schema for the graph output. llm_model: An instance of a language model client, configured for generating answers. - embedder_model: An instance of an embedding model client, - configured for generating embeddings. verbose (bool): A flag indicating whether to show print statements during execution. headless (bool): A flag indicating whether to run the graph in headless mode. @@ -85,7 +83,6 @@ def __init__(self, prompt: str, config: dict, "verbose": self.verbose, "loader_kwargs": self.loader_kwargs, "llm_model": self.llm_model, - "embedder_model": self.embedder_model, "cache_path": self.cache_path, } From 9ac74de6226b701149f8ae67ace61e7793e2f712 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 14:47:49 +0200 Subject: [PATCH 42/51] Update speech_graph.py --- scrapegraphai/graphs/speech_graph.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index 8fc532cd..2ba10db9 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ b/scrapegraphai/graphs/speech_graph.py @@ -11,7 +11,6 @@ from ..nodes import ( FetchNode, ParseNode, - RAGNode, GenerateAnswerNode, TextToSpeechNode, ) @@ -72,13 +71,6 @@ def _create_graph(self) -> BaseGraph: "chunk_size": self.model_token } ) - rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model } - ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -100,14 +92,12 @@ def _create_graph(self) -> BaseGraph: nodes=[ fetch_node, parse_node, - 
rag_node, generate_answer_node, text_to_speech_node ], edges=[ (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node), + (parse_node, generate_answer_node), (generate_answer_node, text_to_speech_node) ], entry_point=fetch_node, From 53dc06a2cbbe2cfd64b62d56c60490ae58f06d17 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 1 Aug 2024 14:48:19 +0200 Subject: [PATCH 43/51] Update speech_graph.py --- scrapegraphai/graphs/speech_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index 2ba10db9..d1d6f94b 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ b/scrapegraphai/graphs/speech_graph.py @@ -111,7 +111,7 @@ def run(self) -> str: Returns: str: The answer to the prompt. """ - + inputs = {"user_prompt": self.prompt, self.input_key: self.source} self.final_state, self.execution_info = self.graph.execute(inputs) @@ -122,4 +122,4 @@ def run(self) -> str: "output_path", "output.mp3")) print(f"Audio saved to {self.config.get('output_path', 'output.mp3')}") - return self.final_state.get("answer", "No answer found.") \ No newline at end of file + return self.final_state.get("answer", "No answer found.") From 55f706f3d5f4a8afe9dd8fc9ce9bd527f8a11894 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 1 Aug 2024 12:53:01 +0000 Subject: [PATCH 44/51] ci(release): 1.11.0-beta.7 [skip ci] ## [1.11.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.6...v1.11.0-beta.7) (2024-08-01) ### Bug Fixes * abstract_graph and removed unused embeddings ([0b4cfd6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0b4cfd6522dcad0eb418f0badd0f7824a1efd534)) ### Refactor * move embeddings code from AbstractGraph to RAGNode ([a94ebcd](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a94ebcde0078d66d33e67f7e0a87850efb92d408)) * reuse code for common interface models 
([bb73d91](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bb73d916a1a7b378438038ec928eeda6d8f6ac9d)) --- CHANGELOG.md | 13 +++++++++++++ pyproject.toml | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53e36c8a..52eccaa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +## [1.11.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.6...v1.11.0-beta.7) (2024-08-01) + + +### Bug Fixes + +* abstract_graph and removed unused embeddings ([0b4cfd6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0b4cfd6522dcad0eb418f0badd0f7824a1efd534)) + + +### Refactor + +* move embeddings code from AbstractGraph to RAGNode ([a94ebcd](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a94ebcde0078d66d33e67f7e0a87850efb92d408)) +* reuse code for common interface models ([bb73d91](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bb73d916a1a7b378438038ec928eeda6d8f6ac9d)) + ## [1.11.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.5...v1.11.0-beta.6) (2024-07-31) diff --git a/pyproject.toml b/pyproject.toml index 4a7fe29f..56acf3d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.11.0b6" +version = "1.11.0b7" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
authors = [ From 3e07f6273fae667b2f663be1cdd5e9c068f4c59f Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 1 Aug 2024 13:17:34 +0000 Subject: [PATCH 45/51] ci(release): 1.11.0-beta.8 [skip ci] ## [1.11.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.7...v1.11.0-beta.8) (2024-08-01) ### Features * add integration in the abstract grapgh ([5ecdbe7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5ecdbe715f4bb223fa1be834fda07ccea2a51cb9)) ### Bug Fixes * fixed bug on fetch_node ([968c69e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/968c69e217d9c180b9b8c2aa52ca59b9a1733525)) --- CHANGELOG.md | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52eccaa2..d2cdb565 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## [1.11.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.7...v1.11.0-beta.8) (2024-08-01) + + +### Features + +* add integration in the abstract grapgh ([5ecdbe7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5ecdbe715f4bb223fa1be834fda07ccea2a51cb9)) + + +### Bug Fixes + +* fixed bug on fetch_node ([968c69e](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/968c69e217d9c180b9b8c2aa52ca59b9a1733525)) + ## [1.11.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.6...v1.11.0-beta.7) (2024-08-01) diff --git a/pyproject.toml b/pyproject.toml index 56acf3d7..431488e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.11.0b7" +version = "1.11.0b8" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
authors = [ From 9355507a2dc73342f325b6649e871df48ae13567 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Fri, 2 Aug 2024 12:00:00 +0200 Subject: [PATCH 46/51] feat: refactoring of the code --- scrapegraphai/nodes/base_node.py | 6 +++-- scrapegraphai/nodes/fetch_node.py | 12 ++++----- .../nodes/generate_answer_csv_node.py | 6 ----- scrapegraphai/nodes/generate_answer_node.py | 3 --- .../nodes/generate_answer_omni_node.py | 4 +-- .../nodes/generate_answer_pdf_node.py | 5 ---- scrapegraphai/nodes/generate_scraper_node.py | 2 -- scrapegraphai/nodes/get_probable_tags_node.py | 1 - scrapegraphai/nodes/graph_iterator_node.py | 14 +++++------ scrapegraphai/nodes/image_to_text_node.py | 4 +-- scrapegraphai/nodes/merge_answers_node.py | 8 ------ .../nodes/merge_generated_scripts.py | 6 ----- scrapegraphai/nodes/parse_node.py | 12 ++++----- scrapegraphai/nodes/robots_node.py | 8 +----- scrapegraphai/nodes/search_internet_node.py | 2 -- scrapegraphai/nodes/search_link_node.py | 6 ----- .../nodes/search_node_with_context.py | 1 - scrapegraphai/nodes/text_to_speech_node.py | 3 --- scrapegraphai/utils/convert_to_md.py | 2 +- scrapegraphai/utils/logging.py | 22 ++++++++-------- scrapegraphai/utils/parse_state_keys.py | 13 ++++++---- scrapegraphai/utils/proxy_rotation.py | 1 - scrapegraphai/utils/research_web.py | 25 +++++++++++-------- scrapegraphai/utils/sys_dynamic_import.py | 5 +--- scrapegraphai/utils/token_calculator.py | 3 ++- 25 files changed, 65 insertions(+), 109 deletions(-) diff --git a/scrapegraphai/nodes/base_node.py b/scrapegraphai/nodes/base_node.py index d1b59500..26fc44c4 100644 --- a/scrapegraphai/nodes/base_node.py +++ b/scrapegraphai/nodes/base_node.py @@ -86,7 +86,8 @@ def update_config(self, params: dict, overwrite: bool = False): Args: param (dict): The dictionary to update node_config with. - overwrite (bool): Flag indicating if the values of node_config should be overwritten if their value is not None. 
+ overwrite (bool): Flag indicating if the values of node_config + should be overwritten if their value is not None. """ for key, val in params.items(): if hasattr(self, key) and not overwrite: @@ -133,7 +134,8 @@ def _validate_input_keys(self, input_keys): def _parse_input_keys(self, state: dict, expression: str) -> List[str]: """ - Parses the input keys expression to extract relevant keys from the state based on logical conditions. + Parses the input keys expression to extract + relevant keys from the state based on logical conditions. The expression can contain AND (&), OR (|), and parentheses to group conditions. Args: diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 4971ddb3..11cbb5fb 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -133,7 +133,7 @@ def execute(self, state): state.update({self.output[0]: compressed_document}) return state elif input_keys[0] == "json": - f = open(source) + f = open(source, encoding="utf-8") compressed_document = [ Document(page_content=str(json.load(f)), metadata={"source": "json"}) ] @@ -181,12 +181,11 @@ def execute(self, state): if not response.text.strip(): raise ValueError("No HTML body content found in the response.") - parsed_content = response - if not self.cut: parsed_content = cleanup_html(response, source) - if (isinstance(self.llm_model, ChatOpenAI) and not self.script_creator) or (self.force and not self.script_creator): + if (isinstance(self.llm_model, ChatOpenAI) + and not self.script_creator) or (self.force and not self.script_creator): parsed_content = convert_to_md(source, input_data[0]) compressed_document = [Document(page_content=parsed_content)] else: @@ -205,7 +204,8 @@ def execute(self, state): data = browser_base_fetch(self.browser_base.get("api_key"), self.browser_base.get("project_id"), [source]) - document = [Document(page_content=content, metadata={"source": source}) for content in data] + document = 
[Document(page_content=content, + metadata={"source": source}) for content in data] else: loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs) document = loader.load() @@ -215,10 +215,8 @@ def execute(self, state): parsed_content = document[0].page_content if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled: - parsed_content = convert_to_md(document[0].page_content, input_data[0]) - compressed_document = [ Document(page_content=parsed_content, metadata={"source": "html file"}) ] diff --git a/scrapegraphai/nodes/generate_answer_csv_node.py b/scrapegraphai/nodes/generate_answer_csv_node.py index 6ce19ef2..a91dae3f 100644 --- a/scrapegraphai/nodes/generate_answer_csv_node.py +++ b/scrapegraphai/nodes/generate_answer_csv_node.py @@ -3,18 +3,12 @@ Module for generating the answer node """ -# Imports from standard library from typing import List, Optional - -# Imports from Langchain from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel from tqdm import tqdm - from ..utils.logging import get_logger - -# Imports from the library from .base_node import BaseNode from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index 12ae6f0f..9c530688 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -1,7 +1,6 @@ """ GenerateAnswerNode Module """ -import asyncio from typing import List, Optional from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser @@ -9,7 +8,6 @@ from langchain_openai import ChatOpenAI from langchain_community.chat_models import ChatOllama from tqdm import tqdm -from langchain_openai 
import ChatOpenAI from ..utils.logging import get_logger from .base_node import BaseNode from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_md, template_no_chunks_md, template_merge_md @@ -130,7 +128,6 @@ def execute(self, state: dict) -> dict: partial_variables={"context": chunk, "chunk_id": i + 1, "format_instructions": format_instructions}) - # Add chain to dictionary with dynamic name chain_name = f"chunk{i+1}" chains_dict[chain_name] = prompt | self.llm_model | output_parser diff --git a/scrapegraphai/nodes/generate_answer_omni_node.py b/scrapegraphai/nodes/generate_answer_omni_node.py index 98be26dd..93e96f4e 100644 --- a/scrapegraphai/nodes/generate_answer_omni_node.py +++ b/scrapegraphai/nodes/generate_answer_omni_node.py @@ -113,7 +113,7 @@ def execute(self, state: dict) -> dict: chain = prompt | self.llm_model | output_parser answer = chain.invoke({"question": user_prompt}) - + state.update({self.output[0]: answer}) return state @@ -148,4 +148,4 @@ def execute(self, state: dict) -> dict: answer = merge_chain.invoke({"context": batch_results, "question": user_prompt}) state.update({self.output[0]: answer}) - return state \ No newline at end of file + return state diff --git a/scrapegraphai/nodes/generate_answer_pdf_node.py b/scrapegraphai/nodes/generate_answer_pdf_node.py index 47f14e86..4cef7ae9 100644 --- a/scrapegraphai/nodes/generate_answer_pdf_node.py +++ b/scrapegraphai/nodes/generate_answer_pdf_node.py @@ -2,18 +2,13 @@ Module for generating the answer node """ -# Imports from standard library from typing import List, Optional - -# Imports from Langchain from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel from tqdm import tqdm from langchain_community.chat_models import ChatOllama from ..utils.logging import get_logger - -# Imports from the library from .base_node import BaseNode from 
..helpers.generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf diff --git a/scrapegraphai/nodes/generate_scraper_node.py b/scrapegraphai/nodes/generate_scraper_node.py index 393f5e90..733898bd 100644 --- a/scrapegraphai/nodes/generate_scraper_node.py +++ b/scrapegraphai/nodes/generate_scraper_node.py @@ -83,7 +83,6 @@ def execute(self, state: dict) -> dict: user_prompt = input_data[0] doc = input_data[1] - # schema to be used for output parsing if self.node_config.get("schema", None) is not None: output_schema = JsonOutputParser(pydantic_object=self.node_config["schema"]) else: @@ -130,7 +129,6 @@ def execute(self, state: dict) -> dict: ) map_chain = prompt | self.llm_model | StrOutputParser() - # Chain answer = map_chain.invoke({"question": user_prompt}) state.update({self.output[0]: answer}) diff --git a/scrapegraphai/nodes/get_probable_tags_node.py b/scrapegraphai/nodes/get_probable_tags_node.py index f31633c0..38c2ba15 100644 --- a/scrapegraphai/nodes/get_probable_tags_node.py +++ b/scrapegraphai/nodes/get_probable_tags_node.py @@ -1,7 +1,6 @@ """ GetProbableTagsNode Module """ - from typing import List, Optional from langchain.output_parsers import CommaSeparatedListOutputParser from langchain.prompts import PromptTemplate diff --git a/scrapegraphai/nodes/graph_iterator_node.py b/scrapegraphai/nodes/graph_iterator_node.py index 061be77a..6ce4bdaf 100644 --- a/scrapegraphai/nodes/graph_iterator_node.py +++ b/scrapegraphai/nodes/graph_iterator_node.py @@ -5,13 +5,11 @@ import asyncio import copy from typing import List, Optional - from tqdm.asyncio import tqdm - from ..utils.logging import get_logger from .base_node import BaseNode -_default_batchsize = 16 +DEFAULT_BATCHSIZE = 16 class GraphIteratorNode(BaseNode): @@ -51,13 +49,15 @@ def execute(self, state: dict) -> dict: the correct data from the state. Returns: - dict: The updated state with the output key containing the results of the graph instances. 
+ dict: The updated state with the output key c + ontaining the results of the graph instances. Raises: - KeyError: If the input keys are not found in the state, indicating that the - necessary information for running the graph instances is missing. + KeyError: If the input keys are not found in the state, + indicating that thenecessary information for running + the graph instances is missing. """ - batchsize = self.node_config.get("batchsize", _default_batchsize) + batchsize = self.node_config.get("batchsize", DEFAULT_BATCHSIZE) self.logger.info( f"--- Executing {self.node_name} Node with batchsize {batchsize} ---" diff --git a/scrapegraphai/nodes/image_to_text_node.py b/scrapegraphai/nodes/image_to_text_node.py index 7e7507a9..c1a69390 100644 --- a/scrapegraphai/nodes/image_to_text_node.py +++ b/scrapegraphai/nodes/image_to_text_node.py @@ -3,14 +3,14 @@ """ from typing import List, Optional - from ..utils.logging import get_logger from .base_node import BaseNode class ImageToTextNode(BaseNode): """ - Retrieve images from a list of URLs and return a description of the images using an image-to-text model. + Retrieve images from a list of URLs and return a description of + the images using an image-to-text model. Attributes: llm_model: An instance of the language model client used for image-to-text conversion. 
diff --git a/scrapegraphai/nodes/merge_answers_node.py b/scrapegraphai/nodes/merge_answers_node.py index 0efd8ec8..548b7c04 100644 --- a/scrapegraphai/nodes/merge_answers_node.py +++ b/scrapegraphai/nodes/merge_answers_node.py @@ -2,18 +2,10 @@ MergeAnswersNode Module """ -# Imports from standard library from typing import List, Optional -from tqdm import tqdm - -# Imports from Langchain from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser -from tqdm import tqdm - from ..utils.logging import get_logger - -# Imports from the library from .base_node import BaseNode diff --git a/scrapegraphai/nodes/merge_generated_scripts.py b/scrapegraphai/nodes/merge_generated_scripts.py index cfda3960..8c8eaecd 100644 --- a/scrapegraphai/nodes/merge_generated_scripts.py +++ b/scrapegraphai/nodes/merge_generated_scripts.py @@ -5,15 +5,9 @@ # Imports from standard library from typing import List, Optional from tqdm import tqdm - -# Imports from Langchain from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser, StrOutputParser -from tqdm import tqdm - from ..utils.logging import get_logger - -# Imports from the library from .base_node import BaseNode diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py index b5418717..d1bb87bd 100644 --- a/scrapegraphai/nodes/parse_node.py +++ b/scrapegraphai/nodes/parse_node.py @@ -75,23 +75,23 @@ def execute(self, state: dict) -> dict: chunks = chunk(text=docs_transformed.page_content, chunk_size= self.node_config.get("chunk_size", 4096)-250, - token_counter=lambda x: len(x), + token_counter= lambda x: len(x), memoize=False) else: docs_transformed = docs_transformed[0] - if type(docs_transformed) == Document: + if isinstance(docs_transformed, Document): chunks = chunk(text=docs_transformed.page_content, chunk_size= self.node_config.get("chunk_size", 4096)-250, - token_counter=lambda x: len(x), + token_counter= lambda x: 
len(x), memoize=False) else: - + chunks = chunk(text=docs_transformed, chunk_size= self.node_config.get("chunk_size", 4096)-250, - token_counter=lambda x: len(x), + token_counter= lambda x: len(x), memoize=False) - + state.update({self.output[0]: chunks}) return state diff --git a/scrapegraphai/nodes/robots_node.py b/scrapegraphai/nodes/robots_node.py index 66231600..7fa2fe6b 100644 --- a/scrapegraphai/nodes/robots_node.py +++ b/scrapegraphai/nodes/robots_node.py @@ -4,15 +4,9 @@ from typing import List, Optional from urllib.parse import urlparse - from langchain_community.document_loaders import AsyncChromiumLoader from langchain.prompts import PromptTemplate from langchain.output_parsers import CommaSeparatedListOutputParser - -from langchain.output_parsers import CommaSeparatedListOutputParser -from langchain.prompts import PromptTemplate -from langchain_community.document_loaders import AsyncChromiumLoader - from ..helpers import robots_dictionary from ..utils.logging import get_logger from .base_node import BaseNode @@ -146,4 +140,4 @@ def execute(self, state: dict) -> dict: self.logger.warning("\033[32m(Scraping this website is allowed)\033[0m") state.update({self.output[0]: is_scrapable}) - return state \ No newline at end of file + return state diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 7588b995..61b11995 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -1,9 +1,7 @@ """ SearchInternetNode Module """ - from typing import List, Optional - from langchain.output_parsers import CommaSeparatedListOutputParser from langchain.prompts import PromptTemplate from langchain_community.chat_models import ChatOllama diff --git a/scrapegraphai/nodes/search_link_node.py b/scrapegraphai/nodes/search_link_node.py index b3d289d9..6fbe51dd 100644 --- a/scrapegraphai/nodes/search_link_node.py +++ b/scrapegraphai/nodes/search_link_node.py @@ -2,19 +2,13 
@@ SearchLinkNode Module """ -# Imports from standard library from typing import List, Optional import re from tqdm import tqdm - -# Imports from Langchain from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser from langchain_core.runnables import RunnableParallel - from ..utils.logging import get_logger - -# Imports from the library from .base_node import BaseNode diff --git a/scrapegraphai/nodes/search_node_with_context.py b/scrapegraphai/nodes/search_node_with_context.py index 62de184a..678e44ae 100644 --- a/scrapegraphai/nodes/search_node_with_context.py +++ b/scrapegraphai/nodes/search_node_with_context.py @@ -67,7 +67,6 @@ def execute(self, state: dict) -> dict: # Fetching data from the state based on the input keys input_data = [state[key] for key in input_keys] - user_prompt = input_data[0] doc = input_data[1] output_parser = CommaSeparatedListOutputParser() diff --git a/scrapegraphai/nodes/text_to_speech_node.py b/scrapegraphai/nodes/text_to_speech_node.py index 59e3fb8b..e8e43cb5 100644 --- a/scrapegraphai/nodes/text_to_speech_node.py +++ b/scrapegraphai/nodes/text_to_speech_node.py @@ -1,13 +1,10 @@ """ TextToSpeechNode Module """ - from typing import List, Optional - from ..utils.logging import get_logger from .base_node import BaseNode - class TextToSpeechNode(BaseNode): """ Converts text to speech using the specified text-to-speech model. diff --git a/scrapegraphai/utils/convert_to_md.py b/scrapegraphai/utils/convert_to_md.py index 6f1a2334..74478bcc 100644 --- a/scrapegraphai/utils/convert_to_md.py +++ b/scrapegraphai/utils/convert_to_md.py @@ -1,8 +1,8 @@ """ convert_to_md modul """ -import html2text from urllib.parse import urlparse +import html2text def convert_to_md(html: str, url: str = None) -> str: """ Convert HTML to Markdown. 
diff --git a/scrapegraphai/utils/logging.py b/scrapegraphai/utils/logging.py index 2684d0b1..afb63c52 100644 --- a/scrapegraphai/utils/logging.py +++ b/scrapegraphai/utils/logging.py @@ -12,7 +12,7 @@ _library_name = __name__.split(".", maxsplit=1)[0] -_default_handler = None +DEFAULT_HANDLER = None _default_logging_level = logging.WARNING _semaphore = threading.Lock() @@ -23,22 +23,22 @@ def _get_library_root_logger() -> logging.Logger: def _set_library_root_logger() -> None: - global _default_handler + global DEFAULT_HANDLER with _semaphore: - if _default_handler: + if DEFAULT_HANDLER: return - _default_handler = logging.StreamHandler() # sys.stderr as stream + DEFAULT_HANDLER = logging.StreamHandler() # sys.stderr as stream # https://github.com/pyinstaller/pyinstaller/issues/7334#issuecomment-1357447176 if sys.stderr is None: - sys.stderr = open(os.devnull, "w") + sys.stderr = open(os.devnull, "w", encoding="utf-8") - _default_handler.flush = sys.stderr.flush + DEFAULT_HANDLER.flush = sys.stderr.flush library_root_logger = _get_library_root_logger() - library_root_logger.addHandler(_default_handler) + library_root_logger.addHandler(DEFAULT_HANDLER) library_root_logger.setLevel(_default_logging_level) library_root_logger.propagate = False @@ -86,8 +86,8 @@ def set_handler(handler: logging.Handler) -> None: _get_library_root_logger().addHandler(handler) -def set_default_handler() -> None: - set_handler(_default_handler) +def setDEFAULT_HANDLER() -> None: + set_handler(DEFAULT_HANDLER) def unset_handler(handler: logging.Handler) -> None: @@ -98,8 +98,8 @@ def unset_handler(handler: logging.Handler) -> None: _get_library_root_logger().removeHandler(handler) -def unset_default_handler() -> None: - unset_handler(_default_handler) +def unsetDEFAULT_HANDLER() -> None: + unset_handler(DEFAULT_HANDLER) def set_propagation() -> None: diff --git a/scrapegraphai/utils/parse_state_keys.py b/scrapegraphai/utils/parse_state_keys.py index 85712ef6..107397e9 100644 --- 
a/scrapegraphai/utils/parse_state_keys.py +++ b/scrapegraphai/utils/parse_state_keys.py @@ -13,19 +13,22 @@ def parse_expression(expression, state: dict) -> list: state (dict): Dictionary of state keys used to evaluate the expression. Raises: - ValueError: If the expression is empty, has adjacent state keys without operators, invalid operator usage, - unbalanced parentheses, or if no state keys match the expression. + ValueError: If the expression is empty, has adjacent state keys without operators, + invalid operator usage, unbalanced parentheses, or if no state keys match the expression. Returns: - list: A list of state keys that match the boolean expression, ensuring each key appears only once. + list: A list of state keys that match the boolean expression, + ensuring each key appears only once. Example: >>> parse_expression("user_input & (relevant_chunks | parsed_document | document)", {"user_input": None, "document": None, "parsed_document": None, "relevant_chunks": None}) ['user_input', 'relevant_chunks', 'parsed_document', 'document'] - This function evaluates the expression to determine the logical inclusion of state keys based on provided boolean logic. - It checks for syntax errors such as unbalanced parentheses, incorrect adjacency of operators, and empty expressions. + This function evaluates the expression to determine the + logical inclusion of state keys based on provided boolean logic. + It checks for syntax errors such as unbalanced parentheses, + incorrect adjacency of operators, and empty expressions. 
""" # Check for empty expression diff --git a/scrapegraphai/utils/proxy_rotation.py b/scrapegraphai/utils/proxy_rotation.py index 07e04d0f..6f6019e9 100644 --- a/scrapegraphai/utils/proxy_rotation.py +++ b/scrapegraphai/utils/proxy_rotation.py @@ -6,7 +6,6 @@ import random import re from typing import List, Optional, Set, TypedDict - import requests from fp.errors import FreeProxyException from fp.fp import FreeProxy diff --git a/scrapegraphai/utils/research_web.py b/scrapegraphai/utils/research_web.py index 101693e4..fe7902d3 100644 --- a/scrapegraphai/utils/research_web.py +++ b/scrapegraphai/utils/research_web.py @@ -1,3 +1,6 @@ +""" +Research_web module +""" import re from typing import List from langchain_community.tools import DuckDuckGoSearchResults @@ -5,13 +8,15 @@ import requests from bs4 import BeautifulSoup -def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10, port: int = 8080) -> List[str]: +def search_on_web(query: str, search_engine: str = "Google", + max_results: int = 10, port: int = 8080) -> List[str]: """ Searches the web for a given query using specified search engine options. Args: query (str): The search query to find on the internet. - search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', 'Bing', or 'SearXNG'. Default is 'Google'. + search_engine (str, optional): Specifies the search engine to use, + options include 'Google', 'DuckDuckGo', 'Bing', or 'SearXNG'. Default is 'Google'. max_results (int, optional): The maximum number of search results to return. port (int, optional): The port number to use when searching with 'SearXNG'. Default is 8080. @@ -25,19 +30,19 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int = >>> search_on_web("example query", search_engine="Google", max_results=5) ['http://example.com', 'http://example.org', ...] 
""" - + if search_engine.lower() == "google": res = [] for url in google_search(query, stop=max_results): res.append(url) return res - + elif search_engine.lower() == "duckduckgo": research = DuckDuckGoSearchResults(max_results=max_results) res = research.run(query) links = re.findall(r'https?://[^\s,\]]+', res) return links - + elif search_engine.lower() == "bing": headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" @@ -46,24 +51,24 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int = response = requests.get(search_url, headers=headers) response.raise_for_status() soup = BeautifulSoup(response.text, "html.parser") - + search_results = [] for result in soup.find_all('li', class_='b_algo', limit=max_results): link = result.find('a')['href'] search_results.append(link) return search_results - + elif search_engine.lower() == "searxng": url = f"http://localhost:{port}" params = {"q": query, "format": "json"} - + # Send the GET request to the server response = requests.get(url, params=params) - + # Parse the response and limit to the specified max_results data = response.json() limited_results = data["results"][:max_results] return limited_results - + else: raise ValueError("The only search engines available are DuckDuckGo, Google, Bing, or SearXNG") diff --git a/scrapegraphai/utils/sys_dynamic_import.py b/scrapegraphai/utils/sys_dynamic_import.py index 30f75d15..19b0d29a 100644 --- a/scrapegraphai/utils/sys_dynamic_import.py +++ b/scrapegraphai/utils/sys_dynamic_import.py @@ -5,7 +5,7 @@ import sys import typing - +import importlib.util # noqa: F401 if typing.TYPE_CHECKING: import types @@ -24,9 +24,6 @@ def srcfile_import(modpath: str, modname: str) -> "types.ModuleType": Raises: ImportError: If the module cannot be imported from the srcfile """ - import importlib.util # noqa: F401 - - # spec = importlib.util.spec_from_file_location(modname, 
modpath) if spec is None: diff --git a/scrapegraphai/utils/token_calculator.py b/scrapegraphai/utils/token_calculator.py index 5b23fdf4..c5263efe 100644 --- a/scrapegraphai/utils/token_calculator.py +++ b/scrapegraphai/utils/token_calculator.py @@ -22,7 +22,8 @@ def truncate_text_tokens(text: str, model: str, encoding_name: str) -> List[str] >>> truncate_text_tokens("This is a sample text for truncation.", "GPT-3", "EMBEDDING_ENCODING") ["This is a sample text", "for truncation."] - This function ensures that each chunk of text can be tokenized by the specified model without exceeding the model's token limit. + This function ensures that each chunk of text can be tokenized + by the specified model without exceeding the model's token limit. """ encoding = tiktoken.get_encoding(encoding_name) From 4440790f00c1ddd416add7af895756ab42c30bf3 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 2 Aug 2024 10:01:32 +0000 Subject: [PATCH 47/51] ci(release): 1.11.0-beta.9 [skip ci] ## [1.11.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.8...v1.11.0-beta.9) (2024-08-02) ### Features * refactoring of the code ([9355507](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9355507a2dc73342f325b6649e871df48ae13567)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2cdb565..481eeae6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.11.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.8...v1.11.0-beta.9) (2024-08-02) + + +### Features + +* refactoring of the code ([9355507](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9355507a2dc73342f325b6649e871df48ae13567)) + ## [1.11.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.7...v1.11.0-beta.8) (2024-08-01) diff --git a/pyproject.toml b/pyproject.toml index 431488e5..70be09b5 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.11.0b8" +version = "1.11.0b9" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From ade28fca2c3fdf40f28a80854e3b8435a52a6930 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Fri, 2 Aug 2024 12:15:43 +0200 Subject: [PATCH 48/51] fix(AbstractGraph): instantiation of Azure GPT models Closes #498 --- requirements-dev.lock | 1 + requirements.lock | 1 + requirements.txt | 1 + scrapegraphai/graphs/abstract_graph.py | 8 ++++---- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index 24b7156d..d14f9d42 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -185,6 +185,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 diff --git a/requirements.lock b/requirements.lock index 0e8bb930..7dbac1f3 100644 --- a/requirements.lock +++ b/requirements.lock @@ -133,6 +133,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 diff --git a/requirements.txt b/requirements.txt index 8f3f5da5..9c11363c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ semchunk>=1.0.1 langchain-fireworks>=0.1.3 langchain-community>=0.2.9 langchain-huggingface>=0.0.3 +browserbase==0.3.0 diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index a7493351..f07bcb10 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -146,6 +146,10 @@ def handle_model(model_name, provider, token_key, default_token=8192): llm_params["model"] = model_name return init_chat_model(**llm_params) + if "azure" in llm_params["model"]: + model_name 
= llm_params["model"].split("/")[-1] + return handle_model(model_name, "azure_openai", model_name) + if "gpt-" in llm_params["model"]: return handle_model(llm_params["model"], "openai", llm_params["model"]) @@ -154,10 +158,6 @@ def handle_model(model_name, provider, token_key, default_token=8192): token_key = llm_params["model"].split("/")[-1] return handle_model(model_name, "fireworks", token_key) - if "azure" in llm_params["model"]: - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, "azure_openai", model_name) - if "gemini" in llm_params["model"]: model_name = llm_params["model"].split("/")[-1] return handle_model(model_name, "google_genai", model_name) From ee30a83f8a77958be6881ca0a94b02d278f37a61 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 2 Aug 2024 12:33:59 +0000 Subject: [PATCH 49/51] ci(release): 1.11.0-beta.10 [skip ci] ## [1.11.0-beta.10](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.9...v1.11.0-beta.10) (2024-08-02) ### Bug Fixes * **AbstractGraph:** instantiation of Azure GPT models ([ade28fc](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ade28fca2c3fdf40f28a80854e3b8435a52a6930)), closes [#498](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/498) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 481eeae6..cf226b3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.11.0-beta.10](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.9...v1.11.0-beta.10) (2024-08-02) + + +### Bug Fixes + +* **AbstractGraph:** instantiation of Azure GPT models ([ade28fc](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ade28fca2c3fdf40f28a80854e3b8435a52a6930)), closes [#498](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/498) + ## [1.11.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.11.0-beta.8...v1.11.0-beta.9) 
(2024-08-02) diff --git a/pyproject.toml b/pyproject.toml index 70be09b5..576861bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.11.0b9" +version = "1.11.0b10" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ From a3d7f1b71c0633e2c880f58e210b9516b331fe56 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 5 Aug 2024 14:54:14 +0200 Subject: [PATCH 50/51] Update csv_scraper_graph.py --- scrapegraphai/graphs/csv_scraper_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraphai/graphs/csv_scraper_graph.py b/scrapegraphai/graphs/csv_scraper_graph.py index f4efd1fb..d7ec186e 100644 --- a/scrapegraphai/graphs/csv_scraper_graph.py +++ b/scrapegraphai/graphs/csv_scraper_graph.py @@ -36,7 +36,7 @@ def _create_graph(self): input="csv | csv_dir", output=["doc"], ) - + generate_answer_node = GenerateAnswerCSVNode( input="user_prompt & (relevant_chunks | doc)", output=["answer"], From 66a29bc5cc2e8fc43409ca6fa6de928ccad78802 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 5 Aug 2024 20:54:53 +0200 Subject: [PATCH 51/51] refactoring of the openai examples Co-Authored-By: Matteo Vedovati <68272450+vedovati-matteo@users.noreply.github.com> --- examples/openai/csv_scraper_graph_multi_openai.py | 2 +- examples/openai/csv_scraper_openai.py | 2 +- examples/openai/custom_graph_openai.py | 2 +- examples/openai/deep_scraper_openai.py | 2 +- examples/openai/json_scraper_multi_openai.py | 2 +- examples/openai/json_scraper_openai.py | 2 +- examples/openai/md_scraper_openai.py | 2 +- examples/openai/pdf_scraper_multi_openai.py | 7 +++---- examples/openai/pdf_scraper_openai.py | 2 +- examples/openai/scrape_plain_text_openai.py | 2 +- examples/openai/script_generator_openai.py | 2 +- examples/openai/search_graph_schema_openai.py | 7 +++---- examples/openai/search_link_graph_openai.py | 11 
+++++++++-- examples/openai/smart_scraper_schema_openai.py | 2 +- examples/openai/speech_graph_openai.py | 2 +- examples/openai/xml_scraper_graph_multi_openai.py | 2 +- examples/openai/xml_scraper_openai.py | 2 +- scrapegraphai/graphs/search_graph.py | 1 - 18 files changed, 29 insertions(+), 25 deletions(-) diff --git a/examples/openai/csv_scraper_graph_multi_openai.py b/examples/openai/csv_scraper_graph_multi_openai.py index 771ad679..7b91c896 100644 --- a/examples/openai/csv_scraper_graph_multi_openai.py +++ b/examples/openai/csv_scraper_graph_multi_openai.py @@ -27,7 +27,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, } diff --git a/examples/openai/csv_scraper_openai.py b/examples/openai/csv_scraper_openai.py index 211f14f9..744fc7a4 100644 --- a/examples/openai/csv_scraper_openai.py +++ b/examples/openai/csv_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, } diff --git a/examples/openai/custom_graph_openai.py b/examples/openai/custom_graph_openai.py index 905473e0..6687e0ef 100644 --- a/examples/openai/custom_graph_openai.py +++ b/examples/openai/custom_graph_openai.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, } diff --git a/examples/openai/deep_scraper_openai.py b/examples/openai/deep_scraper_openai.py index 4860a31f..5b7202d4 100644 --- a/examples/openai/deep_scraper_openai.py +++ b/examples/openai/deep_scraper_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4", + "model": "gpt-4o", }, "verbose": True, "max_depth": 1 diff --git a/examples/openai/json_scraper_multi_openai.py b/examples/openai/json_scraper_multi_openai.py index 021cd6e1..b27e5050 100644 --- a/examples/openai/json_scraper_multi_openai.py +++ b/examples/openai/json_scraper_multi_openai.py @@ -13,7 +13,7 @@ graph_config = { "llm": { 
"api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", } } diff --git a/examples/openai/json_scraper_openai.py b/examples/openai/json_scraper_openai.py index 25fc85af..eb5d1e7e 100644 --- a/examples/openai/json_scraper_openai.py +++ b/examples/openai/json_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, } diff --git a/examples/openai/md_scraper_openai.py b/examples/openai/md_scraper_openai.py index 7a163137..c3e2a3df 100644 --- a/examples/openai/md_scraper_openai.py +++ b/examples/openai/md_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, } diff --git a/examples/openai/pdf_scraper_multi_openai.py b/examples/openai/pdf_scraper_multi_openai.py index 9e699e58..49a9c7fa 100644 --- a/examples/openai/pdf_scraper_multi_openai.py +++ b/examples/openai/pdf_scraper_multi_openai.py @@ -3,11 +3,10 @@ """ import os import json +from typing import List from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph - from pydantic import BaseModel, Field -from typing import List +from scrapegraphai.graphs import PdfScraperMultiGraph load_dotenv() @@ -20,7 +19,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, "verbose": True, } diff --git a/examples/openai/pdf_scraper_openai.py b/examples/openai/pdf_scraper_openai.py index e07a7ab5..2b0e19f3 100644 --- a/examples/openai/pdf_scraper_openai.py +++ b/examples/openai/pdf_scraper_openai.py @@ -14,7 +14,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, "verbose": True, } diff --git a/examples/openai/scrape_plain_text_openai.py b/examples/openai/scrape_plain_text_openai.py index ffe0054a..7f390cff 100644 --- a/examples/openai/scrape_plain_text_openai.py +++ b/examples/openai/scrape_plain_text_openai.py @@ -30,7 
+30,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, } diff --git a/examples/openai/script_generator_openai.py b/examples/openai/script_generator_openai.py index 14c00ab4..046a25ec 100644 --- a/examples/openai/script_generator_openai.py +++ b/examples/openai/script_generator_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, "library": "beautifulsoup" } diff --git a/examples/openai/search_graph_schema_openai.py b/examples/openai/search_graph_schema_openai.py index e5131461..ecbcc644 100644 --- a/examples/openai/search_graph_schema_openai.py +++ b/examples/openai/search_graph_schema_openai.py @@ -3,14 +3,13 @@ """ import os +from typing import List from dotenv import load_dotenv -load_dotenv() - +from pydantic import BaseModel, Field from scrapegraphai.graphs import SearchGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info -from pydantic import BaseModel, Field -from typing import List +load_dotenv() # ************************************************ # Define the output schema for the graph diff --git a/examples/openai/search_link_graph_openai.py b/examples/openai/search_link_graph_openai.py index 10d10d4c..818f9434 100644 --- a/examples/openai/search_link_graph_openai.py +++ b/examples/openai/search_link_graph_openai.py @@ -1,16 +1,23 @@ """ Basic example of scraping pipeline using SmartScraper """ + +import os +from dotenv import load_dotenv from scrapegraphai.graphs import SearchLinkGraph from scrapegraphai.utils import prettify_exec_info + +load_dotenv() # ************************************************ # Define the configuration for the graph # ************************************************ +openai_key = os.getenv("OPENAI_APIKEY") + graph_config = { "llm": { - "api_key": "s", - "model": "gpt-3.5-turbo", + "api_key": openai_key, + "model": "gpt-4o", }, "verbose": True, "headless": False, 
diff --git a/examples/openai/smart_scraper_schema_openai.py b/examples/openai/smart_scraper_schema_openai.py index 076f1327..d9e1bd1c 100644 --- a/examples/openai/smart_scraper_schema_openai.py +++ b/examples/openai/smart_scraper_schema_openai.py @@ -30,7 +30,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key":openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/speech_graph_openai.py b/examples/openai/speech_graph_openai.py index 15cc2cfb..603ce51c 100644 --- a/examples/openai/speech_graph_openai.py +++ b/examples/openai/speech_graph_openai.py @@ -25,7 +25,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", "temperature": 0.7, }, "tts_model": { diff --git a/examples/openai/xml_scraper_graph_multi_openai.py b/examples/openai/xml_scraper_graph_multi_openai.py index 46633bba..ef46b877 100644 --- a/examples/openai/xml_scraper_graph_multi_openai.py +++ b/examples/openai/xml_scraper_graph_multi_openai.py @@ -29,7 +29,7 @@ graph_config = { "llm": { "api_key":openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/xml_scraper_openai.py b/examples/openai/xml_scraper_openai.py index 5be5716e..b2b5075e 100644 --- a/examples/openai/xml_scraper_openai.py +++ b/examples/openai/xml_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, "verbose":False, } diff --git a/scrapegraphai/graphs/search_graph.py b/scrapegraphai/graphs/search_graph.py index 787e537a..0c0f1104 100644 --- a/scrapegraphai/graphs/search_graph.py +++ b/scrapegraphai/graphs/search_graph.py @@ -53,7 +53,6 @@ def __init__(self, prompt: str, config: dict, schema: Optional[BaseModel] = None self.copy_config = copy(config) else: self.copy_config = deepcopy(config) - self.copy_schema = deepcopy(schema) self.considered_urls = 
[] # New attribute to store URLs