In [3]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:

# Import Necessary Libraries

from dotenv import load_dotenv, dotenv_values
import google.generativeai as genai
from IPython.display import Markdown, display
import os

# Load variables from a local .env file into the process environment

load_dotenv()

# Re-export TESSDATA_PREFIX only when it is actually defined.
# os.getenv returns None for a missing variable, and os.environ only accepts
# string values, so an unconditional write-back raises TypeError on machines
# where the variable is not configured.

tessdata = os.getenv("TESSDATA_PREFIX")
if tessdata is not None:
    os.environ['TESSDATA_PREFIX'] = tessdata

# Configure the Google Generative AI client with the key read from the environment

my_api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=my_api_key)

In [5]:

from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Google embedding model

embedding = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
)

# Google chat model, with the sampling temperature fixed at 0

model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
)

In [6]:
# Load the source web page, parsing only the elements whose CSS class is
# post-content, post-title or post-header
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)
docs = loader.load()

In [7]:
# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=200)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [8]:
## Embedding model passed to the vector store
# NOTE(review): this rebinds `embedding` with the same arguments as the
# earlier model-initialisation cell; one of the two looks redundant.
embedding = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
)

In [None]:
# Prerequisite (run once, in the kernel's environment): %pip install psycopg2-binary pgvector

In [9]:
import os

from langchain_community.vectorstores.pgvector import PGVector

# The connection string embeds the database password, so it is read from the
# environment (for example a .env file) rather than committed in the notebook.
# Expected form:
#   postgresql+psycopg2://<user>:<password>@<host>:<port>/<database>
CONNECTION_STRING = os.getenv("PGVECTOR_CONNECTION_STRING")
if not CONNECTION_STRING:
    raise RuntimeError(
        "PGVECTOR_CONNECTION_STRING is not set. Define it in the environment or "
        "in your .env file, e.g. "
        "postgresql+psycopg2://<user>:<password>@localhost:5432/vector_db"
    )
COLLECTION_NAME = "decomposition_vectors"

# Embed every chunk and store it in the named collection.
# NOTE(review): re-running this cell appears to insert the same chunks again;
# consider pre_delete_collection=True if duplicates matter - confirm first.
db = PGVector.from_documents(
    embedding=embedding,
    documents=splits,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    use_jsonb=True,
)

In [10]:
# Wrap the vector store as a retriever that asks for one document per query (k=1)
retriever = db.as_retriever(
    search_kwargs={"k": 1},
)

In [11]:
# Prompt
# Pull the "rlm/rag-prompt" prompt from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")

# Post-processing
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents in input order, or an empty string
        when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

In [12]:
# Assemble the RAG pipeline; each `|` feeds the output of one stage into the next.
rag_chain = (
    # "context": the retriever's results rendered to one string by format_docs.
    # "question": the chain input, forwarded as-is by RunnablePassthrough.
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt  # fill the pulled prompt with the context and question
    | model  # send the filled prompt to the chat model
    | StrOutputParser()  # parse the model output into a plain string
)


In [13]:
from pprint import pprint
# Run the chain on a sample question and pretty-print the returned answer.
result = rag_chain.invoke("What is Task Decomposition?")
pprint(result)

('Task decomposition is the process of breaking down a complex task into '
 'smaller, more manageable steps. This is often achieved through the use of '
 'chain of thought prompting, which encourages the model to think step-by-step '
 'and decompose the task into simpler subtasks. This approach helps to improve '
 "model performance on complex tasks and provides insights into the model's "
 'reasoning process. \n')
