In [1]:
# Load environment variables from a .env file before any client is created.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
from dotenv import load_dotenv
# Left as the last expression so the cell displays the call's return value.
load_dotenv()

True

In [11]:
from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader


In [6]:
# Recursively collect every PDF below ./data and parse each file with PyPDFLoader,
# showing a progress bar while loading.
loader = DirectoryLoader(
    path="./data",
    loader_cls=PyPDFLoader,
    glob="**/*.pdf",
    show_progress=True,
)

In [8]:
# Parse all matched PDFs. The recorded run took about 74 seconds for 10 files
# and produced 1724 Document objects.
docs = loader.load()
num_docs = len(docs)
print(f"Loaded {num_docs} documents")

100%|██████████| 10/10 [01:14<00:00,  7.40s/it]

Loaded 1724 documents





In [None]:
# Preview only the first few documents: displaying the whole list would dump
# every loaded Document (1724 in the recorded run) into the notebook output.
docs[:3]

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [13]:
# Chunking configuration: sizes are measured with len(), with a target chunk
# size of 1000 and an overlap of 200 between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=200,
    chunk_size=1000,
)

In [14]:
# Break the loaded documents into chunks for embedding
# (3816 chunks in the recorded run).
doc_splits = splitter.split_documents(docs)
num_chunks = len(doc_splits)
print(f"Split into {num_chunks} chunks")

Split into 3816 chunks


In [19]:
from langchain_openai import OpenAIEmbeddings

In [23]:
from langchain_chroma import Chroma

In [24]:
from pathlib import Path

# On-disk location of the Chroma index.
PERSIST_DIR = "./rag_db"
embeddings = OpenAIEmbeddings()

# Chroma.from_documents embeds and inserts every chunk each time it is called,
# so re-running it against an existing persist directory stores duplicate
# entries and pays for all the embeddings again. Reopen the index if it is
# already on disk and only build it on the first run.
# To force a rebuild (e.g. after the PDFs change), delete PERSIST_DIR first.
_persist_path = Path(PERSIST_DIR)
if _persist_path.is_dir() and any(_persist_path.iterdir()):
    vector_store = Chroma(
        persist_directory=PERSIST_DIR,
        embedding_function=embeddings,
    )
else:
    vector_store = Chroma.from_documents(
        doc_splits,
        embeddings,
        persist_directory=PERSIST_DIR,
    )

In [25]:
# Similarity-search retriever over the vector store; each query asks for k=5 chunks.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)

In [30]:
from langchain_core.prompts import ChatPromptTemplate,HumanMessagePromptTemplate,PromptTemplate

In [31]:
# Single human-message prompt with two placeholders: {context} and {question}.
# The template is built from adjacent string literals rather than a backslash
# continuation inside one literal, which would embed the next source line's
# leading indentation in the prompt text sent to the model.
prompt = ChatPromptTemplate(
    input_variables=["context", "question"],
    metadata={"description": "A prompt for answering questions based on context."},
    messages=[
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=["context", "question"],
                template=(
                    "You are a helpful assistant. "
                    "Use the following context to answer the question. "
                    "If you don't know the answer, just say that you could not find the answer."
                    "\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:"
                ),
            )
        )
    ],
)

In [32]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents joined by a blank line ("\\n\\n"); an empty string
        when ``docs`` is empty.
    """
    page_texts = (document.page_content for document in docs)
    return "\n\n".join(page_texts)

In [36]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI


In [46]:
# Retrieval-augmented chain; it is invoked with the question string as input.
rag_chain = (
    # "context": the input is sent through the retriever and the results are
    # flattened to text by format_docs.
    # "question": RunnablePassthrough forwards the input unchanged.
    {"context":retriever | format_docs,"question":RunnablePassthrough()}
    # Fill the {context} and {question} placeholders of the prompt.
    | prompt
    # Chat model that generates the answer.
    | ChatOpenAI(model="gpt-3.5-turbo", temperature=0.5)
    # Reduce the model's message output to a plain string.
    | StrOutputParser()
)

In [47]:
# Smoke test: run one question through the full chain and print the answer text.
question = "what is  encoder"
answer = rag_chain.invoke(question)
print(answer)

An encoder is a component of a neural network that maps input data into an embedding space, creating a representation of the input data that can be used for further processing or analysis.
