Data Loading with Langchhain

In [1]:
from langchain.document_loaders import PyPDFLoader
#loading the data from a pdf file
data="D:\MIT_ADT\Intern\Build fast\RAG_dataset.pdf"
loader=PyPDFLoader(data)

#processing the data into the document that langchain can use in further tasks
documents=loader.load()
#print(f"Loaded {len(documents)} documents from the datasets.")

In [2]:
print(f"Loaded {len(documents)} documents from the datasets.")  #give the number of pages in the pdf

Loaded 100 documents from the datasets.


Setting up the RAG with langchain

In [3]:
import getpass  #propmts the user for imput and hides the text they type(API KEY)
import os
from langchain_openai import OpenAIEmbeddings

# ensuring that the openai key is set
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

In [4]:
from langchain.embeddings import OpenAIEmbeddings #converting the doc into vector embeddings (numerical representation)
from langchain.vectorstores import FAISS #storing the ebedded doc into teh FAISS vector
# Retrieve the OpenAI API key from the environment variable
#creating the embeddings and setting up the FAISS vectorstore
embeddings=OpenAIEmbeddings()
vectorstore=FAISS.from_documents(documents,embeddings)
retriever=vectorstore.as_retriever()  #Converting the faiss vector store into a retriever that can fetch relevant docs based on teh query"""


  embeddings=OpenAIEmbeddings()


Chatbot construction

In [5]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    template="You are a helpful assistant. Based on the context below, answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
    input_variables=["context", "question"],
)
# Initialize the chat model (gpt-3.5-turbo or gpt-4)
import os
chat_model = ChatOpenAI( openai_api_key=os.environ["OPENAI_API_KEY"])
# loading the lm 
llm = ChatOpenAI(model="gpt-3.5-turbo") 
# Set up the retrieval QA pipeline
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # You could also experiment with other chain types like "map_reduce"
    retriever=retriever,
    return_source_documents=True,  # This will return the source documents alongside the answer
    chain_type_kwargs={"prompt": prompt}
)
 #this retrieverQA ties LLM with a retriever(FAISS)tos et up the pipleine for answering questions
print("rag pipeline setup completed.")

rag pipeline setup completed.


  chat_model = ChatOpenAI( openai_api_key=os.environ["OPENAI_API_KEY"])


In [6]:
queryy = "What is the outline of this paper?"

result = qa({"query": queryy}) # retrieving the the answer  and documents by running the query
answer = result['result'] #accessing the ans and source docs sepaartely
source_documents = result['source_documents']

# Display the answer and source documents
print("Answer:", answer)
print("Source Documents:", source_documents)

  result = qa({"query": queryy}) # retrieving the the answer  and documents by running the query


Answer: The outline of this paper includes an introduction, Meta Chain-Of-Thought, and Towards Deliberate Reasoning With Language Models - Search.
Source Documents: [Document(id='d724a927-c8de-433f-bafd-57a0bf8a4ba0', metadata={'source': 'D:\\MIT_ADT\\Intern\\Build fast\\RAG_dataset.pdf', 'page': 51}, page_content='Towards System 2 Reasoning in LLMs: Learning How to Think With Meta Chain-of-Thought\n10. Acknowledgments\nWe would like to thank Aviral Kumar, Benjamin Eysenbach, Nathan Lambert, Rishabh Agarwal, Sasha\nRush and Noah Goodman for the fruitful discussions and feedback on this report.\n52'), Document(id='f2b64216-181f-4b4e-a605-38840927ef17', metadata={'source': 'D:\\MIT_ADT\\Intern\\Build fast\\RAG_dataset.pdf', 'page': 2}, page_content='Towards System 2 Reasoning in LLMs: Learning How to Think With Meta Chain-of-Thought\n8 Going Forward 43\n8.1 The "Big MATH" Project . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44\n8.1.1 Data Sourcing . . . . . . . . .