<a href="https://colab.research.google.com/github/AlessiaLeoFolliero/Practicing-with-python-/blob/main/Reformulation_RAGtutorial_for_pdf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

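Adapted from the Real Python tutorial "Build an LLM RAG Chatbot With LangChain", reworked here to read its source data from a PDF: https://realpython.com/build-llm-rag-chatbot-with-langchain/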

In [None]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

# Ingestion pipeline: PDF -> documents -> overlapping chunks -> FAISS index.

# Read the source PDF; the path is resolved relative to the working directory.
loader = PyPDFLoader("hospital_reviews.pdf")
documents = loader.load()

# Break the loaded documents into chunks of at most 500 characters, with a
# 50-character overlap between consecutive chunks.
splitter_config = {"chunk_size": 500, "chunk_overlap": 50}
text_splitter = RecursiveCharacterTextSplitter(**splitter_config)
docs = text_splitter.split_documents(documents)

# Embed every chunk with OpenAI embeddings and store the vectors in FAISS.
# NOTE: OpenAIEmbeddings() needs an OpenAI API key to be configured
# (typically the OPENAI_API_KEY environment variable).
embeddings = OpenAIEmbeddings()
faiss_index = FAISS.from_documents(documents=docs, embedding=embeddings)

# Prompt definitions.
#
# System message: task instructions followed by the retrieved review text,
# which is substituted for the {context} placeholder.
review_template = """Your job is to use patient
reviews to answer questions about their experience at a hospital. Use
the following context to answer questions. Be as detailed as possible, but
don't make up any information that's not from the context. If you don't know
an answer, say you don't know.
{context}
"""

system_template = PromptTemplate(
    template=review_template,
    input_variables=["context"],
)
review_system_prompt = SystemMessagePromptTemplate(prompt=system_template)

# Human message: the user's question, passed through as-is.
human_template = PromptTemplate(
    template="{question}",
    input_variables=["question"],
)
review_human_prompt = HumanMessagePromptTemplate(prompt=human_template)

# Full chat prompt: the system message first, then the human message.
messages = [review_system_prompt, review_human_prompt]
review_prompt = ChatPromptTemplate(
    messages=messages,
    input_variables=["context", "question"],
)

# Create RetrievalQA chain
#
# chain_type="stuff" places all retrieved chunks into a single prompt.
#
# The number of chunks to retrieve must be given through `search_kwargs`;
# a bare `k=10` keyword on as_retriever() is not forwarded to the similarity
# search, so the retriever would fall back to its default result count.
#
# The custom prompt is supplied at construction time via `chain_type_kwargs`
# rather than by overwriting a nested attribute afterwards, so the chain
# validates that the prompt exposes the expected "context" variable.
reviews_pdf_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-4", temperature=0),
    chain_type="stuff",
    retriever=faiss_index.as_retriever(search_kwargs={"k": 10}),
    chain_type_kwargs={"prompt": review_prompt},
)

# Example question
question = "What do patients say about their experience with Dr. Smith?"

# Chain.run() is deprecated; invoke() is its replacement. RetrievalQA reads
# the question from the "query" key and returns a dict whose "result" entry
# holds the answer text, so `response` is still a plain string.
response = reviews_pdf_chain.invoke({"query": question})["result"]
print(response)
