In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate

# ----------------------------
# 1. Load Documents
# ----------------------------
# Start the corpus with whatever the PDF loader returns ...
loader = PyPDFLoader("sample.pdf")
docs = list(loader.load())

# ... then append the contents of the UTF-8 text file to the same list.
loader2 = TextLoader("notes.txt", encoding="utf-8")
docs += loader2.load()

# ----------------------------
# 2. Split Documents into Chunks
# ----------------------------
# Splitter settings kept together so they are easy to tune in one place.
splitter_settings = {"chunk_size": 500, "chunk_overlap": 50}
splitter = RecursiveCharacterTextSplitter(**splitter_settings)
chunks = splitter.split_documents(docs)

# ----------------------------
# 3. Embeddings + VectorStore
# ----------------------------
# Embed every chunk with the sentence-transformers model and index the
# resulting vectors in a FAISS store.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
vectorstore = FAISS.from_documents(
    chunks,
    embeddings,
)

# ----------------------------
# 4. LLM
# ----------------------------
# Chat model used to generate the final answer.
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4o-mini",
)

# ----------------------------
# 5. Prompt Template
# ----------------------------
# Raw template text; {context} and {question} are placeholders that get
# filled in when the prompt is formatted.
rag_template = """
You are a helpful assistant. 
Use the following context to answer the user question.
If you don’t know the answer, say "I don’t know".

Context:
{context}

Question: {question}
Answer:
"""
prompt = ChatPromptTemplate.from_template(rag_template)

# ----------------------------
# 6. RAG Chain
# ----------------------------
# Ask the vector store for the 3 best-matching chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    # "map_reduce" and "refine" are also valid chain types.
    # NOTE(review): those chain types take differently named prompt kwargs,
    # so chain_type_kwargs below would need adjusting -- confirm before switching.
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    # Keep the retrieved documents in the output so sources can be listed.
    return_source_documents=True,
)

# ----------------------------
# 7. Run a Query
# ----------------------------
query = "What is mentioned about neural networks?"
# Use .invoke(): calling the chain object directly (qa_chain({...})) goes
# through Chain.__call__, which is deprecated since LangChain 0.1.
result = qa_chain.invoke({"query": query})

print("Answer:", result["result"])
print("\nSources:")
# Each source document's metadata dict is printed as-is.
for doc in result["source_documents"]:
    print("-", doc.metadata)
