In [None]:
from langchain.document_loaders import Docx2txtLoader
from langchain_community.document_loaders import Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import os

# Load every DOCX file found directly inside the source folder.
folder_path = "A4/Physics/physics"
all_docs = []
# os.listdir() returns entries in arbitrary order; sorting keeps the document
# order (and therefore the chunk order in the saved index) stable across runs.
for filename in sorted(os.listdir(folder_path)):
    # Word creates "~$name.docx" lock files next to documents that are open;
    # they are not real documents, so skip them instead of loading them.
    if filename.startswith("~$"):
        continue
    # Case-insensitive match so files such as "NOTES.DOCX" are included too.
    if filename.lower().endswith(".docx"):
        loader = Docx2txtLoader(os.path.join(folder_path, filename))
        all_docs.extend(loader.load())

# Split the loaded documents into overlapping chunks (500 characters each,
# with 100 characters shared between neighbouring chunks).
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(all_docs)

print(f"✅ Total Chunks: {len(chunks)}")

# Stop here with a clear message when nothing was loaded; otherwise the
# failure only surfaces later, inside the index build, with no hint that the
# folder was the problem.
if not chunks:
    raise ValueError(
        f"No text chunks were produced from .docx files in {folder_path!r}; "
        "check the folder path and its contents."
    )

# Create the embedding model used to vectorise each chunk.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build the FAISS vector store from the chunks and persist it to the
# "faiss_index" folder so it can be reloaded without re-embedding.
vectorstore = FAISS.from_documents(chunks, embedding_model)
vectorstore.save_local("faiss_index")

print("✅ FAISS index saved.")


In [None]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Re-create the embedding model; it must be the same model name that was used
# when the "faiss_index" folder was written, because it is passed to the
# loader below.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Reload the persisted FAISS index from disk.
# NOTE(review): allow_dangerous_deserialization=True opts in to deserialising
# stored data from the index folder — only load index folders you created
# yourself (here: the "faiss_index" folder saved by this notebook), never
# ones obtained from an untrusted source.
vectorstore = FAISS.load_local(
    "faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True,
)

# Text-generation backend: a local GPT-2 pipeline capped at 200 new tokens,
# wrapped so LangChain can use it as an LLM.
generator = pipeline("text-generation", model="gpt2", max_new_tokens=200)
llm = HuggingFacePipeline(pipeline=generator)

# Wire the retriever and the LLM into a retrieval QA chain. The chain is asked
# to return the source documents alongside the generated answer.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

print("QA Chain ready to use!")


In [None]:
# Interactive chat loop: read a question, run it through the QA chain and
# print the answer. Typing "exit" or "quit" (any letter case) ends the loop.
while True:
    try:
        # Strip surrounding whitespace so inputs like "exit " are still
        # recognised and the chain never receives padded queries.
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted prompt ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        print("Bye! 👋")
        break
    if not query:
        # Nothing was typed; prompt again rather than querying the chain
        # with an empty string.
        continue
    if query.lower() in ["exit", "quit"]:
        print("Bye! 👋")
        break
    response = qa_chain.invoke({"query": query})  # invoke returns a dict
    answer = response["result"]  # the generated answer text
    print("Bot:", answer)
