In [15]:
#!pip install google-generativeai python-dotenv langchain PyPDF2 faiss-cpu sentence-transformers langchain-community

In [1]:
import os
import google.generativeai as genai
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
import PyPDF2

In [None]:
# Load variables from a local .env file (if one exists) into the process
# environment so the key lookup below can find them.
load_dotenv()

# Read the Gemini API key from the environment rather than embedding it in the
# notebook, so the secret never ends up in shared copies or version control.
_api_key = os.getenv("GOOGLE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this cell."
    )
genai.configure(api_key=_api_key)

In [3]:
# Colab-only helper: opens a browser file picker; the result is a mapping
# keyed by the name of each uploaded file.
from google.colab import files

uploaded = files.upload()
if not uploaded:
    # A cancelled dialog yields an empty mapping; fail with a clear message
    # instead of an IndexError on the lookup below.
    raise RuntimeError("No file was uploaded. Re-run this cell and select a PDF.")

# Only the first uploaded file is used by the rest of the notebook.
pdf_path = next(iter(uploaded))
print("📄 PDF uploaded successfully:", pdf_path)

Saving Social institutions – Introduction to the Social Sciences.pdf to Social institutions – Introduction to the Social Sciences.pdf
📄 PDF uploaded successfully: Social institutions – Introduction to the Social Sciences.pdf


In [None]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path`` as one string.

    Each page's text is followed by a newline. A page whose ``extract_text()``
    result is falsy (``None`` or empty) contributes only that newline.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_pages = PyPDF2.PdfReader(pdf_file).pages
        # Extract while the file is still open; the reader reads from it.
        page_texts = [(pdf_page.extract_text() or "") + "\n" for pdf_page in pdf_pages]
    return "".join(page_texts)


In [5]:
def create_vectorstore(text, chunk_size=500, chunk_overlap=100, model_name="all-MiniLM-L6-v2"):
    """Split ``text`` into overlapping chunks and index them in a FAISS store.

    Args:
        text: Raw document text to index.
        chunk_size: Target maximum chunk length handed to the splitter.
        chunk_overlap: Amount of overlap between consecutive chunks.
        model_name: Name of the HuggingFace embedding model used for the chunks.

    Returns:
        The FAISS vector store built from the embedded chunks.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or produces no chunks
            (for example, a PDF from which no text could be extracted).
    """
    # Fail early with an actionable message: indexing zero documents would
    # otherwise error out inside the vector-store library.
    if not text or not text.strip():
        raise ValueError("Cannot build a vector store: the document contains no text.")

    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_text(text)
    if not chunks:
        raise ValueError("Cannot build a vector store: splitting the text produced no chunks.")

    docs = [Document(page_content=chunk) for chunk in chunks]
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(docs, embeddings)


In [10]:
def answer_query(query, retriever, model_name="gemini-2.5-pro"):
    """Answer ``query`` with Gemini, grounded on documents from ``retriever``.

    Args:
        query: The user's question.
        retriever: Object that returns documents exposing ``page_content`` for
            a query, via ``invoke`` or the legacy ``get_relevant_documents``.
        model_name: Gemini model used to generate the answer.

    Returns:
        The model's answer with surrounding whitespace stripped, or a short
        fallback message when the response contains no text.
    """
    # Prefer the Runnable-style `invoke`; fall back to the older
    # `get_relevant_documents` for retrievers that only offer that method.
    retrieve = getattr(retriever, "invoke", None) or retriever.get_relevant_documents
    relevant_docs = retrieve(query)
    context = "\n".join(doc.page_content for doc in relevant_docs)

    model = genai.GenerativeModel(model_name)
    prompt = f"""
    You are an intelligent assistant. Use the context below to answer the user's question accurately and clearly.

    Context:
    {context}

    Question:
    {query}
    """

    response = model.generate_content(prompt)
    try:
        return response.text.strip()
    except ValueError:
        # The `text` accessor raises ValueError when the response carries no
        # text part (e.g. it was blocked); report that instead of crashing
        # the caller's chat loop.
        return "I couldn't generate an answer for that question (the model returned no text). Please try rephrasing."

In [13]:
def rag_chat(pdf_path, k=3):
    """Run an interactive question-answering loop over the PDF at ``pdf_path``.

    Extracts the PDF text, builds a vector store from it, then repeatedly
    reads a question from standard input and prints the generated answer.
    The loop ends when the user types ``quit`` or ``exit`` (case-insensitive,
    surrounding whitespace ignored) or when input ends or is interrupted.

    Args:
        pdf_path: Path of the PDF to chat about.
        k: Number of chunks the retriever returns for each question.
    """
    print("🔍 Reading PDF and creating knowledge base...")
    text = extract_text_from_pdf(pdf_path)
    vectorstore = create_vectorstore(text)
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})

    print("✅ Setup complete! You can now ask questions about your PDF.")
    print("Type 'quit' or 'exit' to stop.\n")

    while True:
        try:
            query = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel should end the chat
            # cleanly instead of surfacing a traceback.
            print("\n👋 Exiting chat. Goodbye!")
            break

        # Skip blank lines so no model call is spent on an empty question.
        if not query:
            continue

        if query.lower() in ("quit", "exit"):
            print("👋 Exiting chat. Goodbye!")
            break

        answer = answer_query(query, retriever)
        print(f"\n🤖: {answer}\n")


In [14]:
# Start the interactive chat on the uploaded PDF. `pdf_path` is assigned by
# the upload cell, so that cell must have been run before this one.
if __name__ == "__main__":
    rag_chat(pdf_path)


🔍 Reading PDF and creating knowledge base...
✅ Setup complete! You can now ask questions about your PDF.
Type 'quit' or 'exit' to stop.

You: What are instituions basically?

🤖: Based on the context provided, institutions are social structures that the state has an overarching role in organizing.

The text gives the following examples of institutions:
*   Education
*   The economy
*   The criminal justice system
*   The family
*   The university

You: How marriage is an institution

🤖: Based on the context provided, while the text focuses on the **family** as a social institution, marriage can be understood as a key component of that institution. Here's how:

*   **The Family as an Institution:** The text explicitly names "the family unit" as a social institution. It is described as a complex social structure with its own norms, rules, and conventions.

*   **Upholding Social Norms:** The family as an institution sets a "model" for what is considered a legitimate family structure. It u