In [6]:
# Install Dependencies
# Installs (or, because of -U, upgrades) the LangChain / LangGraph packages, the
# Gemini integration, and the document-parsing libraries listed on the line below.
# %pip (rather than !pip) targets the environment of the running kernel.
# NOTE(review): no versions are pinned, so a later re-run may resolve to different
# releases than the ones this notebook was written against — consider pinning.
%pip install -U langchain langchain-community langgraph langchain-google-genai pymupdf unstructured python-docx pillow ipywidgets



In [7]:
# Set Gemini API key
import os
import getpass


def _read_api_key(prompt_text):
    """Prompt (without echo) for an API key and return it stripped.

    Keeping the prompt inside a function avoids leaving the secret in a
    notebook-global variable.

    Raises:
        ValueError: if nothing but whitespace was entered, so the mistake is
            reported here instead of as an authentication failure later.
    """
    entered = getpass.getpass(prompt_text).strip()
    if not entered:
        raise ValueError("No Google Gemini API key was provided.")
    return entered


# Only prompt when no usable key is present, so re-running the cell (or running
# in an environment where the variable is already exported) does not force the
# user to paste the key again.
if not os.environ.get("GOOGLE_API_KEY", "").strip():
    os.environ["GOOGLE_API_KEY"] = _read_api_key("Paste your Google Gemini API key: ")


Paste your Google Gemini API key: ··········


In [8]:
# Upload documents (PDF, Word, image, etc.)
from google.colab import files
from pathlib import Path
from langchain_community.document_loaders import (
    PyMuPDFLoader, UnstructuredFileLoader, UnstructuredImageLoader,
    UnstructuredWordDocumentLoader, UnstructuredMarkdownLoader
)

print("Please upload your files below")
uploaded = files.upload()
documents = []

# File suffix -> loader class. Any suffix not listed here falls back to the
# generic UnstructuredFileLoader.
LOADER_BY_SUFFIX = {
    ".pdf": PyMuPDFLoader,
    ".doc": UnstructuredWordDocumentLoader,
    ".docx": UnstructuredWordDocumentLoader,
    ".png": UnstructuredImageLoader,
    ".jpg": UnstructuredImageLoader,
    ".jpeg": UnstructuredImageLoader,
    ".md": UnstructuredMarkdownLoader,
}

for file in uploaded:
    # The upload helper reports saving each file under a relative name
    # ("Saving X to X"), i.e. relative to the current working directory.
    # Resolving against cwd gives the same path as the old hard-coded
    # "/content/" prefix on a default Colab runtime, but keeps working if the
    # working directory has been changed.
    file_path = Path.cwd() / file
    ext = file_path.suffix.lower()
    try:
        loader_cls = LOADER_BY_SUFFIX.get(ext, UnstructuredFileLoader)
        loader = loader_cls(str(file_path))

        documents.extend(loader.load())
        print(f"✓ File processed: {file}")
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole upload.
        print(f"Skipped {file}: {e}")

print(f"Total documents added: {len(documents)}")

# Embed document chunks using Gemini embeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# Split every loaded document into overlapping chunks (1000 characters with a
# 200-character overlap) so each chunk can be embedded and retrieved on its own.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = splitter.split_documents(documents)

# Fail fast when nothing was extracted (every upload skipped, or files with no
# text). Otherwise the success message below would be printed for an empty
# index and every question would be answered without any context.
if not splits:
    raise ValueError(
        "No text could be extracted from the uploaded files; "
        "upload at least one readable document before indexing."
    )

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_store = InMemoryVectorStore(embeddings)
vector_store.add_documents(splits)
print("Document chunks are embedded and indexed.")

# Gemini model setup and prompting
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate

# Prompt with two placeholders: {context} receives the retrieved text and
# {question} receives the user's question.
prompt = PromptTemplate.from_template(
    template="""Refer to the context below and answer the question accurately:

{context}

Question: {question}
Your Response (keep it short and clear):"""
)

# Gemini text-completion model that answers the filled-in prompt.
llm = GoogleGenerativeAI(model="models/gemini-2.0-flash")

# Define RAG process
from typing_extensions import TypedDict, List
from langchain_core.documents import Document
from langgraph.graph import StateGraph, START

class State(TypedDict):
    """Schema of the state dictionary passed between the RAG graph steps.

    ``retrieve`` reads ``question`` and returns ``context``; ``generate`` reads
    ``question`` and ``context`` and returns ``answer``.
    """

    # The user's question, supplied when the graph is invoked.
    question: str
    # Documents returned by the vector-store similarity search in ``retrieve``.
    context: List[Document]
    # The model's reply, produced by ``generate``.
    answer: str

def retrieve(state: State):
    """Fetch the indexed chunks most similar to the question.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": [...]}`` holding whatever
        ``vector_store.similarity_search`` returns for the question.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}

def generate(state: State):
    """Answer the question using the retrieved documents as context.

    Args:
        state: Graph state; ``state["context"]`` and ``state["question"]``
            are read.

    Returns:
        A partial state update ``{"answer": ...}`` containing the value
        returned by ``llm.invoke`` for the filled-in prompt.
    """
    # Join the retrieved page contents with blank lines between them.
    passages = [doc.page_content for doc in state["context"]]
    filled_prompt = prompt.invoke(
        {"question": state["question"], "context": "\n\n".join(passages)}
    )
    return {"answer": llm.invoke(filled_prompt)}

# Assemble a linear pipeline over State: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

# Run the chatbot
# Interactive loop: read a question, run it through the graph, print the answer.
# Typing 'exit' (any case, surrounding whitespace ignored) ends the session.
print("\nChatbot ready. Start asking questions based on uploaded files.")
while True:
    try:
        user_input = input("Ask something (or type 'exit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or an interrupted kernel: end the session
        # cleanly instead of surfacing a traceback.
        print("\nSession finished.")
        break
    if user_input.lower() == "exit":
        print("Session finished.")
        break
    if not user_input:
        # Nothing to ask; skip the retrieval and model calls and prompt again.
        continue
    try:
        result = graph.invoke({"question": user_input})
        print("Response:", result["answer"])
    except Exception as err:
        # Keep the session alive when a single request fails.
        print("Error while processing:", err)


Please upload your files below


Saving ML Lec 2 - Part 1.pdf to ML Lec 2 - Part 1.pdf
✓ File processed: ML Lec 2 - Part 1.pdf
Total documents added: 37
Document chunks are embedded and indexed.

Chatbot ready. Start asking questions based on uploaded files.
Ask something (or type 'exit'): what is neural network?
Response: Based on the provided context, a neural network is:

*   A collection of perceptrons.
*   A set of organized (sequential) layers.
*   Inspired by biological neural networks.
Ask something (or type 'exit'): How to Choose an Activation Function?
Response: Choose an activation function that is continuous, differentiable (almost everywhere), has a derivative that doesn't saturate or explode over its expected input range.
Ask something (or type 'exit'): what is Loss Function ?
Response: A measure of how well the network predicts values. The goal of the network is to minimize the loss function.
Ask something (or type 'exit'): exit
Session finished.
