<a href="https://colab.research.google.com/github/Bheemrajpaidipelli/AI_Research_Assistant_Agent/blob/main/AI_Research_Assistant_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the agent's dependencies. `%pip` (instead of `!pip`) targets the
# environment of the kernel that is running this notebook.
%pip install langgraph langchain langchain-google-genai sentence-transformers faiss-cpu pypdf




In [None]:
# Upgrade the LangChain packages that provide the community integrations and
# text splitters. The explicit `%pip` magic does not depend on IPython's
# automagic setting the way a bare `pip ...` line does.
%pip install -U langchain langchain-community langchain-text-splitters




In [None]:
# Import necessary libraries

In [None]:
import os
import getpass
from typing import TypedDict, List

from langgraph.graph import StateGraph, END
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader





In [None]:
# Ask for the Gemini API key only when it is not already set, so re-running
# this cell does not force the key to be typed again. getpass keeps the key
# out of the notebook source and its saved output.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter Gemini API Key: ").strip()

# Fail here, with a clear message, rather than on the first LLM call.
if not os.environ["GOOGLE_API_KEY"]:
    raise ValueError("A Gemini API key is required to run this notebook.")


Enter Gemini API Key: ··········


In [None]:
# Initialize LLM

In [None]:
# Gemini chat model shared by every node below that calls llm.invoke().
llm = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash", temperature=0.2)


In [None]:
# Define Graph State

In [None]:
class ResearchState(TypedDict):
    """State passed between the nodes of the research graph.

    Each node returns a dict holding only the key(s) it fills in.
    """

    # Research question supplied when the graph is invoked.
    question: str
    # Intent label produced by classify_intent (stripped, lower-cased LLM reply).
    intent: str
    # Text of the chunks returned by the retriever for the question.
    documents: List[str]
    # LLM explanation of the retrieved context (set by reasoning_agent).
    reasoning: str
    # LLM-identified limitations, gaps and future directions (set by gap_analysis_agent).
    gaps: str
    # Final structured response (set by final_synthesizer).
    final_answer: str


In [None]:
# Node 1 Intent Classification

In [None]:
def classify_intent(state: ResearchState):
    """Ask the LLM which kind of research question this is.

    Args:
        state: Graph state; only ``state['question']`` is read.

    Returns:
        A partial state update ``{"intent": ...}`` holding the model's
        reply with surrounding whitespace removed and lower-cased.
    """
    question = state['question']
    prompt = f"""
    Classify the research intent of this question into one of:
    - literature_survey
    - experimental
    - implementation

    Question: {question}
    """
    label = llm.invoke(prompt).content.strip().lower()
    return {"intent": label}


In [None]:
# Router Logic

In [None]:
def route_intent(state: ResearchState):
    """Choose the node that runs after intent classification.

    Every intent is currently answered through document retrieval, so all
    routes resolve to ``"retrieve_docs"``. The keyword table keeps the
    routing extensible: to branch, point a keyword at another node name and
    add that name to the conditional-edge mapping of the graph.

    Args:
        state: Graph state; only ``state["intent"]`` is read. A missing or
            empty intent falls back to the default route instead of raising.

    Returns:
        The name of the next node as a string.
    """
    default_route = "retrieve_docs"
    # Keyword found in the classifier's reply -> node to run next.
    keyword_routes = {
        "survey": "retrieve_docs",
        "experiment": "retrieve_docs",
    }

    # The classifier reply is free text, so match keywords by substring.
    intent = state.get("intent") or ""
    for keyword, node_name in keyword_routes.items():
        if keyword in intent:
            return node_name
    return default_route


In [None]:
# Node 2 Retriever

In [None]:
# Build the retriever: upload a PDF, split it into chunks, embed the chunks
# and index them in FAISS. PyPDFLoader, RecursiveCharacterTextSplitter,
# HuggingFaceEmbeddings and FAISS come from the notebook's import cell.
from google.colab import files

# ---- SETTINGS ----
CHUNK_SIZE = 500       # chunk_size given to the text splitter
CHUNK_OVERLAP = 50     # chunk_overlap given to the text splitter
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
TOP_K = 3              # "k" search argument given to the retriever

# ---- UPLOAD ----
uploaded = files.upload()

# Guard against an empty upload (e.g. a cancelled dialog), which would
# otherwise fail with an unhelpful error on the filename lookup below.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a PDF.")

# Get uploaded filename (the first one if several files were selected)
filename = next(iter(uploaded))
print("Uploaded file:", filename)

# ---- LOAD PDF ----
loader = PyPDFLoader(f"/content/{filename}")
docs = loader.load()

print("Pages loaded:", len(docs))

# ---- TEXT SPLITTING ----
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# IMPORTANT: split the loaded documents (docs), not the raw upload mapping
chunks = splitter.split_documents(docs)
print("Total chunks:", len(chunks))

# A PDF with no extractable text produces no chunks; stop with a clear
# message instead of failing while the index is being built.
if not chunks:
    raise ValueError(f"No text could be extracted from {filename!r}.")

# ---- EMBEDDINGS ----
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# ---- VECTOR STORE ----
vectorstore = FAISS.from_documents(chunks, embeddings)

# ---- RETRIEVER ----
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})

print("Retriever ready!")


Saving brain-tumour-image-segmentation-using-deep-networks-3myy0a6c40.pdf to brain-tumour-image-segmentation-using-deep-networks-3myy0a6c40 (6).pdf
Uploaded file: brain-tumour-image-segmentation-using-deep-networks-3myy0a6c40 (6).pdf
Pages loaded: 10
Total chunks: 108


  embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Retriever ready!


In [None]:
# ---- LANGGRAPH RETRIEVER NODE ----
def retrieve_docs(state):
    """LangGraph node: fetch the chunks most relevant to the question.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"documents": [...]}`` containing the
        ``page_content`` of each document returned by the retriever.
    """
    # Query the module-level retriever through its invoke() method.
    hits = retriever.invoke(state["question"])
    return {"documents": [hit.page_content for hit in hits]}


In [None]:
# Node 3 Reasoning Agent

In [None]:
def reasoning_agent(state: ResearchState):
    """Have the LLM explain the retrieved context with respect to the question.

    Args:
        state: Graph state; reads ``state["documents"]`` (joined with blank
            lines to form the context) and ``state['question']``.

    Returns:
        A partial state update ``{"reasoning": ...}`` with the model's reply.
    """
    joined_chunks = "\n\n".join(state["documents"])
    question = state['question']
    prompt = f"""
    Using the following research context, explain the key ideas,
    methods, and findings related to the question.

    Context:
    {joined_chunks}

    Question:
    {question}
    """
    answer = llm.invoke(prompt)
    return {"reasoning": answer.content}


In [None]:

# Node 4 Gap Analysis Agent

In [None]:
def gap_analysis_agent(state: ResearchState):
    """Have the LLM list limitations, open gaps and future directions.

    Args:
        state: Graph state; only ``state['reasoning']`` is read and used as
            the summary to analyse.

    Returns:
        A partial state update ``{"gaps": ...}`` with the model's reply.
    """
    summary = state['reasoning']
    prompt = f"""
    Based on the following research summary, identify:
    - Limitations
    - Open research gaps
    - Possible future directions

    Summary:
    {summary}
    """
    return {"gaps": llm.invoke(prompt).content}


In [None]:
# Final Synthesizer

In [None]:
def final_synthesizer(state: ResearchState):
    """Combine the reasoning and gap analysis into the final response.

    Args:
        state: Graph state; reads ``state['reasoning']`` and
            ``state['gaps']``.

    Returns:
        A partial state update ``{"final_answer": ...}`` with the model's
        structured reply.
    """
    reasoning_text = state['reasoning']
    gaps_text = state['gaps']
    prompt = f"""
    Create a structured research assistant response with:
    - Brief overview
    - Key methods
    - Research gaps
    - Future directions

    Reasoning:
    {reasoning_text}

    Gaps:
    {gaps_text}
    """
    synthesis = llm.invoke(prompt)
    return {"final_answer": synthesis.content}


In [None]:
# Building Lang Graph

In [None]:
# Assemble the workflow: intent classification followed by a linear
# retrieve -> reason -> gap-analysis -> synthesize chain.
graph = StateGraph(ResearchState)

# Register each node under the name the edges refer to.
for node_name, node_fn in [
    ("classify_intent", classify_intent),
    ("retrieve_docs", retrieve_docs),
    ("reasoning", reasoning_agent),
    ("gap_analysis", gap_analysis_agent),
    ("final", final_synthesizer),
]:
    graph.add_node(node_name, node_fn)

graph.set_entry_point("classify_intent")

# The value returned by route_intent is looked up in this mapping to
# select the node that follows classification.
graph.add_conditional_edges(
    "classify_intent", route_intent, {"retrieve_docs": "retrieve_docs"}
)

# Wire the remaining stages in order, finishing at END.
for source, target in [
    ("retrieve_docs", "reasoning"),
    ("reasoning", "gap_analysis"),
    ("gap_analysis", "final"),
    ("final", END),
]:
    graph.add_edge(source, target)

app = graph.compile()


In [None]:
# Run the Agent

In [None]:
# Read the research question. Surrounding whitespace is dropped and an empty
# question is rejected up front instead of being sent through the whole graph.
query = input("Enter your research question: ").strip()
if not query:
    raise ValueError("Please enter a non-empty research question.")

# Only "question" is seeded here; each node adds its own key to the state.
result = app.invoke({
    "question": query
})

print("\n--- AI Research Assistant Output ---\n")
print(result["final_answer"])


Enter your research question: How is Retrieval-Augmented Generation used in healthcare decision support systems?

--- AI Research Assistant Output ---

This response is structured based on the provided research context, focusing on its current scope, identified limitations, and potential advancements, particularly highlighting the absence of Retrieval-Augmented Generation (RAG).

---

### Research Assistant Response: Brain Tumor Segmentation using Machine Learning

#### 1. Brief Overview

The provided research primarily focuses on the application of machine learning (ML) and deep learning (DL) techniques for the automated enhancement and segmentation of brain tumors, specifically gliomas. The core objective is to improve the efficiency of human diagnostic practices and automate challenging tasks in medical image analysis. The research details the performance of various ML models in segmenting key tumor regions: "enhancing tumour," "whole tumour," and "tumour core."

#### 2. Key Methods