# 🤖 LitReview AI - Advanced Literature Review Assistant

> **Capstone Project for Google's 5-Day AI Agent Intensive Course**

LitReview AI is a multi-agent system powered by Google's Agent Development Kit (ADK) and Gemini 2.5 Flash. It automates the process of conducting a literature review by searching for papers, selecting the most relevant ones, extracting key findings, and synthesizing a professional report with citations.

## 1. Install Dependencies

In [None]:
!pip install -q google-generativeai google-adk arxiv duckduckgo-search ddgs python-dotenv

## 2. Setup & Configuration

In [None]:
import os
import json
import asyncio
import arxiv
from duckduckgo_search import DDGS
from google.adk.agents import LlmAgent, SequentialAgent, LoopAgent
from google.adk.models.google_llm import Gemini
from google.adk.runners import InMemoryRunner
from google.genai import types

# --- API KEY SETUP ---
# Resolution order:
#   1. Kaggle Secrets, when the `kaggle_secrets` module is importable.
#   2. A local `.env` file (only if python-dotenv is installed), then the
#      process environment.
GOOGLE_API_KEY = None
try:
    from kaggle_secrets import UserSecretsClient
except ImportError:
    # Not running on Kaggle; the local fallback below takes over.
    pass
else:
    try:
        user_secrets = UserSecretsClient()
        GOOGLE_API_KEY = user_secrets.get_secret("GOOGLE_API_KEY")
    except Exception as e:
        # The lookup itself can fail even though the import succeeded
        # (presumably when the secret is not attached to the notebook or the
        # secrets service is unreachable). Report it and keep going so the
        # fallback and the warning below still run instead of the cell crashing.
        print(f"Kaggle Secrets lookup failed: {e}")

if not GOOGLE_API_KEY:
    # Fallback for local testing. python-dotenv is optional here: an already
    # exported GOOGLE_API_KEY must work even when the package is missing.
    try:
        from dotenv import load_dotenv
    except ImportError:
        pass
    else:
        load_dotenv()
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

if not GOOGLE_API_KEY:
    print("‚ö†Ô∏è WARNING: GOOGLE_API_KEY not found! Please set it in Kaggle Secrets or .env")
else:
    # CRITICAL: Set the environment variable so the Google Gen AI SDK can find it
    os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
    print("‚úÖ API Key loaded and set in environment")

# Retry policy for model HTTP calls. The listed status codes are the
# rate-limit (429) and server-side (500/503/504) responses; up to 5 attempts
# are made with a growing delay that starts at 1 and uses 7 as the
# exponential base.
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],
    initial_delay=1,
    exp_base=7,
    attempts=5,
)

# Single model handle shared by every agent defined in this notebook.
model = Gemini(retry_options=retry_config, model="gemini-2.5-flash-lite")

## 3. Define Tools

In [None]:
def _arxiv_records(query: str) -> list:
    # Gather up to 20 relevance-sorted ArXiv hits as plain dicts. Any failure
    # is printed and whatever was collected before it is still returned.
    records = []
    try:
        client = arxiv.Client()
        request = arxiv.Search(
            query=query,
            max_results=20,
            sort_by=arxiv.SortCriterion.Relevance,
        )
        for entry in client.results(request):
            record = {
                "title": entry.title,
                "url": entry.entry_id,
                # Abstracts arrive with hard line breaks; flatten them.
                "abstract": entry.summary.replace("\n", " "),
                "authors": ", ".join(person.name for person in entry.authors),
                "year": entry.published.year,
                "source": "ArXiv",
            }
            records.append(record)
    except Exception as err:
        print(f"ArXiv error: {err}")
    return records


def _web_records(query: str) -> list:
    # Gather up to 20 DuckDuckGo text hits for "<query> research paper".
    # Web hits carry no author/year metadata, so those fields are "Unknown".
    records = []
    try:
        with DDGS() as engine:
            hits = list(engine.text(f"{query} research paper", max_results=20))
            for hit in hits:
                record = {
                    "title": hit.get('title', 'No Title'),
                    "url": hit.get('href', ''),
                    "abstract": hit.get('body', ''),
                    "authors": "Unknown",
                    "year": "Unknown",
                    "source": "Web",
                }
                records.append(record)
    except Exception as err:
        print(f"Web search error: {err}")
    return records


def search_papers_tool(query: str) -> str:
    """
    Searches for research papers on ArXiv and Web.
    Args:
        query: The research topic to search for.
    Returns:
        A JSON string containing a list of papers.
    """
    # NOTE(review): the docstring above is kept verbatim because the agent
    # framework presumably exposes it to the model as the tool description.
    print(f"üîç Searching for: {query}...")

    # ArXiv results come first, web results second; each source is
    # best-effort, so one failing backend never hides the other's hits.
    collected = []
    collected.extend(_arxiv_records(query))
    collected.extend(_web_records(query))

    print(f"‚úÖ Found {len(collected)} papers total")
    return json.dumps(collected)

## 4. Define Agents

In [None]:
# 1. Search Agent
# First stage of the workflow and the only agent that is handed a tool.
# Its prompt tells the model to call `search_papers_tool` and to hand back the
# raw JSON list of papers without further processing.
search_agent = LlmAgent(
    name="SearchAgent",
    model=model,
    instruction="""
    You are a Research Librarian.
    Your goal is to find a broad list of research papers for a given query.
    Use the `search_papers_tool` to get the raw data.
    
    Output: Return the raw JSON list of papers found.
    """,
    # Plain Python function exposed to the agent as a callable tool.
    tools=[search_papers_tool],
    description="Searches for research papers on ArXiv and Web"
)

# 2. Selection Agent
# Second stage: narrows the search results down to five papers and asks for
# them newest-first. It has no tools; the filtering and sorting are done by
# the model following the prompt, not by Python code.
# NOTE(review): the prompt refers to "the previous agent" without naming an
# explicit hand-off mechanism — presumably the earlier output is visible via
# the shared conversation; confirm against the ADK docs.
selection_agent = LlmAgent(
    name="SelectionAgent",
    model=model,
    instruction="""
    You are a Senior Editor.
    Input: The list of research papers provided by the previous agent.
    Task: Select the **top 5** most relevant and high-quality papers.
    
    Sorting Logic:
    - Prioritize papers with a known Year.
    - Sort the final 5 papers by Year (Descending/Newest First).
    - The JSON array MUST be ordered such that index 0 is the newest paper.
    
    Output: Return the SORTED JSON list of 5 papers.
    """,
    description="Selects top 5 papers from search results"
)

# 3. Extraction Agent
# Third stage: asks the model to enrich each of the five selected papers with
# three extra fields (key_findings, methodology, relevance) and to return the
# enriched JSON list. It has no tools, so the prompt gives it nothing beyond
# the data produced by the earlier stages to work from.
extraction_agent = LlmAgent(
    name="ExtractionAgent",
    model=model,
    instruction="""
    You are a Research Analyst.
    Input: The list of 5 selected papers provided by the previous agent.
    Task: For each paper, extract:
    - key_findings
    - methodology
    - relevance
    
    Output: Return the enriched JSON list with these details added.
    """,
    description="Extracts key findings from papers"
)

# 4. Synthesis Agent (Iterative)
# Writer half of the refinement loop. The prompt covers both passes: on the
# first pass it drafts the review from the analyzed papers, on later passes it
# revises its draft using the reviewer's feedback. The required layout is
# spelled out in the prompt: exactly five paragraphs in the given order, each
# opening with "<first author> et al." and ending with a [n] marker, followed
# by a "### References" list with a blank line between entries.
synthesis_agent = LlmAgent(
    name="SynthesisAgent",
    model=model,
    instruction="""
    You are an Academic Writer.
    Input: 
    - First run: A list of 5 analyzed papers.
    - Subsequent runs: Your previous draft AND the Reviewer's feedback.
    
    Task: Write (or rewrite) a comprehensive literature review report.
    
    If you receive feedback, use it to IMPROVE your draft. Fix any issues mentioned.
    
    CRITICAL OUTPUT FORMAT:
    - Write EXACTLY 5 paragraphs, one for each paper.
    - **ORDER**: Discuss papers in the exact order provided (which is sorted by date).
    - **PARAGRAPH START**: Start EACH paragraph with the first author's name and "et al." (e.g., "Pan et al. ...").
    - **CITATION**: End each paragraph with a sequential citation marker: [1], [2], [3], [4], [5].
    
    - **REFERENCES SECTION**:
      Add a "### References" section at the end.
      You MUST format this as a list.
      CRITICAL: Put a BLANK LINE (double newline) between each reference.
      
      Example format:
      [1] Title, Authors, Year, URL
      
      [2] Title, Authors, Year, URL
      ...
    
    Output: Return the full literature review text.
    """,
    description="Writes literature review report"
)

# 5. Evaluation Agent (Iterative)
# Reviewer half of the refinement loop; it runs after the synthesis agent.
# The prompt asks for a 1-10 score with feedback FIRST and then the review
# text repeated verbatim, so this agent's output carries both the critique
# and the draft.
# NOTE(review): because this agent is the last step of the loop, its output
# (score + feedback + review) is presumably what the caller ends up treating
# as the final result — confirm that the score preamble is wanted there.
evaluation_agent = LlmAgent(
    name="EvaluationAgent",
    model=model,
    instruction="""
    You are a Reviewer.
    Input: The literature review report provided by the previous agent.
    Task: Evaluate if it follows the 5-paragraph format and has correct citations.
    
    OUTPUT:
    - First, provide your Score (1-10) and brief feedback.
    - Then, output the ORIGINAL literature review text exactly as received.
    
    If the score is low (< 8), be very specific about what needs to be fixed in your feedback.
    
    IMPORTANT: You are the final step of the loop. Return the full review text.
    """,
    description="Evaluates literature review quality"
)

# Write -> review cycle: the synthesis agent drafts, the evaluation agent
# critiques, and the pair is repeated for at most two iterations. No early
# exit condition is configured here, only the iteration cap.
refinement_loop = LoopAgent(
    name="RefinementLoop",
    max_iterations=2,
    sub_agents=[synthesis_agent, evaluation_agent],
    description="Iteratively improves the literature review",
)

# Top-level pipeline, executed strictly in list order:
# search -> select top 5 -> extract details -> write/review loop.
workflow = SequentialAgent(
    name="LitReviewWorkflow",
    sub_agents=[search_agent, selection_agent, extraction_agent, refinement_loop],
    description="Full literature review workflow",
)

## 5. Execution

In [None]:
def _latest_text(events):
    """Return the stripped text of the most recent event that carries text.

    Events are scanned newest-first; within the first event that has any
    text-bearing part, the first such part wins. Returns None when no event
    carries text.
    """
    for event in reversed(list(events or ())):
        content = getattr(event, 'content', None)
        # `parts` may exist as an attribute yet be None; treat that as "no
        # parts" instead of letting the loop raise TypeError.
        for part in getattr(content, 'parts', None) or ():
            text = getattr(part, 'text', None)
            if text:
                return text.strip()
    return None


async def run_literature_review(topic: str):
    """Run the full literature-review workflow for `topic` and print the result.

    Args:
        topic: The research topic handed to the workflow as the user message.

    Prints a banner followed by the text of the last event that produced any
    text, or "No output generated." when no event carried text.
    """
    print(f"üöÄ Starting Literature Review for: {topic}")

    runner = InMemoryRunner(agent=workflow)
    result = await runner.run_debug(topic)

    # Use None (not the placeholder string) as the "nothing found" marker so
    # real model output can never be mistaken for the sentinel.
    final_output = _latest_text(result)
    if final_output is None:
        final_output = "No output generated."

    print("\n" + "="*60)
    print("FINAL LITERATURE REVIEW")
    print("="*60 + "\n")
    print(final_output)

In [None]:
# Run the review (Change the topic here)
TOPIC = "Multi-Agent Systems in Healthcare"

if GOOGLE_API_KEY:
    await run_literature_review(TOPIC)
else:
    print("‚ùå Cannot run: API Key missing")