# Literature Review Helper Agent

This notebook implements a multi-agent system to conduct automated literature reviews.

### Workflow:
1.  **Search Agent**: Searches for papers using the arXiv API and a DuckDuckGo web search.
2.  **Selection Agent**: Curates the top 5 most relevant papers from the search results.
3.  **Extraction Agent**: Extracts key findings, methodology, and relevance from the abstract of each selected paper.
4.  **Synthesis Agent**: Writes a structured 5-paragraph literature review with citations.
5.  **Evaluation Agent**: Critiques the quality of the generated review.

In [None]:
# Install the third-party packages used by this notebook (-q keeps pip quiet).
# NOTE(review): the import cell references the `duckduckgo_search` module, while
# the package installed here is named `ddgs` — verify the two match.
!pip install -q google-generativeai arxiv ddgs python-dotenv

In [None]:
# Standard library
import json
import os
import time
from typing import Any, Dict, List

# Third-party
import arxiv
import google.generativeai as genai
from dotenv import load_dotenv

# The install cell installs the `ddgs` distribution, whose importable module is
# `ddgs`; `duckduckgo_search` is the module name used by the older distribution.
# Try the installed name first and fall back, so either package satisfies the
# import instead of failing with ModuleNotFoundError.
try:
    from ddgs import DDGS
except ImportError:
    from duckduckgo_search import DDGS

# Load environment variables
load_dotenv()

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# A missing key is reported but not fatal, so the remaining cells can still be
# defined; Gemini is only configured when a key is present.
if not GOOGLE_API_KEY:
    print("‚ùå Error: GOOGLE_API_KEY not found in .env file.")
else:
    genai.configure(api_key=GOOGLE_API_KEY)
    print("‚úÖ API Key loaded and Gemini configured.")

# Configuration
MODEL_NAME = "gemini-1.5-flash" # Or gemini-pro
# NOTE(review): nothing visible in this notebook passes GENERATION_CONFIG to a
# model — confirm whether the agents are meant to use it.
GENERATION_CONFIG = {
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
}

## 1. Search Agent
Combines results from an arXiv search and a DuckDuckGo web search.

In [None]:
class SearchAgent:
    """Gather candidate papers from arXiv and from a DuckDuckGo web search.

    Both back ends produce records with the same keys (``title``, ``url``,
    ``abstract``, ``authors``, ``year``, ``source``) so later stages can treat
    them uniformly.
    """

    def __init__(self):
        self.arxiv_client = arxiv.Client()

    def search_arxiv(self, query: str, max_results: int = 20) -> List[Dict]:
        """Query arXiv for *query*, sorted by relevance.

        Args:
            query: Free-text search string.
            max_results: Upper bound on the number of results requested.

        Returns:
            One dict per result. On failure the error is printed and whatever
            was collected before the exception is returned.
        """
        print(f"   Running ArXiv search for '{query}'...")
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=arxiv.SortCriterion.Relevance,
        )

        results = []
        # Explicit loop rather than a comprehension: anything appended before
        # an exception is still returned to the caller.
        try:
            for result in self.arxiv_client.results(search):
                results.append({
                    "title": result.title,
                    "url": result.entry_id,
                    # Collapse hard line breaks in the abstract.
                    "abstract": result.summary.replace("\n", " "),
                    "authors": ", ".join(a.name for a in result.authors),
                    "year": result.published.year,
                    "source": "ArXiv",
                })
        except Exception as e:
            # Best effort: a failing source must not abort the whole search.
            print(f"   ‚ö†Ô∏è ArXiv search failed: {e}")
        return results

    def search_web(self, query: str, max_results: int = 20) -> List[Dict]:
        """Run a DuckDuckGo text search biased towards research PDFs.

        Args:
            query: Free-text search string.
            max_results: Upper bound on the number of results requested.

        Returns:
            One dict per hit; ``authors`` and ``year`` are ``"Unknown"``
            because the search snippet does not provide them. On failure the
            error is printed and the results gathered so far are returned.
        """
        print(f"   Running Web search for '{query}'...")
        results = []
        try:
            with DDGS() as ddgs:
                # Adding "filetype:pdf" or "research paper" to improve quality
                keywords = f"{query} research paper filetype:pdf"
                for hit in ddgs.text(keywords, max_results=max_results):
                    results.append({
                        "title": hit.get('title', 'No Title'),
                        "url": hit.get('href', ''),
                        "abstract": hit.get('body', ''),
                        "authors": "Unknown",  # Web search often misses authors in snippets
                        "year": "Unknown",
                        "source": "Web",
                    })
        except Exception as e:
            # Best effort: a failing source must not abort the whole search.
            print(f"   ‚ö†Ô∏è Web search failed: {e}")
        return results

    def search(self, query: str, max_results: int = 20) -> List[Dict]:
        """Search both sources and return arXiv results followed by web results.

        Args:
            query: Free-text search string.
            max_results: Per-source cap forwarded to both back ends, so the
                combined list holds at most ``2 * max_results`` entries.
                Defaults to 20, the limit each back end used previously.

        Returns:
            The concatenated list of result dicts (no de-duplication).
        """
        print(f"üîé [Search Agent] Starting search for: {query}")
        arxiv_results = self.search_arxiv(query, max_results=max_results)
        web_results = self.search_web(query, max_results=max_results)

        combined = arxiv_results + web_results
        print(f"‚úÖ [Search Agent] Found {len(combined)} total papers (ArXiv: {len(arxiv_results)}, Web: {len(web_results)})\n")
        return combined

## 2. Selection Agent
Selects the top 5 most relevant papers.

In [None]:
class SelectionAgent:
    """Ask the LLM to pick the five most relevant papers from the candidates."""

    def __init__(self, model_name=MODEL_NAME):
        self.model = genai.GenerativeModel(model_name)

    @staticmethod
    def _extract_json_array(text: str) -> str:
        """Return the JSON-array portion of a model reply.

        Strips a Markdown code fence if present, then trims any prose around
        the outermost ``[...]`` so replies such as ``Here you go: [0, 1]``
        still parse.
        """
        if "```json" in text:
            text = text.split("```json")[1].split("```")[0]
        elif "```" in text:
            text = text.split("```")[1].split("```")[0]

        start, end = text.find("["), text.rfind("]")
        if start != -1 and end > start:
            text = text[start:end + 1]
        return text

    def select_best_papers(self, query: str, papers: List[Dict]) -> List[Dict]:
        """Return up to five papers chosen by the model for *query*.

        Args:
            query: The research topic the papers were retrieved for.
            papers: Candidate records; each must provide ``title``,
                ``abstract`` and ``source``.

        Returns:
            At most five distinct entries from *papers*. If the model returns
            fewer valid IDs than needed, the list is padded with the earliest
            unused candidates; if the call or the parsing fails, the first
            five candidates are returned unchanged.
        """
        print(f"üéØ [Selection Agent] Selecting top 5 papers from {len(papers)} candidates...")

        # Nothing to choose from: skip the model call entirely.
        if not papers:
            return []

        # Prepare prompt (abstracts are truncated to keep it short)
        papers_str = "".join(
            f"ID: {i}\nTitle: {p['title']}\nAbstract: {p['abstract'][:200]}...\nSource: {p['source']}\n\n"
            for i, p in enumerate(papers)
        )

        prompt = f"""
        You are an expert research assistant. I have a list of papers related to the query: "{query}".
        
        Please select the **5 most relevant and high-quality papers** from the list below.
        Return ONLY a JSON array of the 5 selected IDs. 
        Example: [0, 4, 7, 12, 15]

        List of Papers:
        {papers_str}
        """

        try:
            response = self.model.generate_content(prompt)
            selected_ids = json.loads(self._extract_json_array(response.text.strip()))
            if not isinstance(selected_ids, list):
                raise ValueError("model did not return a JSON array")

            selected_papers = []
            seen = set()
            for raw_id in selected_ids:
                # Tolerate IDs returned as strings ("3"); skip anything that
                # is not a number instead of discarding the whole selection.
                try:
                    idx = int(raw_id)
                except (TypeError, ValueError):
                    continue
                # Ignore out-of-range IDs and IDs the model repeated.
                if 0 <= idx < len(papers) and idx not in seen:
                    seen.add(idx)
                    selected_papers.append(papers[idx])

            # Fallback if the model returned too few usable IDs. The target is
            # capped by the number of candidates so the warning is not printed
            # when fewer than five papers exist at all.
            wanted = min(5, len(papers))
            if len(selected_papers) < wanted:
                print("   ‚ö†Ô∏è Model returned fewer than 5 papers, padding with top results.")
                for p in papers:
                    if len(selected_papers) >= wanted:
                        break
                    if p not in selected_papers:
                        selected_papers.append(p)

            selected_papers = selected_papers[:5]
            print(f"‚úÖ [Selection Agent] Selected {len(selected_papers)} papers.")
            return selected_papers

        except Exception as e:
            # Best effort: any API or parsing failure falls back to search order.
            print(f"‚ùå [Selection Agent] Error: {e}. Returning top 5 raw results.")
            return papers[:5]

## 3. Extraction Agent
Extracts key findings, methodology, and relevance from the abstracts of the selected papers.

In [None]:
class ExtractionAgent:
    """Ask the LLM to summarise each selected paper's abstract into fixed fields."""

    # Fields requested from the model; each defaults to "N/A" when unavailable.
    _FIELDS = ("key_findings", "methodology", "relevance")

    def __init__(self, model_name=MODEL_NAME):
        self.model = genai.GenerativeModel(model_name)

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return the payload inside a Markdown code fence, or *text* unchanged."""
        if "```json" in text:
            return text.split("```json")[1].split("```")[0]
        if "```" in text:
            return text.split("```")[1].split("```")[0]
        return text

    def extract_details(self, papers: List[Dict]) -> List[Dict]:
        """Return a copy of each paper enriched with the extracted fields.

        Args:
            papers: Records providing at least ``title`` and ``abstract``.

        Returns:
            One new dict per input paper, in the same order, holding the
            original keys plus ``key_findings``, ``methodology`` and
            ``relevance``. A field is ``"N/A"`` when the model omitted it or
            when the call/parsing failed for that paper.
        """
        print(f"‚õèÔ∏è [Extraction Agent] Extracting details from {len(papers)} papers...")
        extracted_data = []
        total = len(papers)

        for i, paper in enumerate(papers):
            # Show the real total rather than assuming five papers.
            print(f"   Processing {i+1}/{total}: {paper['title'][:50]}...")
            prompt = f"""
            Analyze the following paper abstract and extract key details.
            
            Title: {paper['title']}
            Abstract: {paper['abstract']}
            
            Return a JSON object with:
            - "key_findings": (str) Main results or claims.
            - "methodology": (str) How the research was conducted.
            - "relevance": (str) Why this is important.
            """

            details = dict.fromkeys(self._FIELDS, "N/A")
            try:
                response = self.model.generate_content(prompt)
                data = json.loads(self._strip_code_fence(response.text.strip()))
                if not isinstance(data, dict):
                    raise ValueError("model did not return a JSON object")
                # Copy only the requested fields so the model's reply cannot
                # overwrite bibliographic keys such as title, url or authors.
                details.update({k: data[k] for k in self._FIELDS if k in data})
            except Exception as e:
                # Best effort: one failing paper must not abort the batch.
                print(f"   ‚ö†Ô∏è Extraction failed for paper {i}: {e}")
                details = dict.fromkeys(self._FIELDS, "N/A")

            # Merge with original paper data (the input dict is not mutated)
            extracted_data.append({**paper, **details})

        print(f"‚úÖ [Extraction Agent] Finished extraction.\n")
        return extracted_data

## 4. Synthesis Agent
Generates the 5-paragraph report.

In [None]:
class SynthesisAgent:
    """Ask the LLM to write a one-paragraph-per-paper literature review."""

    def __init__(self, model_name=MODEL_NAME):
        self.model = genai.GenerativeModel(model_name)

    def synthesize_report(self, query: str, extracted_data: List[Dict]) -> str:
        """Generate the review text for *query* from the extracted paper data.

        Args:
            query: The research topic, quoted verbatim in the prompt.
            extracted_data: JSON-serialisable paper records; the prompt asks
                for one paragraph and one citation marker per record.

        Returns:
            The model's reply, or a message containing the exception text if
            the model call fails.
        """
        print(f"‚úçÔ∏è [Synthesis Agent] Writing report for '{query}'...")

        # Derive paragraph and citation counts from the data instead of
        # assuming five papers, so a shorter list does not push the model to
        # invent entries. With five papers the prompt is unchanged.
        count = len(extracted_data)
        paper_word = "paper" if count == 1 else "papers"
        paragraph_word = "paragraph" if count == 1 else "paragraphs"
        markers = ", ".join(f"[{i}]" for i in range(1, count + 1))

        # Prepare data for prompt. ensure_ascii=False keeps accented author
        # names and titles readable instead of \uXXXX escapes.
        data_str = json.dumps(extracted_data, indent=2, ensure_ascii=False)

        prompt = f"""
        You are an academic writer. Write a literature review based on the following {count} {paper_word}.
        
        Research Query: "{query}"
        
        Papers Data:
        {data_str}
        
        **Strict Output Format Requirements:**
        1. Write exactly **{count} {paragraph_word}**. Each paragraph must focus on ONE paper in the order provided.
        2. At the end of each paragraph, add the citation marker like {markers}.
        3. After the {count} {paragraph_word}, add a section titled "### References".
        4. In the References section, list the full details for each paper (Title, Authors, Year, URL).
        
        Do not add any other intro or conclusion text. Just the {count} {paragraph_word} and the references.
        """

        try:
            response = self.model.generate_content(prompt)
            return response.text
        except Exception as e:
            # Failures are reported as text so the caller always receives a str.
            return f"‚ùå Synthesis failed: {e}"

## 5. Evaluation Agent
Checks the quality of the report.

In [None]:
class EvaluationAgent:
    """Final pipeline stage: has the LLM grade a finished literature review."""

    def __init__(self, model_name=MODEL_NAME):
        # Model handle used for the critique request.
        self.model = genai.GenerativeModel(model_name)

    def evaluate_report(self, report: str) -> str:
        """Request a 1-10 score and brief feedback for *report*.

        The prompt asks the model to check the paragraph count, the use of
        citations [1]-[5], and the References section. Returns the model's
        text, or a message containing the exception text if the call fails.
        """
        print("‚öñÔ∏è [Evaluation Agent] Evaluating report quality...")

        critique_prompt = f"""
        Evaluate the following literature review report.
        
        Report:
        {report}
        
        Check for:
        1. Are there exactly 5 paragraphs?
        2. Are citations [1]-[5] used correctly?
        3. Is the References section present and accurate?
        
        Provide a score (1-10) and brief feedback.
        """

        # Both the request and the `.text` access stay inside the try block so
        # a failure in either yields the failure message.
        try:
            verdict = self.model.generate_content(critique_prompt).text
        except Exception as err:
            return f"‚ùå Evaluation failed: {err}"
        return verdict

## Coordinator & Main Execution

In [None]:
def main(query: str):
    """Run the pipeline for *query*: search, select, extract, synthesize, evaluate.

    Prints progress, the generated report and its evaluation. Returns early,
    after printing a notice, when the search yields no papers.
    """
    # All agents are built up front, before any output is printed.
    searcher = SearchAgent()
    selector = SelectionAgent()
    extractor = ExtractionAgent()
    writer = SynthesisAgent()
    critic = EvaluationAgent()

    divider = "=" * 60

    print(f"üöÄ Starting Literature Review for: '{query}'")
    print(divider)

    # Stage 1: gather candidates; stop if there is nothing to review.
    candidates = searcher.search(query)
    if not candidates:
        print("‚ùå No papers found. Exiting.")
        return

    # Stages 2-4: shortlist, enrich, then write the review.
    shortlist = selector.select_best_papers(query, candidates)
    enriched = extractor.extract_details(shortlist)
    report = writer.synthesize_report(query, enriched)

    print(f"\n{divider}")
    print("üìù FINAL LITERATURE REVIEW REPORT")
    print(divider)
    print(report)
    print(f"{divider}\n")

    # Stage 5: critique the finished report.
    critique = critic.evaluate_report(report)
    print("üìä Evaluation Results:")
    print(critique)

# Example usage: run the pipeline end to end when executed as the main module.
if __name__ == "__main__":
    # Edit this string to review a different topic.
    query = "Multi-Agent Systems in Large Language Models"
    main(query)