In [36]:
!pip install google-generativeai tavily-python python-dotenv



In [37]:
import os
import json
import time
from datetime import datetime
from typing import List, Dict, Any
import google.generativeai as genai
from tavily import TavilyClient
from google.colab import files
import io

In [38]:
# Set up your API keys here
GEMINI_API_KEY = "AIzaSyC-0m9Hqa-ov82NOXV13EcZJXbN2Ne8nmc"  # Replace with your actual Gemini API key
TAVILY_API_KEY = "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"  # Replace with your actual Tavily API key
# Configure Gemini
genai.configure(api_key=GEMINI_API_KEY)

# Initialize Tavily client
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)

In [39]:


class ReActWebResearchAgent:
    """
    ReAct (Reasoning + Acting) Agent for Web Research

    This agent uses the ReAct pattern to:
    1. Plan: Generate research questions using LLM
    2. Act: Search the web for answers
    3. Compile: Create a structured report
    """

    def __init__(self, gemini_api_key: str, tavily_api_key: str):
        """Initialize the agent with API keys"""
        try:
            # Configure Gemini
            genai.configure(api_key=gemini_api_key)
            self.model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")


            # Initialize Tavily
            self.tavily_client = TavilyClient(api_key=tavily_api_key)

            print("✅ ReAct Agent initialized successfully!")

        except Exception as e:
            print(f"❌ Error initializing agent: {str(e)}")
            raise

    def generate_research_questions(self, topic: str, num_questions: int = 5) -> List[str]:
        """
        PLANNING PHASE: Use LLM to generate research questions

        Args:
            topic (str): The research topic
            num_questions (int): Number of questions to generate (supports 1-20 questions)

        Returns:
            List[str]: List of research questions
        """
        print(f"🧠 PLANNING PHASE: Generating {num_questions} research questions for '{topic}'...")

        # Adjust prompt based on number of questions requested
        if num_questions <= 5:
            depth_instruction = "Focus on the most essential aspects"
        elif num_questions <= 10:
            depth_instruction = "Cover both fundamental and detailed aspects"
        else:
            depth_instruction = "Provide comprehensive coverage including niche and specialized aspects"

        prompt = f"""
        You are an expert researcher. Generate exactly {num_questions} well-structured, comprehensive research questions about the topic: "{topic}"

        Instructions:
        - {depth_instruction}
        - Each question should be specific and answerable through web research
        - Cover different angles: definitions, causes, effects, history, current state, future prospects, challenges, solutions
        - Progress from basic to more complex/specialized aspects
        - Avoid duplicate or overly similar questions
        - Make questions suitable for creating a comprehensive research report

        Question categories to consider (adjust based on topic):
        - Basic definition and overview
        - Historical background and evolution
        - Current state and recent developments
        - Key factors, causes, or components
        - Impacts and effects
        - Challenges and problems
        - Solutions and responses
        - Future trends and prospects
        - Comparative aspects
        - Specialized/technical aspects (for higher question counts)

        Format your response as a numbered list of questions only, without any additional text or explanations.

        Generate exactly {num_questions} questions:
        """

        try:
            response = self.model.generate_content(prompt)
            questions_text = response.text.strip()

            # Parse questions from numbered list
            questions = []
            for line in questions_text.split('\n'):
                line = line.strip()
                if line and (line[0].isdigit() or line.startswith('•') or line.startswith('-')):
                    # Remove numbering and clean up
                    if '.' in line:
                        question = line.split('.', 1)[-1].strip()
                    elif ')' in line:
                        question = line.split(')', 1)[-1].strip()
                    else:
                        question = line.strip()

                    if question and len(question) > 10:  # Ensure it's a substantial question
                        questions.append(question)

            # If we didn't get enough questions, generate additional ones
            if len(questions) < num_questions:
                print(f"⚠️ Only got {len(questions)} questions, generating additional ones...")
                additional_needed = num_questions - len(questions)

                additional_prompt = f"""
                Generate {additional_needed} more specific research questions about "{topic}" that are different from these existing questions:
                {chr(10).join(f"- {q}" for q in questions)}

                Format as numbered list starting from {len(questions) + 1}.
                """

                additional_response = self.model.generate_content(additional_prompt)
                additional_text = additional_response.text.strip()

                for line in additional_text.split('\n'):
                    line = line.strip()
                    if line and (line[0].isdigit() or line.startswith('•') or line.startswith('-')):
                        if '.' in line:
                            question = line.split('.', 1)[-1].strip()
                        elif ')' in line:
                            question = line.split(')', 1)[-1].strip()
                        else:
                            question = line.strip()

                        if question and len(question) > 10:
                            questions.append(question)

                        if len(questions) >= num_questions:
                            break

            # Ensure we return exactly the requested number
            questions = questions[:num_questions]

            print(f"✅ Generated {len(questions)} research questions")
            return questions

        except Exception as e:
            # print(f"❌ Error generating questions: {str(e)}")
            # Fallback questions
            fallback_questions = [
                f"What is {topic} and why is it important?",
                f"What are the main aspects or components of {topic}?",
                f"How has {topic} evolved over time?",
                f"What are the current challenges related to {topic}?",
                f"What solutions or approaches exist for {topic}?",
                f"What are the future prospects for {topic}?",
                f"What are the key benefits and drawbacks of {topic}?",
                f"How does {topic} impact society or individuals?",
                f"What are the latest developments in {topic}?",
                f"What expert opinions exist about {topic}?"
            ]
            return fallback_questions[:num_questions]

    def search_web_for_question(self, question: str, max_results: int = 3) -> Dict[str, Any]:
        """
        ACTING PHASE: Search the web for a specific question

        Args:
            question (str): The research question
            max_results (int): Maximum number of search results to retrieve

        Returns:
            Dict: Search results with title and content
        """
        print(f"🔍 Searching web for: {question}")

        try:
            # Use Tavily to search
            search_results = self.tavily_client.search(
                query=question,
                search_depth="basic",
                max_results=max_results
            )

            # Extract title and content from results
            processed_results = {
                'question': question,
                'results': [],
                'search_successful': True
            }

            if 'results' in search_results:
                for result in search_results['results']:
                    processed_result = {
                        'title': result.get('title', 'No title'),
                        'content': result.get('content', 'No content available'),
                        'url': result.get('url', 'No URL')
                    }
                    processed_results['results'].append(processed_result)

            print(f"✅ Found {len(processed_results['results'])} results")
            return processed_results

        except Exception as e:
            print(f"❌ Error searching for question '{question}': {str(e)}")
            return {
                'question': question,
                'results': [{'title': 'Search Error', 'content': f'Unable to search: {str(e)}', 'url': 'N/A'}],
                'search_successful': False
            }

    def compile_report(self, topic: str, research_data: List[Dict]) -> str:
        """
        Compile all research data into a structured report

        Args:
            topic (str): The research topic
            research_data (List[Dict]): All search results data

        Returns:
            str: Formatted report
        """
        print("📝 Compiling structured report...")

        report_lines = []

        # Report Header
        report_lines.append("=" * 80)
        report_lines.append(f"WEB RESEARCH REPORT: {topic.upper()}")
        report_lines.append("=" * 80)
        report_lines.append(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        report_lines.append(f"Research Method: ReAct Pattern (Reasoning + Acting)")
        report_lines.append("")

        # Introduction
        report_lines.append("INTRODUCTION")
        report_lines.append("-" * 40)
        report_lines.append(f"This report presents comprehensive research findings on the topic of '{topic}'. ")
        report_lines.append("The research was conducted using an AI agent following the ReAct pattern, which combines ")
        report_lines.append("reasoning (planning research questions) and acting (web search) to gather comprehensive information.")
        report_lines.append("")

        # Research Sections
        for i, data in enumerate(research_data, 1):
            question = data['question']
            results = data['results']

            report_lines.append(f"{i}. {question}")
            report_lines.append("-" * (len(f"{i}. {question}")))

            if data['search_successful'] and results:
                for j, result in enumerate(results, 1):
                    report_lines.append(f"   {i}.{j} {result['title']}")
                    report_lines.append(f"       {result['content'][:300]}...")
                    report_lines.append(f"       Source: {result['url']}")
                    report_lines.append("")
            else:
                report_lines.append("   No reliable information found for this question.")
                report_lines.append("")

        # Conclusion
        report_lines.append("CONCLUSION")
        report_lines.append("-" * 40)
        successful_searches = sum(1 for data in research_data if data['search_successful'])
        total_results = sum(len(data['results']) for data in research_data)

        report_lines.append(f"This research covered {len(research_data)} key questions about {topic}. ")
        report_lines.append(f"Successfully gathered information from {total_results} web sources across {successful_searches} search queries. ")
        report_lines.append("The findings provide a comprehensive overview of the current state of knowledge on this topic.")
        report_lines.append("")
        report_lines.append("=" * 80)

        report = "\n".join(report_lines)
        print("✅ Report compiled successfully!")
        return report

    def research_topic(self, topic: str, num_questions: int = 5) -> str:
        """
        Main method to research a topic using the ReAct pattern

        Args:
            topic (str): The topic to research
            num_questions (int): Number of research questions to generate

        Returns:
            str: Complete research report
        """
        print(f"🚀 Starting ReAct research process for topic: '{topic}'")
        print("=" * 60)

        # STEP 1: PLANNING - Generate research questions
        questions = self.generate_research_questions(topic, num_questions)

        if not questions:
            return "Error: Could not generate research questions."

        print(f"\n📋 Research Questions Generated:")
        for i, q in enumerate(questions, 1):
            print(f"   {i}. {q}")
        print()

        # STEP 2: ACTING - Search for each question
        all_research_data = []

        for i, question in enumerate(questions, 1):
            print(f"\n--- Processing Question {i}/{len(questions)} ---")
            search_data = self.search_web_for_question(question)
            all_research_data.append(search_data)

            # Small delay to avoid rate limiting
            time.sleep(1)

        # STEP 3: COMPILE - Generate report
        print(f"\n{'='*60}")
        final_report = self.compile_report(topic, all_research_data)

        return final_report

In [40]:



def save_report_to_file(report: str, topic: str) -> str:
    """Save report to a text file and offer download"""
    filename = f"research_report_{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(report)

    print(f"💾 Report saved as: {filename}")

    # Offer download in Colab
    try:
        files.download(filename)
        print("📥 Download started!")
    except:
        print("Note: Download may not work in all environments. File saved locally.")

    return filename

def get_user_input():
    """Get topic input from user"""
    print("🎯 Topic Input Options:")
    print("1. Enter a custom topic")
    print("2. Choose from example topics")

    choice = input("\nEnter your choice (1 or 2): ").strip()

    if choice == "2":
        example_topics = [
            "Climate Change",
            "Artificial Intelligence in Healthcare",
            "Renewable Energy Technologies",
            "Cybersecurity in 2024",
            "Space Exploration"
        ]

        print("\n📋 Example Topics:")
        for i, topic in enumerate(example_topics, 1):
            print(f"   {i}. {topic}")

        topic_choice = input(f"\nChoose a topic (1-{len(example_topics)}): ").strip()

        try:
            topic_index = int(topic_choice) - 1
            if 0 <= topic_index < len(example_topics):
                return example_topics[topic_index]
            else:
                print("Invalid choice. Using default topic.")
                return "Climate Change"
        except ValueError:
            print("Invalid input. Using default topic.")
            return "Climate Change"

    else:
        topic = input("\n📝 Enter your research topic: ").strip()
        return topic if topic else "Climate Change"

In [41]:


def main():
    """Main function to run the ReAct agent"""

    print("🤖 ReAct Web Research Agent")
    print("Building AI Agents from Scratch - Assignment Implementation")
    print("=" * 60)

    # Get API keys (you'll need to replace these with your actual keys)
    print("\n🔑 API Configuration:")
    print("Please make sure you have set your API keys in the configuration section above.")

    if GEMINI_API_KEY == "YOUR_GEMINI_API_KEY_HERE" or TAVILY_API_KEY == "YOUR_TAVILY_API_KEY_HERE":
        print("⚠️  WARNING: Please replace the placeholder API keys with your actual keys!")
        print("Update the GEMINI_API_KEY and TAVILY_API_KEY variables in the configuration section.")
        return

    try:
        # Initialize the agent
        agent = ReActWebResearchAgent(GEMINI_API_KEY, TAVILY_API_KEY)

        # Get research topic
        topic = get_user_input()
        print(f"\n🎯 Selected topic: {topic}")

        # Ask for number of questions
        try:
            num_questions = int(input("\n❓ How many research questions to generate? (recommended 5-10): ").strip() or "5")
            num_questions = max(1, min(num_questions, 20))  # Limit between 1-20
            print(f"📝 Will generate {num_questions} questions")
        except ValueError:
            num_questions = 5
            print("Invalid input. Using default: 5 questions")

        # Start research process
        print(f"\n🚀 Starting research with {num_questions} questions...")
        report = agent.research_topic(topic, num_questions)

        # Display report
        print("\n" + "="*80)
        print("📊 RESEARCH REPORT COMPLETED")
        print("="*80)
        print(report)

        # Save to file
        save_choice = input("\n💾 Save report to file? (y/n): ").strip().lower()
        if save_choice in ['y', 'yes']:
            save_report_to_file(report, topic)

        print("\n✅ Research process completed successfully!")

    except Exception as e:
        print(f"❌ An error occurred: {str(e)}")
        print("Please check your API keys and internet connection.")


In [42]:
if __name__ == "__main__":
    main()

# ============================================================================
# ADDITIONAL HELPER FUNCTIONS FOR TESTING
# ============================================================================

def test_with_multiple_questions():
    """Test function with multiple questions to showcase flexibility"""

    if GEMINI_API_KEY == "YOUR_GEMINI_API_KEY_HERE" or TAVILY_API_KEY == "YOUR_TAVILY_API_KEY_HERE":
        print("⚠️  Please set your API keys first!")
        return

    try:
        agent = ReActWebResearchAgent(GEMINI_API_KEY, TAVILY_API_KEY)

        # Test with multiple question counts
        test_cases = [
            ("Artificial Intelligence", 20),
            ("Climate Change", 30),
            ("Blockchain Technology", 10)
        ]

        for topic, num_questions in test_cases:
            print(f"\n{'='*60}")
            print(f"🧪 Testing: {topic} with {num_questions} questions")
            print(f"{'='*60}")

            # Just test question generation for demonstration
            questions = agent.generate_research_questions(topic, num_questions)

            print(f"\n📋 Generated {len(questions)} questions for '{topic}':")
            for i, q in enumerate(questions, 1):
                print(f"   {i}. {q}")

            print(f"\n✅ Successfully generated {len(questions)} questions")

    except Exception as e:
        print(f"❌ Test failed: {str(e)}")

def research_with_custom_questions():
    """Research function with custom question count"""

    if GEMINI_API_KEY == "YOUR_GEMINI_API_KEY_HERE" or TAVILY_API_KEY == "YOUR_TAVILY_API_KEY_HERE":
        print("⚠️  Please set your API keys first!")
        return

    try:
        agent = ReActWebResearchAgent(GEMINI_API_KEY, TAVILY_API_KEY)

        # Get custom inputs
        topic = input("📝 Enter your research topic: ").strip()
        if not topic:
            topic = "Artificial Intelligence"
            print(f"Using default topic: {topic}")

        try:
            num_questions = int(input(f"❓ How many questions to generate? (1-20): ").strip() or "8")
            num_questions = max(1, min(num_questions, 20))
        except ValueError:
            num_questions = 8
            print("Using default: 8 questions")

        print(f"\n🚀 Starting research on '{topic}' with {num_questions} questions...")

        # Generate full report
        report = agent.research_topic(topic, num_questions)

        print("\n" + "="*80)
        print("📊 RESEARCH REPORT COMPLETED")
        print("="*80)
        print(report)

        # Save option
        save_choice = input("\n💾 Save report to file? (y/n): ").strip().lower()
        if save_choice in ['y', 'yes']:
            save_report_to_file(report, topic)

        return report

    except Exception as e:
        print(f"❌ Research failed: {str(e)}")

🤖 ReAct Web Research Agent
Building AI Agents from Scratch - Assignment Implementation

🔑 API Configuration:
Please make sure you have set your API keys in the configuration section above.
✅ ReAct Agent initialized successfully!
🎯 Topic Input Options:
1. Enter a custom topic
2. Choose from example topics

Enter your choice (1 or 2): 1

📝 Enter your research topic: Food Technology in India

🎯 Selected topic: Food Technology in India

❓ How many research questions to generate? (recommended 5-10): 5
📝 Will generate 5 questions

🚀 Starting research with 5 questions...
🚀 Starting ReAct research process for topic: 'Food Technology in India'
🧠 PLANNING PHASE: Generating 5 research questions for 'Food Technology in India'...




❌ Error generating questions: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent?%24alt=json%3Benum-encoding%3Dint: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.

📋 Research Questions Generated:
   1. What is Food Technology in India and why is it important?
   2. What are the main aspects or components of Food Technology in India?
   3. How has Food Technology in India evolved over time?
   4. What are the current challenges related to Food Technology in India?
   5. What solutions or approaches exist for Food Technology in India?


--- Processing Question 1/5 ---
🔍 Searching web for: What is Food Technology in India and why is it important?
✅ Found 3 results

--- Processing Question 2/5 ---
🔍 Searching web for: What are the main aspects or components of Food Technology in India?
✅ Found 3 results

--- Proces

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

📥 Download started!

✅ Research process completed successfully!
