# AI Research Assistant Agent: LARA


In [None]:
# Install dependencies: the LangChain packages, the OpenAI client, Tavily, and
# google-search-results (assumed to provide the `serpapi` module imported in the
# next cell - TODO confirm).
!pip install langchain langchain-community langchain-core openai langchain-openai tavily-python google-search-results
# Mount Google Drive at /content/drive; the memory functions in this notebook
# read and write their JSON files under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Import libraries
import os
import json
import re
import requests
# Langchain Tools/Models
from langchain_openai import ChatOpenAI
from langchain.tools import Tool
from langchain.agents import AgentExecutor, create_react_agent
from langchain.prompts import PromptTemplate
# Web/Academic Search
from tavily import TavilyClient
from serpapi import GoogleSearch
from google.colab import userdata

In [None]:
# API Keys
# Copy each Colab secret into the process environment under the same name
# (Tavily first, then OpenAI, then SerpAPI).
for _secret_name in ('TavilyAPIKey', 'OpenAIAPIKey', 'SerpAPIKey'):
    os.environ[_secret_name] = userdata.get(_secret_name)

# Read the values back into the module-level names the rest of the notebook uses.
tavilykey, serpkey, openaikey = (
    os.environ.get('TavilyAPIKey'),
    os.environ.get('SerpAPIKey'),
    os.environ.get('OpenAIAPIKey'),
)

In [None]:
# Keyword Definition
def generate_keywords(topic):
    """Ask GPT-4 for research keywords about *topic* and return them as a list.

    The model's reply is free-form text, so it is split on commas, semicolons
    and line breaks rather than on the exact string ", ". Leading list markers
    ("-", "*", "1.", "2)") and surrounding whitespace are removed, and empty
    pieces are dropped.

    Args:
        topic: The research topic to extract keywords from.

    Returns:
        A list of keyword strings in the order the model produced them.
    """
    prompt = f"Extract key research keywords from this topic: {topic}"
    # A fresh GPT-4 chat model is created for this single completion.
    response = ChatOpenAI(model="gpt-4", api_key=openaikey).invoke(prompt)

    keywords = []
    for piece in re.split(r"[,;\n]+", response.content):
        # Strip bullet / numbering prefixes the model may add to each item.
        cleaned = re.sub(r"^\s*(?:[-*•]|\d+[.)])\s*", "", piece).strip()
        if cleaned:
            keywords.append(cleaned)
    return keywords

In [None]:
# Security Check for Inappropriate/Harmful Content
# Words and phrases that block a topic when they appear as whole words.
_SEXUAL_KEYWORDS = (
    'sex', 'porn', 'erotic', 'adult', 'nsfw',
    'nudity', 'explicit', 'prostitution', 'fetish',
)
_DANGEROUS_KEYWORDS = (
    'violence', 'murder', 'assault', 'terrorism', 'bomb', 'weapon',
    'kill', 'explosive', 'firearm', 'harm', 'self-harm',
)
_HARMFUL_KEYWORDS = (
    'hate speech', 'racism', 'discrimination', 'xenophobia', 'slur',
    'abuse', 'harassment', 'bullying', 'defamation',
)
_BLOCKED_KEYWORDS = _SEXUAL_KEYWORDS + _DANGEROUS_KEYWORDS + _HARMFUL_KEYWORDS

# Phrase-level patterns that catch requests the single keywords miss
# (for example inflected forms such as "explosives").
_BLOCKED_PATTERNS = (
    r'sex.*(guide|tutorial|instruction)',
    r'(make|build|create).*(bomb|weapon|explosive)',
    r'(promot|encourag|incit).*hatred',
    r'(how to|guide to).*(kill|harm|murder)',
)


def check_inappropriate_content(topic):
    """Screen a research topic against blocked keywords and phrase patterns.

    The topic is lower-cased, then checked first for any blocked keyword as a
    whole word and then for any blocked phrase pattern. The first match wins.

    Args:
        topic: The user-supplied research topic (a string).

    Returns:
        A ``(is_inappropriate, message)`` tuple. ``message`` names the matched
        keyword, or is a generic notice for pattern matches, and is ``""``
        when the topic passes.
    """
    topic_lower = topic.lower()  # Keywords and patterns are all lower-case.

    for keyword in _BLOCKED_KEYWORDS:
        # re.escape keeps the keyword literal even if it contains regex syntax.
        if re.search(rf'\b{re.escape(keyword)}\b', topic_lower):
            return True, f"Inappropriate request: '{keyword}'. Provide a safer topic to research."

    for pattern in _BLOCKED_PATTERNS:
        if re.search(pattern, topic_lower):
            return True, "The topic has been flagged as inappropriate, dangerous, or harmful."

    return False, ""

In [None]:
# Memory System
# Saves input, report, and feedback to a google drive.
def save_interaction(topic, report, feedback=None, directory="/content/drive/MyDrive"):
    """Write one research interaction to ``research_<topic>.json``.

    The topic is turned into a safe file name: every character other than a
    letter, digit, underscore or hyphen becomes ``_`` (so a topic containing
    ``/`` cannot change the target directory), and the result is capped at 100
    characters. Failures are reported with a printed message instead of being
    raised, so a memory problem never aborts the session.

    Args:
        topic: The research topic; stored as-is and used to build the file name.
        report: The report text produced for the topic.
        feedback: Optional user feedback on the report.
        directory: Folder to write into. Defaults to the mounted Drive folder.
    """
    data = {"topic": topic, "report": report, "feedback": feedback}
    safe_topic = re.sub(r"[^\w\-]", "_", str(topic))[:100]
    filename = os.path.join(directory, f"research_{safe_topic}.json")
    try:
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data, f)
    except (OSError, TypeError, ValueError) as e:
        # OSError: path/permission problems; TypeError/ValueError: data that
        # json cannot serialize.
        print(f"Error Saving Memory: {e}")
# Checks and loads previously saved feedback from google drive.
def load_previous_feedback(directory="/content/drive/MyDrive/"):
    """Collect the non-empty ``feedback`` values from saved interaction files.

    Only files named ``research_*.json`` are read, in sorted name order so the
    result is deterministic. Each file is read independently: an unreadable or
    malformed file is reported and skipped, so it cannot hide the feedback
    stored in the other files.

    Args:
        directory: Folder to scan. Defaults to the mounted Drive folder.

    Returns:
        A list of feedback values; empty when the folder cannot be listed or
        no file contains feedback.
    """
    try:
        filenames = sorted(os.listdir(directory))
    except OSError as e:  # Folder missing or Drive not mounted.
        print(f"Error Loading Memory: {e}")
        return []

    feedback_list = []
    for filename in filenames:
        if not (filename.startswith("research_") and filename.endswith(".json")):
            continue
        try:
            with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers invalid JSON and undecodable bytes.
            print(f"Error Loading Memory: {e}")
            continue
        # A valid JSON file may still hold something other than an object.
        if isinstance(data, dict) and data.get("feedback"):
            feedback_list.append(data["feedback"])
    return feedback_list

In [None]:
# WebSearch and AcademicSearch Functions
# Shared Tavily client, created once with the key loaded earlier.
tavily = TavilyClient(api_key=tavilykey)


def websearch(query):
    """Run a Tavily web search and return at most the first five results.

    Any failure (request error, unexpected response shape) is printed and an
    empty list is returned so the agent can keep going.
    """
    try:
        response = tavily.search(query=query)
        top_hits = response['results'][:5]
    except Exception as err:
        print(f"Web search error: {err}")
        return []
    return top_hits

# SerpAPI for Academic Sources
def scholarsearch(query):
    """Search Google Scholar through SerpAPI and return at most two results.

    Uses the module-level ``serpkey`` loaded with the other API keys instead of
    fetching the secret again on every call. If the response carries an
    ``error`` field it is printed, so an empty result can be told apart from a
    failed request.

    Args:
        query: The search query string.

    Returns:
        A list with up to two entries from the response's ``organic_results``;
        an empty list when the search fails or nothing is found.
    """
    try:
        params = {
            "engine": "google_scholar",
            "q": query,
            "api_key": serpkey,
            "num": 2,  # Limit to 2 results for simplicity
        }
        response = GoogleSearch(params).get_dict()
        if response.get('error'):
            print(f"Academic search error: {response['error']}")
        return response.get('organic_results', [])[:2]
    except Exception as e:  # Allows the agent to continue if the search fails.
        print(f"Academic search error: {e}")
        return []

In [None]:
# Citation Format/Generation
def _parse_source_string(text):
    """Decode a JSON or Python-literal string; return *text* itself if neither parses."""
    import ast  # Local import: only needed for string inputs.

    text = text.strip()
    try:
        return json.loads(text)
    except ValueError:
        pass
    try:
        # literal_eval only accepts literals, so it does not execute code.
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return text


def _cite_mapping(source):
    """Build a citation from a dict with 'title' and either 'url' or 'link'."""
    url_key = 'url' if 'url' in source else 'link'
    if 'title' not in source or url_key not in source:
        return "Citation unavailable: missing title or url"
    date = source.get('published_date', 'n.d.')
    return f"{source['title']}. ({date}). Retrieved from {source[url_key]}"


def _format_authors(authors):
    """Render an authors value (string, or list of dicts/objects/strings) as one string."""
    if isinstance(authors, str):
        return authors
    if not isinstance(authors, list):
        return 'Unknown Author'
    names = []
    for author in authors:
        if isinstance(author, dict):
            names.append(author.get('name', 'Unknown'))
        elif hasattr(author, 'name'):
            names.append(author.name)
        else:
            names.append(str(author))
    return ', '.join(names)


def generate_citation(source):
    """Format a citation string from source metadata.

    Accepted inputs:
      * a dict with 'title' and 'url' (or 'link', which some search payloads
        use for the address), plus optional 'published_date';
      * a string containing such a dict as JSON or as a Python literal. This
        function is registered as an agent tool, and tool input produced by
        the language model arrives as text;
      * an object with ``title`` and ``url`` attributes, plus optional
        ``year`` and ``authors``.

    Args:
        source: The source metadata in one of the forms above.

    Returns:
        The citation string, "Citation unavailable: missing title or url" when
        the required fields are absent, or "Citation unavailable" when an
        unexpected error occurs (the error is printed).
    """
    try:
        if isinstance(source, str):
            source = _parse_source_string(source)
        if isinstance(source, dict):
            return _cite_mapping(source)
        if hasattr(source, 'title') and hasattr(source, 'url'):
            title = source.title
            url = source.url
            year = getattr(source, 'year', 'n.d.')
            if hasattr(source, 'authors'):
                authors_str = _format_authors(source.authors)
            else:
                authors_str = 'Unknown Author'
            return f"{authors_str}. ({year}). {title}. Retrieved from {url}"
        return "Citation unavailable: missing title or url"
    except Exception as e:
        print(f"Citation generation error: {e}, source type: {type(source)}")
        return "Citation unavailable"

In [None]:
# Agent Tools
def _run_web_search(query):
    """Forward the query to websearch, looked up at call time."""
    return websearch(query)


def _run_academic_search(query):
    """Forward the query to scholarsearch, looked up at call time."""
    return scholarsearch(query)


# Web Search using TavilyAPI
web_search_tool = Tool(
    func=_run_web_search,
    name="WebSearch",
    description="Search the web for sources. Input: search query. Output: list of sources.",
)

# Google Scholar Search using SerpAPI
academic_search_tool = Tool(
    func=_run_academic_search,
    name="AcademicSearch",
    description="Search academic papers. Input: search query. Output: list of papers.",
)

# Citation formatting for a single source
citation_tool = Tool(
    func=generate_citation,
    name="CitationGenerator",
    description="Generate a citation from source metadata. Input: source dictionary or object. Output: citation string.",
)

# Every tool the agent is allowed to call.
tools = [
    web_search_tool,
    academic_search_tool,
    citation_tool,
]

In [None]:
# Prompt Template for Reasoning/Summarization.
# Placeholders: {topic} and {feedback} are supplied per run; {tools},
# {tool_names} and {agent_scratchpad} are the ReAct-format slots.
# The feedback section tells the model to avoid *unreliable* results (the
# earlier wording, "not unreliable", said the opposite).
prompt_template = """
You are a research assistant AI. Your task is to research the topic "{topic}" and provide a detailed report. You have access to the following tools:

{tools}

Use the following format:

Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Follow these steps carefully, in order, and explain your reasoning at each stage:

1. Understand the Topic
   - Think about what the user is asking for. Break down the topic into key concepts.

2. Search for Sources
   - Use 'WebSearch' to find ONLY 5 varied web sources. Input: a search query based on the topic.
   - Use 'AcademicSearch' to find ONLY 2 academic papers. Input: a search query based on the topic.
   - Aim for a variety of sources.
   - No Duplicates

3. Evaluate Sources
   - For each source, assess relevance (keywords in title/snippet/abstract).
   - Assess credibility (domain, date, author).
   - Try to keep only relevant and credible sources. Explain decisions.
   - Aim for 3 Web sources, and 2 Academic sources.
   - Rank sources based on relevance and credibility.

4. Extract Information
   - Extract key points from snippets/abstracts.

5. Organize Information
   - Group into sub-topics based on themes.
   - Rank sources within each sub-topic, with reasoning.

6. Summarize
   - Generate summaries for each sub-topic.
   - Identify relations/trends across sources.

7. Generate Citations
   - Use 'CitationGenerator' for each source.

8. Compile the Report
   - Rank sources from most relevant/credible to least.
   - Structure with sub-topics, summaries, and citations for each source.
   - Add overall summary and references.

Previous feedback: {feedback}
- Avoid results that are unreliable.
- Prioritize preferred sources if mentioned.
- Update results based on feedback.

When you have compiled the report, present it as:
Final Answer:
- 5 Sources, Ranked based on relevancy/credibility.
- A short summary for each source.
- Citations for each source.
- Overall summary and references including key connections between the sources.

{agent_scratchpad}
"""

In [None]:
# Compile Agent
# Chat model (GPT-4) that drives the agent.
llm = ChatOpenAI(api_key=openaikey, model="gpt-4")

# Turn the template string defined above into a LangChain prompt.
prompt = PromptTemplate.from_template(prompt_template)

# Build the ReAct agent from the model, the tool list and the prompt.
agent = create_react_agent(tools=tools, prompt=prompt, llm=llm)

# The executor runs the agent loop; verbose output is on and parsing errors
# are handled rather than raised.
executor = AgentExecutor(
    agent=agent,
    tools=tools,
    handle_parsing_errors=True,
    verbose=True,
)

In [None]:
# Run the Agent and Request Feedback
# Surrounding whitespace is stripped so it does not end up in the prompt or in
# the saved file name.
topic = input("Enter a research topic: ").strip()
# The agent is only run when the topic passes the content check.
is_inappropriate, rejection_message = check_inappropriate_content(topic)
if is_inappropriate:
    print(rejection_message)
else:
    # Load saved feedback once (each call re-reads every memory file) and fall
    # back to a placeholder sentence when there is none.
    previous_feedback = load_previous_feedback()
    feedback = " ".join(previous_feedback) if previous_feedback else "No previous feedback available."
    # NOTE(review): create_react_agent may already fill tools, tool_names and
    # agent_scratchpad itself - confirm whether passing them here is needed.
    result = executor.invoke({
        "topic": topic,
        "feedback": feedback,
        "tools": "\n".join([f"{tool.name}: {tool.description}" for tool in tools]),
        "tool_names": ", ".join([tool.name for tool in tools]),
        "agent_scratchpad": ""
    })
    print(result['output'])

    # Update Memory/Request Feedback
    user_feedback = input("Enter Feedback Here:")
    save_interaction(topic, result['output'], user_feedback)