In [1]:
import arxiv
import json
import os
from dotenv import load_dotenv
from typing import List
from openai import OpenAI

In [2]:
# Root directory under which each topic's papers_info.json is stored.
PAPER_DIR = "papers"
# Load environment variables (e.g. OPENAI_API_KEY, read further below) from a .env file.
load_dotenv()

True

## Tool Functions

In [63]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    The results are merged into ``<PAPER_DIR>/<topic_dir>/papers_info.json``,
    where ``topic_dir`` is the lower-cased topic with spaces and path
    separators replaced by underscores. Existing entries in that file are kept
    and entries with the same paper ID are overwritten.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """

    # Use arxiv to find the papers
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query = topic,
        max_results = max_results,
        sort_by = arxiv.SortCriterion.Relevance
    )

    papers = client.results(search)

    # Build a single-level directory name for this topic. Path separators are
    # replaced so that a topic such as "AI/ML" cannot create nested folders
    # (which extract_info would never scan) or escape PAPER_DIR.
    dir_name = topic.lower().replace(" ", "_")
    for separator in ("/", "\\"):
        dir_name = dir_name.replace(separator, "_")
    if dir_name in ("", ".", ".."):
        # These names would resolve to PAPER_DIR itself or to its parent.
        dir_name = "_"

    path = os.path.join(PAPER_DIR, dir_name)
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers info
    try:
        with open(file_path, "r") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    # Process each paper and add to papers_info
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date())
        }

    # Save updated papers_info to json file
    with open(file_path, "w") as json_file:
        json.dump(papers_info, json_file, indent=2)

    print(f"Results are saved in: {file_path}")

    return paper_ids

In [64]:
# Try the search tool directly; prints the JSON file path and displays the returned paper IDs.
search_papers("computers")

Results are saved in: papers/computers/papers_info.json


['1310.7911v2',
 'math/9711204v1',
 '2208.00733v1',
 '2504.07020v1',
 '2403.03925v1']

In [65]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every direct sub-directory of PAPER_DIR is checked for a papers_info.json
    file; the first file that contains ``paper_id`` as a key wins.

    Args:
        paper_id: The ID of paper to look for
    Returns:
        JSON string with paper information if found, error message if not found
    """
    not_found = f"There's no saved information related to paper {paper_id}."

    # No search has been saved yet: report "not found" instead of letting
    # os.listdir raise FileNotFoundError.
    if not os.path.isdir(PAPER_DIR):
        return not_found

    for item in os.listdir(PAPER_DIR):
        # isfile() is False when `item` is not a directory or has no JSON file.
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, "r") as json_file:
                papers_info = json.load(json_file)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            # Best effort: a broken file for one topic must not hide the others.
            print(f"Error reading {file_path}: {str(e)}")
            continue

        # Only a JSON object can map paper IDs to their details.
        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent = 2)

    return not_found
                

In [66]:
# Look up one of the IDs returned by the search above; displays its stored details as a JSON string.
extract_info('1310.7911v2')

'{\n  "title": "Compact manifolds with computable boundaries",\n  "authors": [\n    "Zvonko Iljazovic"\n  ],\n  "summary": "We investigate conditions under which a co-computably enumerable closed set\\nin a computable metric space is computable and prove that in each locally\\ncomputable computable metric space each co-computably enumerable compact\\nmanifold with computable boundary is computable. In fact, we examine the notion\\nof a semi-computable compact set and we prove a more general result: in any\\ncomputable metric space each semi-computable compact manifold with computable\\nboundary is computable. In particular, each semi-computable compact\\n(boundaryless) manifold is computable.",\n  "pdf_url": "http://arxiv.org/pdf/1310.7911v2",\n  "published": "2013-10-29"\n}'

## Tool Schema

In [67]:
# Tool descriptions passed to the chat-completions API. Each entry mirrors the
# signature of the Python function with the same name, so the arguments the
# model produces can be passed to that function as keyword arguments.
tools = [
    {
        "type": "function",
        "function": {
            "name": "search_papers",
            "description": "Search for papers on arXiv based on a topic and store their information.",
            "parameters": {
                "type": "object",
                "properties": {
                    "topic": {
                        "type": "string",
                        "description": "The topic to search for"
                    },
                    # Optional: search_papers falls back to 5 when omitted.
                    "max_results": {
                        "type": "integer",
                        "description": "Maximum number of results to retrieve (default: 5)",
                        "minimum": 1
                    }
                },
                "required": ["topic"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "extract_info",
            "description": "Search for information about a specific paper across all topic directories.",
            "parameters": {
                "type": "object",
                "properties": {
                    "paper_id": {
                        "type": "string",
                        "description": "The ID of the paper to look for"
                    }
                },
                "required": ["paper_id"]
            }
        }
    }
]


## Tool Mapping

In [68]:
# Dispatch table: tool name requested by the model -> Python callable implementing it.
mapping_tool_function = dict(
    search_papers=search_papers,
    extract_info=extract_info,
)

def execute_tool(tool_name, tool_args):
    """
    Run the tool registered under ``tool_name`` and return its result as text.

    Args:
        tool_name: Key into ``mapping_tool_function``
        tool_args: Keyword arguments for the tool (a dict; None means no arguments)

    Returns:
        A string suitable for the ``content`` of a tool message: an error
        message for an unknown tool, a placeholder sentence for ``None``,
        comma-separated items for a list, indented JSON for a dict, and
        ``str(result)`` for anything else.
    """
    tool = mapping_tool_function.get(tool_name)
    if tool is None:
        # The model may name a tool that does not exist; report it as text so
        # the conversation can continue instead of failing with KeyError.
        return f"Error: unknown tool '{tool_name}'."

    result = tool(**(tool_args or {}))

    if result is None:
        return "The operation completed but did not return any results."

    if isinstance(result, list):
        # str() each item so lists of non-string values can be joined too.
        return ', '.join(str(item) for item in result)

    if isinstance(result, dict):
        # Convert dictionaries to formatted JSON strings
        return json.dumps(result, indent = 2)

    # For any other type convert using string
    return str(result)

In [69]:
# Chat client used by process_query; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

## Query Processing

In [70]:
def process_query(query, max_turns: int = 10):
    """
    Answer one user query, letting the model call tools until it replies in text.

    Each round sends the conversation so far to the model. If the reply
    contains tool calls, every call is executed and its result appended as a
    tool message before the next round; otherwise the reply is printed and
    returned.

    Args:
        query: The user's message
        max_turns: Maximum number of model calls for this query (default: 10),
            so a model that keeps requesting tools cannot loop forever

    Returns:
        The text of the final assistant reply, or None if no final reply was
        produced within ``max_turns`` model calls
    """
    messages = [{'role': 'user', 'content': query}]

    for _ in range(max_turns):
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            tools=tools,
            tool_choice="auto",
            max_tokens=2024
        )

        reply = response.choices[0].message
        # The assistant message goes first, whether it is a final answer or a
        # tool-call request that the tool messages below respond to.
        messages.append(reply)

        if not reply.tool_calls:
            print(f"\n🤖 Assistant: {reply.content}")
            return reply.content

        for tool_call in reply.tool_calls:
            tool_name = tool_call.function.name

            try:
                tool_args = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError as e:
                # Report the problem back to the model instead of aborting the
                # query; every tool call still receives a tool message.
                print(f"\nError: invalid arguments for tool {tool_name}: {str(e)}")
                result = f"Error: could not parse the arguments for {tool_name} as JSON: {str(e)}"
            else:
                print(f"\n🛠 Calling tool {tool_name} with args {tool_args}")
                result = execute_tool(tool_name, tool_args)

            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })

    print(f"\nStopped after {max_turns} model calls without a final answer.")
    return None


In [71]:
def chat_loop():
    """
    Read queries from standard input and answer each with process_query.

    The loop ends when the user types 'quit' (any letter case), when the input
    stream is closed, or when the prompt is interrupted. Blank lines are
    ignored. An error raised while answering a query is printed and the loop
    continues with the next query.
    """
    print("Type your queries or 'quit' to exit.")
    while True:
        try:
            query = input("\nQuery: ").strip()
        except (EOFError, KeyboardInterrupt):
            # With a closed input stream every further input() call fails the
            # same way, so retrying would spin forever; stop instead.
            print()
            break

        if query.lower() == 'quit':
            break

        if not query:
            # Nothing to ask; prompt again without calling the model.
            continue

        try:
            process_query(query)
            print("\n")
        except Exception as e:
            print(f"\nError: {str(e)}")

In [72]:
# Start the interactive session; type 'quit' at the prompt to leave it.
chat_loop()

Type your queries or 'quit' to exit.



Query:  hi



🤖 Assistant: Hello! How can I assist you today?





Query:  search for paper on algebra



🛠 Calling tool search_papers with args {'topic': 'algebra'}
Results are saved in: papers/algebra/papers_info.json

🛠 Calling tool extract_info with args {'paper_id': '1104.3954v1'}

🛠 Calling tool extract_info with args {'paper_id': 'math/0501518v2'}

🛠 Calling tool extract_info with args {'paper_id': '1012.2844v1'}

🛠 Calling tool extract_info with args {'paper_id': '1607.02068v1'}

🛠 Calling tool extract_info with args {'paper_id': 'math/0506093v1'}

🤖 Assistant: Here are some recent papers on the topic of algebra:

1. **Invariant Algebras**
   - **Authors**: Keqin Liu
   - **Published**: 2011-04-20
   - **Summary**: The paper introduces invariant algebras and representations of algebras, providing methods for constructing various types of algebras such as Lie algebras, Jordan algebras, and more.
   - **PDF**: [Download Here](http://arxiv.org/pdf/1104.3954v1)

2. **Deformation of algebras over the Landweber-Novikov algebra**
   - **Authors**: Donald Yau
   - **Published**: 2005-01-2


Query:  quit
