## Chatbot Example
This example demonstrates how to create a simple chatbot using the OpenAI API. The chatbot can search for academic papers and extract information from them. It also covers the function (tool) calling feature of the OpenAI API.

In [None]:
%pip install openai arxiv python-dotenv --quiet

In [None]:
## Import Libraries
import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv
import openai

In [None]:
# Root directory for stored paper metadata: search_papers writes one
# sub-directory per topic under it, and extract_info scans those sub-directories.
PAPER_DIR = "papers"

## Tool Function

In [None]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    The metadata of every result is merged into
    ``PAPER_DIR/<topic_dir>/papers_info.json`` (keyed by the paper's short ID),
    where ``<topic_dir>`` is the lower-cased topic with spaces and path
    separators replaced by underscores.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """

    # Use arxiv to find the papers
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query=topic, max_results=max_results, sort_by=arxiv.SortCriterion.Relevance
    )

    papers = client.results(search)

    # Build the directory name for this topic. Path separators are replaced so
    # the topic always maps to exactly one directory directly under PAPER_DIR:
    # extract_info only scans that single level, and a topic such as "../x"
    # must not be able to write outside the paper store.
    dir_name = topic.lower().replace(" ", "_")
    for separator in ("/", "\\"):
        dir_name = dir_name.replace(separator, "_")
    if dir_name in ("", ".", ".."):
        dir_name = "_"

    path = os.path.join(PAPER_DIR, dir_name)
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers info; start fresh if the file is missing,
    # unreadable as JSON, or does not hold a JSON object.
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}
    if not isinstance(papers_info, dict):
        papers_info = {}

    # Process each paper and add to papers_info
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            "title": paper.title,
            "authors": [author.name for author in paper.authors],
            "summary": paper.summary,
            "pdf_url": paper.pdf_url,
            "published": str(paper.published.date()),
        }

    # Save updated papers_info to json file
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)

    print(f"Results are saved in: {file_path}")

    return paper_ids

In [None]:
# Smoke test: run one search so PAPER_DIR holds data for the cells below.
search_papers("AI in Healthcare", max_results=5)

In [None]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every ``PAPER_DIR/<topic>/papers_info.json`` file is inspected; files that
    cannot be read or parsed are reported on stdout and skipped.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """
    not_found = f"There's no saved information related to paper {paper_id}."

    try:
        topic_dirs = os.listdir(PAPER_DIR)
    except (FileNotFoundError, NotADirectoryError):
        # No search has stored anything yet, so there is nothing to look up.
        return not_found

    for item in topic_dirs:
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        # isfile() is also False when `item` itself is not a directory.
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, "r", encoding="utf-8") as json_file:
                papers_info = json.load(json_file)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error reading {file_path}: {str(e)}")
            continue

        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found

In [None]:
# Look up one paper ID; returns the "no saved information" message unless a
# previous search stored this ID.
extract_info("2409.07415v2")

## Tool Schema

In [None]:
# Function-tool definitions passed to the model via `tools=` in process_query.
# Each "name" must match a key of mapping_tool_function, and each "parameters"
# schema mirrors the keyword arguments of the corresponding Python function.
tools = [
    {
        "type": "function",
        "name": "search_papers",
        "description": "Search for papers on arXiv based on a topic and store their information.",
        "parameters": {
            "type": "object",
            "properties": {
                "topic": {"type": "string", "description": "The topic to search for"},
                # Optional: search_papers itself defaults max_results to 5.
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of results to retrieve",
                    "default": 5,
                },
            },
            "required": ["topic"],
            "additionalProperties": False,
        },
    },
    {
        "type": "function",
        "name": "extract_info",
        "description": "Search for information about a specific paper across all topic directories.",
        "parameters": {
            "type": "object",
            "properties": {
                "paper_id": {
                    "type": "string",
                    "description": "The ID of the paper to look for",
                }
            },
            "required": ["paper_id"],
            "additionalProperties": False,
        },
    },
]

## Tool Mapping

In [None]:
# Dispatch table: tool name (as declared in `tools`) -> Python callable to run.
mapping_tool_function = {"search_papers": search_papers, "extract_info": extract_info}


def execute_tool(tool_name: str, tool_args: dict) -> str:
    """
    Run a registered tool and convert its result to a string.

    Args:
        tool_name: Key of the tool in ``mapping_tool_function``
        tool_args: Keyword arguments forwarded to the tool function

    Returns:
        The tool result as text: a fixed notice for ``None``, a comma-separated
        line for lists, indented JSON for dicts, ``str(result)`` otherwise

    Raises:
        KeyError: If ``tool_name`` is not registered in ``mapping_tool_function``
    """
    result = mapping_tool_function[tool_name](**tool_args)

    if result is None:
        result = "The operation completed but didn't return any results."

    elif isinstance(result, list):
        # Convert items first so lists of non-string values (e.g. ints) do not
        # make str.join raise TypeError.
        result = ", ".join(str(item) for item in result)

    elif isinstance(result, dict):
        result = json.dumps(result, indent=2)

    else:
        result = str(result)

    print(f"Tool {tool_name} executed with result: {result}")
    return result

## Chatbot Code

In [None]:
# Load variables from a local .env file into the environment before creating the
# client (presumably this is where the OpenAI API key comes from — confirm).
load_dotenv()
# Shared OpenAI client used by process_query.
client = openai.Client()

### Query Processing

In [None]:
def process_query(query: str) -> str:
    """
    Answer a user query, executing any tool calls the model requests.

    The conversation is sent to the model; while the response contains function
    calls, each one is executed via ``execute_tool``, the call and its output
    are appended to the conversation, and the model is queried again. The loop
    ends as soon as a response contains no function calls.

    Args:
        query: The user's message

    Returns:
        The text of the first message in the final response, or an empty string
        if that response contains no message
    """
    messages = [{"role": "user", "content": query}]

    while True:
        response = client.responses.create(
            model="gpt-4o",
            input=messages,
            tools=tools,
        )

        function_calls = [
            block for block in response.output if block.type == "function_call"
        ]

        if not function_calls:
            # Final answer: return the first message's text. Returning here even
            # when there is no message avoids re-sending the same input forever.
            for block in response.output:
                if block.type != "message":
                    continue
                # Content parts are normally text; a refusal part carries its
                # text in `refusal` instead of `text`.
                return "".join(
                    getattr(part, "text", None) or getattr(part, "refusal", None) or ""
                    for part in block.content
                )
            return ""

        # Run every requested tool before asking the model again, so no call is
        # dropped when a message block accompanies the function calls.
        for block in function_calls:
            tool_call_id = block.call_id
            tool_name = block.name
            tool_args = json.loads(block.arguments)

            print(
                f"Processing tool call: {tool_name} with args: {tool_args} with call_id: {tool_call_id}"
            )
            result = execute_tool(tool_name, tool_args)

            # The model needs both its own call and the matching output.
            messages.append(block.model_dump())
            messages.append(
                {
                    "type": "function_call_output",
                    "call_id": tool_call_id,
                    "output": str(result),
                }
            )

In [None]:
# End-to-end check of the tool-calling loop: the request asks for a search and
# a lookup, so the model is expected (not guaranteed) to call both tools.
process_query("Hi, can you search for papers on AI in Healthcare and Check the 2409.07415v2?")

In [None]:
# A plain greeting should need no tool call; print the reply text.
print(process_query("Hi."))

### Chatbot Function

In [None]:
from IPython.display import display, Markdown

def chat_loop():
    """
    Run an interactive chat session until the user types 'quit'.

    Each query is answered by ``process_query`` and rendered as Markdown.
    Errors raised while answering are printed and the session continues; end
    of input or an interrupt at the prompt ends the session.
    """
    print("Type your queries or 'quit' to exit.")
    while True:
        try:
            query = input("\nQuery: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input is closed or interrupted: retrying would only fail again
            # (EOFError would otherwise loop forever), so leave the session.
            print()
            break

        if query.lower() == "quit":
            break
        if not query:
            # Nothing to ask; do not spend an API call on an empty message.
            continue

        try:
            response = process_query(query)
            display(Markdown(response or ""))
            print()
        except Exception as e:
            # Top-level boundary: report the failure and keep the chat alive.
            print(f"\nError: {e}")

In [None]:
# Start the interactive session (blocks until the user types 'quit').
chat_loop()