In [None]:
#%pip install arxiv
#%pip install openai

import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv

PAPER_DIR = "arv_papers"

def _topic_dirname(topic: str) -> str:
    """Turn a free-form topic into a single, safe directory name under PAPER_DIR."""
    name = topic.lower().replace(" ", "_")
    # Path separators would create nested directories (or escape PAPER_DIR);
    # extract_info only scans direct children of PAPER_DIR, so keep it flat.
    for separator in ("/", "\\"):
        name = name.replace(separator, "_")
    # "", "." and ".." would resolve to PAPER_DIR itself or its parent.
    if name in ("", ".", ".."):
        name = "untitled"
    return name


def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.
    arXiv is an open-access repository of scholarly papers in various fields.

    The results are merged into ``<PAPER_DIR>/<topic>/papers_info.json``,
    keyed by each paper's short ID; entries already present for the same ID
    are overwritten, other entries are kept.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """

    # Use arxiv to find the papers
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance
    )

    papers = client.results(search)

    # Create directory for this topic (always a direct child of PAPER_DIR)
    path = os.path.join(PAPER_DIR, _topic_dirname(topic))
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers info
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}
    # A file holding anything but a JSON object cannot be merged into; start over.
    if not isinstance(papers_info, dict):
        papers_info = {}

    # Process each paper and add to papers_info
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date())
        }

    # Save updated papers_info to json file
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)

    print(f"Results are saved in: {file_path}")

    return paper_ids

def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every direct sub-directory of PAPER_DIR is checked for a
    ``papers_info.json`` file; the first one containing ``paper_id`` wins.
    Files that cannot be read or parsed are reported and skipped.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """
    not_found = f"There's no saved information related to paper {paper_id}."

    # Nothing has been saved yet if the root directory is missing.
    if not os.path.isdir(PAPER_DIR):
        return not_found

    for item in os.listdir(PAPER_DIR):
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        if not os.path.isfile(file_path):
            continue
        try:
            with open(file_path, "r", encoding="utf-8") as json_file:
                papers_info = json.load(json_file)
        except (OSError, json.JSONDecodeError) as e:
            # Best effort: one unreadable file must not hide the others.
            print(f"Error reading {file_path}: {str(e)}")
            continue
        # Only a JSON object can map paper IDs to their info.
        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found


In [None]:
# Smoke test: run a search (which writes papers_info.json under PAPER_DIR),
# then look one paper up by ID.
# NOTE(review): search_papers stores entries under paper.get_short_id();
# confirm whether those keys carry a version suffix (e.g. "v1") — if they do,
# this unversioned ID will not match a saved entry.
search_papers("machine learning", max_results=5)
extract_info("2301.09753")

In [None]:
# Tool schema to integrate with LLMs.
# This list is passed as `tools=` to the chat completion request in
# openai_response. Each "name" must match a key of mapping_tool_function so
# that execute_tool can dispatch the call, and each "parameters" object
# mirrors the signature of the corresponding Python function.
tools = [
    {
        "type": "function",
        "function": {
            "name": "search_papers",
            "description": "Search for papers on arXiv based on a topic and store their information.",
            "parameters": {
                "type": "object",
                "properties": {
                    "topic": {
                        "type": "string",
                        "description": "The topic to search for"
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "Maximum number of results to retrieve",
                        "default": 5
                    }
                },
                "required": ["topic"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "extract_info",
            "description": "Search for information about a specific paper across all topic directories.",
            "parameters": {
                "type": "object",
                "properties": {
                    "paper_id": {
                        "type": "string",
                        "description": "The ID of the paper to look for"
                    }
                },
                "required": ["paper_id"]
            }
        }
    }
]

In [None]:
# Dispatch table from the tool names declared in the `tools` schema to the
# Python functions that implement them; execute_tool looks tools up here.
mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info
}

def execute_tool(tool_name, tool_args):
    """
    Run a registered tool and normalise its result to a string.

    Args:
        tool_name: Key of the tool in mapping_tool_function
        tool_args: Keyword arguments for the tool (None or empty for no arguments)

    Returns:
        The tool result as text: a fixed notice for None, list items joined
        with ", ", dicts as indented JSON, anything else via str(). If the
        tool name is not registered, an error message naming the available
        tools is returned instead of raising, so the caller can relay it.
    """
    tool = mapping_tool_function.get(tool_name)
    if tool is None:
        available = ', '.join(sorted(mapping_tool_function))
        return f"Error: unknown tool '{tool_name}'. Available tools: {available}"

    result = tool(**(tool_args or {}))

    if result is None:
        return "The operation completed but didn't return any results."

    if isinstance(result, list):
        # str() each item so lists of non-string values do not break join().
        return ', '.join(str(item) for item in result)

    if isinstance(result, dict):
        # Convert dictionaries to formatted JSON strings
        return json.dumps(result, indent=2)

    # For any other type, convert using str()
    return str(result)

In [None]:
# Exercise both tools through the dispatcher, the same way openai_response
# invokes them (tool name plus a dict of keyword arguments).
# NOTE(review): the lookup only returns paper details if this ID was saved by
# an earlier search — confirm it is among the "gardening" results.
execute_tool("search_papers", {"topic": "gardening", "max_results": 3})
execute_tool("extract_info", {"paper_id": "1106.2104v1"})

In [1]:
#ChatBot Code
from dotenv import load_dotenv
import os
import openai

# Presumably loads variables such as OPENAI_API_KEY from a local .env file
# into the process environment — confirm the file exists next to the notebook.
load_dotenv()
# The key is read from the environment rather than hard-coded in the notebook.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Shared client used by openai_response for every chat completion request.
# NOTE(review): no api_key is passed here; presumably the client picks up
# OPENAI_API_KEY from the environment on its own — confirm whether the
# module-level assignment above is still needed.
client = openai.OpenAI()

def openai_response(prompt: str, model: str = "gpt-4-turbo-preview"):
    '''Send a prompt to the OpenAI chat API, run any requested tools, and print the reply.

    The model may answer directly or request one or more tool calls. When it
    requests tools, the assistant message is recorded once, every tool call is
    executed and answered with its own "tool" message, and a single follow-up
    request produces the final answer. Each tool_call_id must be answered
    before the follow-up, otherwise the API rejects the conversation.

    Args:
        prompt: The prompt to send to the LLM
        model: Chat model used for both the initial and the follow-up request
    '''

    # Initialize messages with the user prompt
    messages = [{'role': 'user', 'content': prompt}]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="auto",  # Let the model decide if it needs to use a tool
        max_tokens=1024,
        temperature=0.0
    )

    msg = response.choices[0].message

    if not msg.tool_calls:
        # No tool used, just print content
        print(msg.content)
        return

    # Record the assistant message (which carries all tool calls) exactly once.
    messages.append(msg)

    for tool_call in msg.tool_calls:
        tool_name = tool_call.function.name
        try:
            tool_args = json.loads(tool_call.function.arguments or "{}")
        except json.JSONDecodeError as e:
            # Report malformed arguments back to the model instead of aborting
            # the whole turn; the tool_call_id still gets its required answer.
            result = f"Error: could not parse arguments for tool {tool_name}: {e}"
        else:
            print(f"Calling tool {tool_name} with args {tool_args}")
            result = execute_tool(tool_name, tool_args)

        # Append tool result
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": result
        })

    # Get final response once every tool call has been answered
    followup = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=1024,
        temperature=0.0
    )
    print(followup.choices[0].message.content)



In [None]:
  
# Actual chat prompt processing
def chat_loop():
    """
    Read queries from standard input and answer each one via openai_response.

    The loop ends when the user types 'quit' (case-insensitive) or when the
    input stream is closed. Blank lines are ignored rather than sent to the
    model. Errors raised while handling a query are printed and the loop
    continues with the next query.
    """
    print("Type your queries or 'quit' to exit.")
    while True:
        try:
            print("\nQuery: ", end='', flush=True)
            query = input().strip()
            if not query:
                # Nothing to ask; prompt again without calling the API.
                continue
            if query.lower() == 'quit':
                break

            openai_response(query)
            print("\n")
        except EOFError:
            # input() raises EOFError once stdin is closed; retrying would
            # raise again immediately and spin forever, so stop here.
            print("\nInput closed, exiting.")
            break
        except Exception as e:
            print(f"\nError: {str(e)}")

# Run the chat loop and type 'quit' to exit
chat_loop()