# Import libraries

In [None]:
import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv
from google.generativeai import GenerativeModel

# Constant

In [5]:
PAPER_DIR = "papers"

In [19]:
def search_papers(topic:str, max_results: int=5)->List[str]:
    client = arxiv.Client()
    search = arxiv.Search(
        query = topic,
        max_results= max_results,
        sort_by=arxiv.SortCriterion.Relevance
    )
    papers = client.results(search)
    path = os.path.join(PAPER_DIR,topic.lower().replace(" ","_"))
    os.makedirs(path)

    file_path = os.path.join(path,"papers_info.json")

    try:
        with open(file_path,"r") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError,json.JSONDecodeError):
        papers_info = {}
    
    paper_ids = []
    for paper in papers:
        paper_ids.append(paper.get_short_id())
        paper_info = {
            "title":paper.title,
            "authors":[author.name for author in paper.authors],
            "summary":paper.summary,
            "pdf_url":paper.pdf_url,
            "published":str(paper.published.date())
        }
        papers_info[paper.get_short_id()] = paper_info
    with open(file_path,"w") as json_file:
        json.dump(papers_info,json_file,indent=2)
    print(f"Results are saved in :{file_path}")
    return paper_ids




In [20]:
search_papers("computers")

ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

In [15]:
def extract_info(paper_id:str)->str:
    for item in os.listdir(PAPER_DIR):
        item_path = os.path.join(PAPER_DIR,item)
        if os.path.isdir(item_path):
            file_path = os.path.join(item_path,"papers_info.json")
            
            if os.path.isfile(file_path):
                
                try:
                    with open(file_path,"r") as json_file:
                        papers_info = json.load(json_file)
                        print(papers_info.keys())
                        if paper_id in papers_info:
                            return json.dumps(papers_info[paper_id],indent=2)
                except (FileNotFoundError, json.JSONDecodeError) as e:
                    print(f"Error reading {file_path}: {str(e)}")
                    continue
    return f"There is no saved information related to paper {paper_id}"


In [16]:
extract_info("1310.7911v2")

dict_keys(['title', 'authors', 'summary', 'pdf_url', 'published'])


'There is no saved information related to paper 1310.7911v2'