# Library

In [33]:
import arxiv, os, requests

### arxiv q-fin finance topic
- `q-fin.RM`    Risk Management
- `q-fin.CP`    Computational Finance
- `q-fin.ST`    Statistical Finance
- `q-fin.TR`    Trading and Market Microstructure
- `q-fin.EC`    Economics
- `q-fin.GN`    General Finance
- `q-fin.MF`	Mathematical Finance
- `q-fin.PM`	Portfolio Management
- `q-fin.PR`	Pricing of Securities


In [34]:
def download_pdfs(pdf_links, download_folder="documents", filename = ""):
    # Create the download folder if it doesn't already exist.
    if not os.path.exists(download_folder):
        os.makedirs(download_folder)
        print(f"Created directory: {download_folder}")

    url = pdf_links
    try:
        response = requests.get(url, stream=True)
        # Raise an exception for bad status codes (4xx or 5xx)
        response.raise_for_status()

        # Extract the filename from the URL
        # filename = os.path.join(download_folder, url.split("/")[-1])
        filename = os.path.join(download_folder, filename+'.pdf')

        # Save the content to a local file
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f"Successfully downloaded: {filename}")

    except requests.exceptions.RequestException as e:
        print(f"Error downloading {url}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred for {url}: {e}")

# Arxiv finance retriver

In [35]:
#API connect
client = arxiv.Client()
#search
search = arxiv.Search(
    query=f"cat:q-fin*", #q-fin is finance topic
    max_results=20,
    sort_by=arxiv.SortCriterion.SubmittedDate #Date sort
)

In [36]:
results = client.results(search)
for result in client.results(search):
    print(f"📄 Title: {result.title}")
    # print(f"   ➡️ Primary Category: {result.primary_category}") 
    # This is the full list of all categories ("subqueries")
    print(f"   🏷️ All Categories (Subqueries): {result.categories}")
    print(f"   🔗 PDF Link: {result.pdf_url}")
    for categorie in result.categories:
        if('q-fin' not in categorie): continue
        download_folder = f"../data/finpaper/{categorie}/"
        download_pdfs(pdf_links=result.pdf_url, download_folder=download_folder, filename=result.title)
    print("-" * 30)

📄 Title: Disaster Risk Financing through Taxation: A Framework for Regional Participation in Collective Risk-Sharing
   🏷️ All Categories (Subqueries): ['econ.TH', 'q-fin.RM']
   🔗 PDF Link: http://arxiv.org/pdf/2506.18895v1
Created directory: ../data/finpaper/q-fin.RM/
Successfully downloaded: ../data/finpaper/q-fin.RM/Disaster Risk Financing through Taxation: A Framework for Regional Participation in Collective Risk-Sharing.pdf
------------------------------
📄 Title: The Theory of Economic Complexity
   🏷️ All Categories (Subqueries): ['econ.GN', 'q-fin.EC']
   🔗 PDF Link: http://arxiv.org/pdf/2506.18829v1
Created directory: ../data/finpaper/q-fin.EC/
Successfully downloaded: ../data/finpaper/q-fin.EC/The Theory of Economic Complexity.pdf
------------------------------
📄 Title: An AI-powered Tool for Central Bank Business Liaisons: Quantitative Indicators and On-demand Insights from Firms
   🏷️ All Categories (Subqueries): ['econ.GN', 'q-fin.EC']
   🔗 PDF Link: http://arxiv.org/pdf/2