In [None]:
import arxiv
import os

# Phrases that must appear in the paper title. Every phrase carries its own
# `ti:` prefix: an arXiv field prefix binds only to the term that immediately
# follows it, so a single leading `ti:` would scope just the first phrase and
# leave the others matching any field.
title_phrases = (
    "retrieval-augmented generation",
    "RAG system",
    "retrieval-augmented language model",
    "RAG-based generation",
)
title_query = " OR ".join(f'ti:"{phrase}"' for phrase in title_phrases)

# Ethics-related terms; deliberately left without a field prefix.
all_fields_query = (
    'privacy OR "personal data" OR PII OR fairness OR '
    '"bias mitigation" OR debiasing OR transparency OR '
    '"source attribution" OR "citation clarity" OR "ethical AI" OR "responsible AI"'
)
# Restrict the search to arXiv `cs.*` categories.
cat_query = 'cat:cs.*'
final_query = f'({title_query}) AND ({all_fields_query}) AND ({cat_query})'

search = arxiv.Search(
    query=final_query,
    max_results=200,
    sort_by=arxiv.SortCriterion.SubmittedDate
)

# Output folder for the per-paper .bib files, and the tally of files written.
os.makedirs('arxiv_bibtex', exist_ok=True)
count = 0

def to_bibtex(result):
    """Render one arXiv search result as a BibTeX ``@article`` entry.

    Args:
        result: Object exposing ``authors`` (items with a ``name``),
            ``published`` (with a ``year``), ``title``, ``entry_id`` and
            ``summary``. ``primary_category``, ``pdf_url`` and ``doi`` are
            optional: each may be missing or ``None``.

    Returns:
        The entry as a string ending in a newline. The citation key and the
        ``eprint`` field are the last path segment of ``entry_id``.
    """
    no_braces = str.maketrans("", "", "{}")

    def clean(text):
        # Braces are dropped so the field stays balanced. All whitespace runs
        # are collapsed because wrapped text carries a newline *plus*
        # indentation, which a plain '\n' -> ' ' swap turns into runs of spaces.
        return " ".join(text.translate(no_braces).split())

    authors = " and ".join(a.name for a in result.authors)
    year = str(result.published.year)
    title = clean(result.title)
    abstract = clean(result.summary)
    url = result.entry_id
    arxiv_id = url.split('/')[-1]

    # Optional attributes: treat "missing" and "None" alike so the entry never
    # contains the literal text "None".
    category = getattr(result, "primary_category", None) or ""
    pdf_url = getattr(result, "pdf_url", None) or ""

    # Add the DOI line only when a DOI is present.
    doi_value = getattr(result, "doi", None)
    doi = f"  doi={{ {doi_value} }},\n" if doi_value else ""

    return (
        f"@article{{{arxiv_id},\n"
        f"  title={{ {title} }},\n"
        f"  author={{ {authors} }},\n"
        f"  year={{ {year} }},\n"
        f"  journal={{ arXiv preprint arXiv:{arxiv_id} }},\n"
        f"  url={{ {url} }},\n"
        f"  eprint={{ {arxiv_id} }},\n"
        f"  archivePrefix={{arXiv}},\n"
        f"  primaryClass={{ {category} }},\n"
        f"  pdf={{ {pdf_url} }},\n"
        f"{doi}"
        f"  abstract={{ {abstract} }}\n"
        f"}}\n"
    )

# Fetch through a Client: Search.results() is deprecated in the arxiv package
# in favour of Client.results(search).
for result in arxiv.Client().results(search):
    # Keep only papers published in 2018 or later.
    if result.published.year < 2018:
        continue
    # One .bib file per paper, named after the last path segment of its entry URL.
    arxiv_id = result.entry_id.split('/')[-1]
    bib_path = os.path.join('arxiv_bibtex', f"{arxiv_id}.bib")
    with open(bib_path, "w", encoding="utf-8") as f:
        f.write(to_bibtex(result))
    count += 1

print(f"\n✅ BibTeX export complete! {count} records saved in the arxiv_bibtex folder.")


  for result in search.results():



✅ BibTeX export complete! 40 records saved in the arxiv_bibtex folder.
