In [6]:
import arxiv
from datetime import datetime

# 1. 쿼리 구성
query = '''
(
  (ti:"retrieval-augmented generation" OR ti:"RAG system" OR ti:"retrieval-augmented language model" OR ti:"RAG-based generation")
  AND
  (privacy OR "personal data" OR PII OR fairness OR "bias mitigation" OR debiasing OR transparency OR "source attribution" OR "citation clarity" OR "ethical AI" OR "responsible AI")
  AND
  (cat:cs.*)
)
'''

# 2. 검색 및 출력
search = arxiv.Search(
    query=query,
    max_results=100,  # 필요시 더 늘릴 수 있음
    sort_by=arxiv.SortCriterion.SubmittedDate
)

result_list = []
for result in search.results():
    year = int(str(result.published)[:4])
    if year >= 2018:
        result_list.append((result.title, result.published.date(), result.categories, result.entry_id))

print("검색된 논문 목록 (2018년 이후, Computer Science):")
for t, date, cats, link in result_list:
    print(f"- {date}: {t}\n  {cats} | {link}\n")


  for result in search.results():


검색된 논문 목록 (2018년 이후, Computer Science):


In [8]:
import arxiv

# 1. 제목에서 기술 키워드 OR로
tech = '"retrieval-augmented generation" OR "RAG system" OR "retrieval-augmented language model" OR "RAG-based generation"'
# 2. 전체 필드에서 윤리 키워드 OR로
ethics = 'privacy OR "personal data" OR PII OR fairness OR "bias mitigation" OR debiasing OR transparency OR "source attribution" OR "citation clarity" OR "ethical AI" OR "responsible AI"'

# 쿼리 조합
query = f'(ti:{tech}) AND ({ethics}) AND (cat:cs.*)'

search = arxiv.Search(
    query=query,
    max_results=100,
    sort_by=arxiv.SortCriterion.SubmittedDate
)

found = False
for result in search.results():
    print(result.title, result.published, result.categories)
    found = True
if not found:
    print("❌ 결과가 안 나옴. 쿼리에서 일부 키워드/필드명 빼고 pool을 넓혀야 함.")


  for result in search.results():


Securing RAG: A Risk Assessment and Mitigation Framework 2025-05-13 16:39:00+00:00 ['cs.CR', 'cs.AI', 'cs.IR']
Optimizing Retrieval-Augmented Generation: Analysis of Hyperparameter Impact on Performance and Efficiency 2025-05-13 11:13:27+00:00 ['cs.LG', 'cs.AI', 'cs.CL']
CBM-RAG: Demonstrating Enhanced Interpretability in Radiology Report Generation with Multi-Agent RAG and Concept Bottleneck Models 2025-04-29 16:14:55+00:00 ['cs.AI', 'cs.CV', 'cs.IR']
Privacy-Preserving Federated Embedding Learning for Localized Retrieval-Augmented Generation 2025-04-27 04:26:02+00:00 ['cs.CL']
SMARTFinRAG: Interactive Modularized Financial RAG Benchmark 2025-04-25 02:29:56+00:00 ['cs.CE', 'cs.CL', 'cs.IR']
CiteFix: Enhancing RAG Accuracy Through Post-Processing Citation Correction 2025-04-22 06:41:25+00:00 ['cs.IR', 'cs.CL']
The Other Side of the Coin: Exploring Fairness in Retrieval-Augmented Generation 2025-04-11 10:17:10+00:00 ['cs.CL', 'cs.AI']
Privacy-Aware RAG: Secure and Isolated Knowledge Ret

In [9]:
import arxiv
from datetime import datetime

# Title(기술 키워드) OR 묶기
title_query = (
    '"retrieval-augmented generation" OR '
    '"RAG system" OR '
    '"retrieval-augmented language model" OR '
    '"RAG-based generation"'
)

# All fields(윤리 키워드) OR 묶기
all_fields_query = (
    'privacy OR "personal data" OR PII OR fairness OR '
    '"bias mitigation" OR debiasing OR transparency OR '
    '"source attribution" OR "citation clarity" OR "ethical AI" OR "responsible AI"'
)

# Computer Science subject area (cs.*)
cat_query = 'cat:cs.*'

# 최종 쿼리 (칸별 AND는 arXiv 웹과 동일하게 작동)
final_query = f'(ti:{title_query}) AND ({all_fields_query}) AND ({cat_query})'

# arxiv 라이브러리 검색 실행
search = arxiv.Search(
    query=final_query,
    max_results=200,  # 필요시 조절
    sort_by=arxiv.SortCriterion.SubmittedDate
)

# 결과 정리: 2018년 이후 논문만
results = []
for result in search.results():
    year = int(str(result.published)[:4])
    if year >= 2018:
        results.append({
            "title": result.title,
            "published": result.published.date(),
            "categories": result.categories,
            "entry_id": result.entry_id
        })

# 결과 출력
print(f"2018년 이후 Computer Science 카테고리 검색 결과 {len(results)}건:")
for i, r in enumerate(results, 1):
    print(f"{i}. {r['published']} | {r['title']} | {r['categories']} | {r['entry_id']}")


  for result in search.results():


2018년 이후 Computer Science 카테고리 검색 결과 40건:
1. 2025-05-13 | Securing RAG: A Risk Assessment and Mitigation Framework | ['cs.CR', 'cs.AI', 'cs.IR'] | http://arxiv.org/abs/2505.08728v1
2. 2025-05-13 | Optimizing Retrieval-Augmented Generation: Analysis of Hyperparameter Impact on Performance and Efficiency | ['cs.LG', 'cs.AI', 'cs.CL'] | http://arxiv.org/abs/2505.08445v1
3. 2025-04-29 | CBM-RAG: Demonstrating Enhanced Interpretability in Radiology Report Generation with Multi-Agent RAG and Concept Bottleneck Models | ['cs.AI', 'cs.CV', 'cs.IR'] | http://arxiv.org/abs/2504.20898v2
4. 2025-04-27 | Privacy-Preserving Federated Embedding Learning for Localized Retrieval-Augmented Generation | ['cs.CL'] | http://arxiv.org/abs/2504.19101v1
5. 2025-04-25 | SMARTFinRAG: Interactive Modularized Financial RAG Benchmark | ['cs.CE', 'cs.CL', 'cs.IR'] | http://arxiv.org/abs/2504.18024v1
7. 2025-04-22 | CiteFix: Enhancing RAG Accuracy Through Post-Processing Citation Correction | ['cs.IR', 'cs.CL'] | ht

In [None]:
import arxiv
import os

title_query = (
    '"retrieval-augmented generation" OR '
    '"RAG system" OR '
    '"retrieval-augmented language model" OR '
    '"RAG-based generation"'
)
all_fields_query = (
    'privacy OR "personal data" OR PII OR fairness OR '
    '"bias mitigation" OR debiasing OR transparency OR '
    '"source attribution" OR "citation clarity" OR "ethical AI" OR "responsible AI"'
)
cat_query = 'cat:cs.*'
final_query = f'(ti:{title_query}) AND ({all_fields_query}) AND ({cat_query})'

search = arxiv.Search(
    query=final_query,
    max_results=200,
    sort_by=arxiv.SortCriterion.SubmittedDate
)

os.makedirs('arxiv_bibtex', exist_ok=True)
count = 0

def to_bibtex(result):
    authors = " and ".join([a.name for a in result.authors])
    year = str(result.published.year)
    title = result.title.replace('\n', ' ').replace('{', '').replace('}', '')
    arxiv_id = result.entry_id.split('/')[-1]
    url = result.entry_id
    category = result.primary_category if hasattr(result, "primary_category") else ""
    abstract = result.summary.replace('\n', ' ').replace('{', '').replace('}', '')
    pdf_url = result.pdf_url if hasattr(result, "pdf_url") else ""
    # DOI 있을 경우 추가
    doi = ""
    if hasattr(result, "doi") and result.doi:
        doi = f"  doi={{ {result.doi} }},\n"
    bib = (
        f"@article{{{arxiv_id},\n"
        f"  title={{ {title} }},\n"
        f"  author={{ {authors} }},\n"
        f"  year={{ {year} }},\n"
        f"  journal={{ arXiv preprint arXiv:{arxiv_id} }},\n"
        f"  url={{ {url} }},\n"
        f"  eprint={{ {arxiv_id} }},\n"
        f"  archivePrefix={{arXiv}},\n"
        f"  primaryClass={{ {category} }},\n"
        f"  pdf={{ {pdf_url} }},\n"
        f"{doi}"
        f"  abstract={{ {abstract} }}\n"
        f"}}\n"
    )
    return bib

for result in search.results():
    year = int(str(result.published)[:4])
    if year >= 2018:
        bibtex_str = to_bibtex(result)
        arxiv_id = result.entry_id.split('/')[-1]
        bib_path = os.path.join('arxiv_bibtex', f"{arxiv_id}.bib")
        with open(bib_path, "w", encoding="utf-8") as f:
            f.write(bibtex_str)
        count += 1

print(f"\n✅ BibTeX export complete! {count} records saved in the arxiv_bibtex folder.")


  for result in search.results():
