In [None]:
pip install feedparser sentence-transformers scikit-learn

In [16]:
import requests
import feedparser
import uuid
import json


cs_categories = [
    "cs.AI", "cs.AR", "cs.CC", "cs.CE", "cs.CG", "cs.CL", "cs.CR", "cs.CV",
    "cs.CY", "cs.DB", "cs.DC", "cs.DL", "cs.DM", "cs.DS", "cs.ET", "cs.FL",
    "cs.GL", "cs.GR", "cs.GT", "cs.HC", "cs.IR", "cs.IT", "cs.LG", "cs.LO",
    "cs.MA", "cs.MM", "cs.MS", "cs.NA", "cs.NE", "cs.NI", "cs.OH", "cs.OS",
    "cs.PF", "cs.PL", "cs.RO", "cs.SC", "cs.SD", "cs.SE", "cs.SI", "cs.SY"
]

def fetch_arxiv_papers(category, max_results=10):
    base_url = "http://export.arxiv.org/api/query?"
    query = f"search_query=cat:{category}&start=0&max_results={max_results}&sortBy=submittedDate&sortOrder=descending"
    response = requests.get(base_url + query)
    if response.status_code != 200:
        print(f"Failed to fetch {category}")
        return []
    feed = feedparser.parse(response.content)
    papers = []
    for entry in feed.entries:
        papers.append({
            "id": str(uuid.uuid4()),
            "title": entry.title.strip(),
            "summary": entry.summary.strip(),
            "published": entry.published,
            "arxiv_url": entry.link,
            "category": category
        })
    return papers

all_cs_papers =[]
for cat in cs_categories:
    print(f"Fetching {cat}")
    all_cs_papers.extend(fetch_arxiv_papers(cat, max_results=100))

with open("arxiv_cs_papers.json","w") as f:
    json.dump(all_cs_papers, f, indent=2)

print(f"Total papers fetched: {len(all_cs_papers)}")

Fetching cs.AI
Fetching cs.AR
Fetching cs.CC
Fetching cs.CE
Fetching cs.CG
Fetching cs.CL
Fetching cs.CR
Fetching cs.CV
Fetching cs.CY
Fetching cs.DB
Fetching cs.DC
Fetching cs.DL
Fetching cs.DM
Fetching cs.DS
Fetching cs.ET
Fetching cs.FL
Fetching cs.GL
Fetching cs.GR
Fetching cs.GT
Fetching cs.HC
Fetching cs.IR
Fetching cs.IT
Fetching cs.LG
Fetching cs.LO
Fetching cs.MA
Fetching cs.MM
Fetching cs.MS
Fetching cs.NA
Fetching cs.NE
Fetching cs.NI
Fetching cs.OH
Fetching cs.OS
Fetching cs.PF
Fetching cs.PL
Fetching cs.RO
Fetching cs.SC
Fetching cs.SD
Fetching cs.SE
Fetching cs.SI
Fetching cs.SY
Total papers fetched: 4000


In [17]:
from sentence_transformers import SentenceTransformer

def embed_texts(texts, model_name="all-MiniLM-L6-v2"):
    model = SentenceTransformer(model_name)
    return model.encode(texts, convert_to_tensor=False, normalize_embeddings=False)

with open("arxiv_cs_papers.json", "r") as f:
    new_papers = json.load(f)

new_summaries = [paper["summary"] for paper in new_papers]
new_embeddings = embed_texts(new_summaries)

In [22]:
user_corpus = [
  {
    "id": "8a9258ed-dcc6-4c5f-a76e-ef4411a1dce5",
    "title": "VerifyBench: Benchmarking Reference-based Reward Systems for Large\n  Language Models",
    "summary": "Large reasoning models such as OpenAI o1 and DeepSeek-R1 have achieved\nremarkable performance in the domain of reasoning. A key component of their\ntraining is the incorporation of verifiable rewards within reinforcement\nlearning (RL). However, existing reward benchmarks do not evaluate\nreference-based reward systems, leaving researchers with limited understanding\nof the accuracy of verifiers used in RL. In this paper, we introduce two\nbenchmarks, VerifyBench and VerifyBench-Hard, designed to assess the\nperformance of reference-based reward systems. These benchmarks are constructed\nthrough meticulous data collection and curation, followed by careful human\nannotation to ensure high quality. Current models still show considerable room\nfor improvement on both VerifyBench and VerifyBench-Hard, especially\nsmaller-scale models. Furthermore, we conduct a thorough and comprehensive\nanalysis of evaluation results, offering insights for understanding and\ndeveloping reference-based reward systems. Our proposed benchmarks serve as\neffective tools for guiding the development of verifier accuracy and the\nreasoning capabilities of models trained via RL in reasoning tasks.",
    "published": "2025-05-21T17:54:43Z",
    "arxiv_url": "http://arxiv.org/abs/2505.15801v1",
    "category": "cs.AI"
  },
]

user_summaries = [doc["summary"] for doc in user_corpus]
user_embeddings = embed_texts(user_summaries)

In [23]:
from sklearn.metrics.pairwise import cosine_similarity

def rank_by_similarity(new_papers, new_embeddings, user_embeddings, threshold=0.7):
    sim_matrix = cosine_similarity(new_embeddings, user_embeddings)
    max_scores = sim_matrix.max(axis=1)
    ranked = [
        {
            "title": new_papers[i]['title'],
            "summary": new_papers[i]['summary'],
            "url": new_papers[i]['arxiv_url'],
            "published": new_papers[i]['published'],
            "score": float(max_scores[i])
        }
        for i in range(len(new_papers)) if max_scores[i] >= threshold
    ]
    return sorted(ranked, key=lambda x: x["score"], reverse=True)

ranked_matches = rank_by_similarity(new_papers, new_embeddings, user_embeddings)

with open("ranked_matches.json","w") as f:
    json.dump(ranked_matches, f, indent=2)

print(f"Found {len(ranked_matches)} relevant new papers")
for match in ranked_matches[:5]:
    print(f"{match['title']} ({match['score']:.2f})")
    print(match['url'], "\n")

Found 2 relevant new papers
VerifyBench: Benchmarking Reference-based Reward Systems for Large
  Language Models (1.00)
http://arxiv.org/abs/2505.15801v1 

VerifyBench: Benchmarking Reference-based Reward Systems for Large
  Language Models (1.00)
http://arxiv.org/abs/2505.15801v1 

