# Download the blind ICLR2025 data from OpenReview

In [1]:
%matplotlib notebook

import numpy as np
import pandas as pd
import pylab as plt

import requests
import time

In [2]:
%%time

# Download titles/abstracts/authors of all papers (this part is fast)

titles = []
abstracts = []
years = []
forum_ids = []
decisions = []
authors = []
keywords = []

for year in [2025]:
    print(year, end=': ')
    for query in ['submission', 'Submission', 'Blind_Submission', 
                  'Withdrawn_Submission', 'Rejected_Submission', 
                  'Desk_Rejected_Submission', '']:
        if year <= 2017:
            if query == '':
                continue
            url = f'https://api.openreview.net/notes?invitation=ICLR.cc%2F{year}%2Fconference%2F-%2F{query}'
        elif year <= 2023:
            if query == '':
                continue
            url = f'https://api.openreview.net/notes?invitation=ICLR.cc%2F{year}%2FConference%2F-%2F{query}'
        else:
            if query != '':
                query = '/' + query
            url = f'https://api2.openreview.net/notes?content.venueid=ICLR.cc/{year}/Conference{query}'        
            
        for offset in range(0, 10_000, 1000):
            df = pd.DataFrame(requests.get(url + f'&offset={offset}').json()['notes'])
            if len(df) > 0:
                print(len(df), end=' ')
                if year < 2024:
                    titles    += [d['title'].strip() for d in df['content'].values]
                    abstracts += [d['abstract'].strip() for d in df['content'].values]
                    keywords  += [d['keywords'] for d in df['content'].values]
                    authors   += [', '.join(d['authors']) for d in df['content']]
                else:
                    titles    += [d['title']['value'].strip() for d in df['content'].values]
                    abstracts += [d['abstract']['value'].strip() for d in df['content'].values]
                    keywords  += [d['keywords']['value'] for d in df['content'].values]
                    if 'authors' in df['content'].values[0]:
                        authors   += [', '.join(d['authors']['value']) for d in df['content'].values]
                    else:
                        authors += [''] * len(df)
                years     += [year] * len(df)
                forum_ids += list(df.forum)
                                                
                if 'Withdrawn_Submission' in query:
                    decisions += ['Withdrawn'] * len(df)
                elif 'Desk_Rejected_Submission' in query:
                    decisions += ['Desk rejected'] * len(df)
                elif 'Rejected_Submission' in query:
                    decisions += ['Reject'] * len(df)    
                else:
                    decisions += [''] * len(df)
            else:
                break
    print('')
print('')

print(f'Found {len(titles)} papers\n')

2025: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 67 14 

Found 10081 papers

CPU times: user 1.66 s, sys: 153 ms, total: 1.82 s
Wall time: 23.7 s


In [3]:
# Prepare the dataframe

iclr = pd.DataFrame.from_dict({
    'year': np.array(years).astype(int), 
    'id': forum_ids, 
    'title': titles, 
    'abstract': abstracts,
    'authors': authors,
    'decision': decisions,
    'scores': [[]] * len(abstracts),
    'keywords': [[kk.lower() for kk in k] for k in keywords],
})

# Kicking out nonsense abstracts

mask = np.array([len(a) >= 100 for a in iclr.abstract])

print(f'Removing {np.sum(~mask)} submissions with abstract length below 100 characters:')
for abstract in iclr[~mask]['abstract'].values:
    print('  ' + abstract)    
iclr = iclr[mask].reset_index(drop=True)

Removing 2 submissions with abstract length below 100 characters:
  xx
  xx


In [4]:
# Inspect

assert iclr.id.unique().size == len(iclr)

iclr

Unnamed: 0,year,id,title,abstract,authors,decision,scores,keywords
0,2025,5sRnsubyAK,Neuroacoustic Patterns: Constant Q Cepstral Co...,Early identification of neurodegenerative dise...,,,[],"[neurodegenerative disorder, constant q cepstr..."
1,2025,J1SGf2lyr6,A Feature-Aware Federated Learning Framework f...,The expansion of 5G networks has led to remark...,,,[],"[federated learning, anomaly detection, 5g net..."
2,2025,49ti6LOUw5,UnoLoRA: Single Low-Rank Adaptation for Effici...,Recent research has demonstrated the efficacy ...,,,[],"[lora, multi-task learning, peft]"
3,2025,zkNCWtw2fd,Synergistic Approach for Simultaneous Optimiza...,Information retrieval across different languag...,,,[],"[information retrieval, multilingualism and cr..."
4,2025,viQ1bLqKY0,EXecution-Eval: Can language models execute re...,"As language models (LLMs) advance, traditional...",,,[],"[large language model, evaluation, benchmark, ..."
...,...,...,...,...,...,...,...,...
10074,2025,InxxTu0Bfs,GIFT-SW: Gaussian noise Injected Fine-Tuning o...,Parameter Efficient Fine-Tuning (PEFT) methods...,"Maxim Zhelnin, Viktor Moskvoretskii, Egor Shve...",Desk rejected,[],"[large language models, parameter efficient fi..."
10075,2025,qXwVXj03nO,A collaborative Multi-Agent LLM Approach for K...,Retrieval-Augmented Generation (RAG) systems h...,"Ankur Krishna, surya ardham, Chetan Malhotra, ...",Desk rejected,[],"[large language models (llms), knowledge graph..."
10076,2025,SY70rVSr3M,Model Mimic Attack: Knowledge Distillation for...,The vulnerability of artificial neural network...,"Kirill Lukyanov, Andrew Perminov, Denis Turdak...",Desk rejected,[],"[black-box adversarial attack, knowledge disti..."
10077,2025,pjfvmZWFfX,EEE-Bench: A Comprehensive Multimodal Electric...,Recent studies on large language models (LLMs)...,"Ming Li, Jike Zhong, Tianle Chen, Konstantinos...",Desk rejected,[],"[benchmark, evaluation, large multimodal model]"


In [5]:
# Combine and save

iclr24v2 = pd.read_parquet('../data/iclr24v2.parquet')

iclr25v1 = pd.concat((iclr24v2, iclr.sort_values(by='id'))).reset_index(drop=True)

iclr25v1.to_parquet('../data/iclr25v1.parquet')