# Get OpenReview papers via the API

In [31]:
import requests
import os 
import json 
import pandas as pd
import warnings
from tqdm import tqdm
warnings.filterwarnings('ignore')

def fetch_and_categorize_papers(venue, year):
    """Fetch a venue's papers from OpenReview, grouped by presentation type.

    If ``res/{venue}{year}.json`` already exists, its parsed contents are
    returned unchanged and no request is made.  Otherwise the API v2
    ``/notes`` endpoint is paged through until it returns an empty page.

    Args:
        venue: Venue prefix used to build the venue id, e.g. ``'NeurIPS'``.
        year: Conference year, e.g. ``2023``.

    Returns:
        A dict with the keys ``'Oral'``, ``'Spotlight'`` and ``'Poster'``,
        each mapping to a list of raw note dicts.  Notes whose venue string
        contains none of those words are dropped.  An empty dict is returned
        if any request fails.
    """
    cache_path = f'res/{venue}{year}.json'
    if os.path.exists(cache_path):
        # Use a context manager so the cache file handle is always closed.
        with open(cache_path, 'r') as cache_file:
            return json.load(cache_file)

    venue_id = f"{venue}.cc/{year}/Conference"
    limit = 1000  # Adjust as needed based on API's maximum allowed limit per request
    offset = 0
    all_papers = []

    while True:
        url = f"https://api2.openreview.net/notes?content.venueid={venue_id}&offset={offset}&limit={limit}"
        try:
            # Without a timeout a stalled connection would block forever;
            # a timeout surfaces as a RequestException handled below.
            response = requests.get(url, timeout=60)
            response.raise_for_status()  # Raises an HTTPError for bad responses
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Failed to retrieve data: {e}")
            return {}

        notes = data.get('notes')
        if not notes:
            break  # Exit the loop if no more papers are available
        all_papers.extend(notes)
        offset += limit  # Prepare offset for the next page of results

    # Insertion order doubles as match priority: Oral, then Spotlight, then Poster.
    papers_by_type = {'Oral': [], 'Spotlight': [], 'Poster': []}

    for note in all_papers:
        venue_info = note.get('content', {}).get('venue', {})
        if not isinstance(venue_info, dict) or 'value' not in venue_info:
            continue
        venue_value = str(venue_info['value']).lower()
        for paper_type, bucket in papers_by_type.items():
            if paper_type.lower() in venue_value:
                bucket.append(note)
                break

    return papers_by_type


def _note_value(content, key):
    """Return the ``'value'`` payload of ``content[key]``, or ``None`` if absent."""
    field = content.get(key)
    if isinstance(field, dict):
        return field.get('value')
    return field


def filter_paper_by_topic(venue, year, topic, papers_by_type=None):
    """Print every paper whose title, abstract, TL;DR or keywords mention *topic*.

    Args:
        venue: Venue prefix, e.g. ``'NeurIPS'``; only used when the papers
            have to be fetched.
        year: Conference year; only used when the papers have to be fetched.
        topic: Substring to look for.  Matching is case-insensitive.
        papers_by_type: Mapping of presentation type to a list of note dicts.
            When empty or ``None`` it is obtained from
            ``fetch_and_categorize_papers(venue, year)``.

    Returns:
        None.  Matching papers are printed to stdout.
    """
    if not papers_by_type:
        papers_by_type = fetch_and_categorize_papers(venue, year)

    needle = topic.lower()

    for paper_type, notes in papers_by_type.items():
        print(f"\n{paper_type} Papers:")
        for note in notes or []:
            content = note.get('content', {})

            title = _note_value(content, 'title') or ''
            abstract = _note_value(content, 'abstract') or ''
            tldr = _note_value(content, 'TLDR') or ''
            authors = _note_value(content, 'authors') or []
            keywords = _note_value(content, 'keywords') or []
            if not isinstance(keywords, str):
                keywords = '; '.join(keywords)

            # Search only the real text, never the "No ... available"
            # placeholders, so a topic such as "available" cannot match
            # papers that simply lack a field.
            searchable = (tldr, keywords, abstract, title)
            if not any(needle in text.lower() for text in searchable):
                continue

            if not isinstance(authors, str):
                authors = ", ".join(authors)
            link = f"https://openreview.net/forum?id={note.get('id', '')}"
            print(f"Title: {title or 'No title available'}")
            print(f"Authors: {authors}")
            print(f"TL;DR: {tldr or 'No TL;DR available'}")
            print(f"Keywords: {keywords or 'No keywords available'}")
            print(f"Link: {link}")
            print("---")
                    

def _content_value(content, key):
    """Return the ``'value'`` payload of ``content[key]``, or ``None`` if absent."""
    field = content.get(key)
    if isinstance(field, dict):
        return field.get('value')
    return field


def json_to_pandas(papers):
    """Flatten categorized notes into one DataFrame with a row per paper.

    Args:
        papers: Mapping of presentation type (used as the ``Tag`` column) to
            a list of note dicts, as returned by
            ``fetch_and_categorize_papers``.

    Returns:
        A DataFrame with the columns ``Title``, ``Tag``, ``Keywords``,
        ``TLDR``, ``Authors``, ``Abstract`` and ``Link``.  The columns are
        present even when there are no rows.
    """
    columns = ['Title', 'Tag', 'Keywords', 'TLDR', 'Authors', 'Abstract', 'Link']

    # Collect plain dicts and build the frame once: DataFrame.append copied
    # the whole frame on every call and no longer exists in pandas 2.x.
    rows = []
    for paper_type, notes in papers.items():
        if not notes:
            continue
        for note in tqdm(notes, desc=f'Processing {paper_type} papers...'):
            content = note.get('content', {})

            # Authors and keywords are lists inside the {'value': ...}
            # wrapper; join the list itself, not the wrapper dict.
            authors = _content_value(content, 'authors')
            if authors is None:
                authors = 'None'
            elif not isinstance(authors, str):
                authors = ", ".join(authors)

            keywords = _content_value(content, 'keywords')
            if keywords is None:
                keywords = 'No keywords available'
            elif not isinstance(keywords, str):
                keywords = '; '.join(keywords)

            rows.append({
                'Title': _content_value(content, 'title') or 'No title available',
                'Tag': paper_type,
                'Keywords': keywords,
                'TLDR': _content_value(content, 'TLDR') or 'No TL;DR available',
                'Authors': authors,
                'Abstract': _content_value(content, 'abstract') or 'No abstract available',
                'Link': f"https://openreview.net/forum?id={note['id']}",
            })

    return pd.DataFrame(rows, columns=columns)


In [32]:
venue = 'NeurIPS'
year = 2023
papers = fetch_and_categorize_papers(venue=venue, year=year)

# The output directory is not created anywhere else; without it both the
# cache write and the CSV export fail with FileNotFoundError.
os.makedirs('res', exist_ok=True)

# fetch_and_categorize_papers returns {} when a request fails.  Caching that
# would make every later call return the empty cache instead of retrying.
if papers:
    with open(f'res/{venue}{year}.json', 'w') as f:
        json.dump(papers, f)

df = json_to_pandas(papers)
df.to_csv(f'res/{venue}{year}.csv')

# filter_paper_by_topic(venue, year, 'reason', papers)

Processing Oral papers...: 100%|██████████| 67/67 [00:00<00:00, 231.62it/s]
Processing Spotlight papers...: 100%|██████████| 378/378 [00:01<00:00, 244.12it/s]
Processing Poster papers...: 100%|██████████| 2773/2773 [00:11<00:00, 235.38it/s]


In [33]:
venue = 'ICLR'
year = 2024
papers = fetch_and_categorize_papers(venue=venue, year=year)

# The output directory is not created anywhere else; without it both the
# cache write and the CSV export fail with FileNotFoundError.
os.makedirs('res', exist_ok=True)

# fetch_and_categorize_papers returns {} when a request fails.  Caching that
# would make every later call return the empty cache instead of retrying.
if papers:
    with open(f'res/{venue}{year}.json', 'w') as f:
        json.dump(papers, f)

df = json_to_pandas(papers)
df.to_csv(f'res/{venue}{year}.csv')

# filter_paper_by_topic(venue, year, 'reason', papers)

Processing Oral papers...: 100%|██████████| 86/86 [00:00<00:00, 259.66it/s]
Processing Spotlight papers...: 100%|██████████| 367/367 [00:01<00:00, 233.97it/s]
Processing Poster papers...: 100%|██████████| 1807/1807 [00:08<00:00, 216.35it/s]


In [3]:
venue = 'ICML'
year = 2024
papers = fetch_and_categorize_papers(venue=venue, year=year)

# The output directory is not created anywhere else; without it both the
# cache write and the CSV export fail with FileNotFoundError.
os.makedirs('res', exist_ok=True)

# fetch_and_categorize_papers returns {} when a request fails.  Caching that
# would make every later call return the empty cache instead of retrying.
if papers:
    with open(f'res/{venue}{year}.json', 'w') as f:
        json.dump(papers, f)

df = json_to_pandas(papers)
df.to_csv(f'res/{venue}{year}.csv')

## Filter papers by keyword

In [9]:
import json
import re

def extract_papers(file_path, keywords, focus, label='NeurIPS 2023'):
    """Return markdown entries for papers matching a keyword and a focus term.

    A paper is selected when its title or abstract contains at least one of
    *keywords* and also contains *focus*.  All matching is case-insensitive
    substring matching.  A header line is printed for each presentation type
    found in the file.

    Args:
        file_path: Path to a JSON file mapping presentation type to a list
            of note dicts.
        keywords: Iterable of keyword strings; any one match is enough.
        focus: Term that must also appear in the title or abstract.
        label: Venue tag placed in square brackets at the start of each
            entry.  Defaults to ``'NeurIPS 2023'``, the previously
            hard-coded value.

    Returns:
        A list of strings of the form
        ``"[label] title [[paper]](https://openreview.net/forum?id=...) \\n"``.
    """
    with open(file_path, 'r') as file:
        papers_by_type = json.load(file)

    # Lowercase the search terms once rather than for every note.
    wanted = [keyword.lower() for keyword in keywords]
    focus_lower = focus.lower()

    results = []

    for paper_type, notes in papers_by_type.items():
        print(f"\n{paper_type} Papers:")
        for note in notes or []:
            content = note.get('content', {})
            # `or ''` also covers a field present with a null value.
            title = content.get('title', {}).get('value') or ''
            abstract = content.get('abstract', {}).get('value') or ''
            texts = (title.lower(), abstract.lower())

            if not any(keyword in text for text in texts for keyword in wanted):
                continue
            if not any(focus_lower in text for text in texts):
                continue

            full_url = f"https://openreview.net/forum?id={note.get('forum', '')}"
            results.append(f"[{label}] {title} [[paper]]({full_url}) \n")

    return results

# Example: list papers from the cached NeurIPS 2023 dump that mention a
# planning-related keyword and also mention the focus term.
file_path, focus = 'res/NeurIPS2023.json', 'LLM'  # Replace the path with your actual file
keywords = [
    'planning',
    'plan',
    'plans',
]  # Add other related keywords as needed

extracted_papers = extract_papers(file_path=file_path, keywords=keywords, focus=focus)
for paper in extracted_papers:
    print(paper)



Oral Papers:

Spotlight Papers:

Poster Papers:
[NeurIPS 2023] Tree of Thoughts: Deliberate Problem Solving with Large Language Models [[paper]](https://openreview.net/forum?id=5Xc1ecxO1h) 

[NeurIPS 2023] On the Planning Abilities of Large Language Models - A Critical Investigation [[paper]](https://openreview.net/forum?id=X6dEqXIsEW) 

[NeurIPS 2023] Can Language Models Solve Graph Problems in Natural Language? [[paper]](https://openreview.net/forum?id=UDqHhbqYJV) 

[NeurIPS 2023] SwiftSage: A Generative Agent with Fast and Slow Thinking for Complex Interactive Tasks [[paper]](https://openreview.net/forum?id=Rzk3GP1HN7) 

[NeurIPS 2023] Parsel🐍: Algorithmic Reasoning with Language Models by Composing Decompositions [[paper]](https://openreview.net/forum?id=qd9qcbVAwQ) 

[NeurIPS 2023] EmbodiedGPT: Vision-Language Pre-Training via Embodied Chain of Thought [[paper]](https://openreview.net/forum?id=IL5zJqfxAa) 

[NeurIPS 2023] Language Models Don't Always Say What They Think: Unfaithf