In [None]:
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [None]:
def get_paper_links(base_url, max_papers):
    """Collect poster-page URLs from the conference listing page.

    Args:
        base_url: URL of the listing page to scan for poster links. Relative
            hrefs found on the page are resolved against this URL.
        max_papers: Maximum number of links to return; a value <= 0 yields
            an empty list without touching the network.

    Returns:
        A list of absolute poster URLs in page order with duplicates removed.
        The list is empty when the page cannot be retrieved.
    """
    if max_papers <= 0:
        return []

    try:
        # A timeout keeps a single stalled connection from hanging the run.
        response = requests.get(base_url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve page: {base_url} ({exc})")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve page: {base_url}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    paper_links = []
    seen = set()
    for link in soup.find_all('a', href=True):
        href = link['href']
        if '/virtual/2022/poster/' not in href:
            continue
        # urljoin resolves site-relative hrefs against the listing page and
        # leaves already-absolute hrefs untouched (plain concatenation would
        # produce "https://nips.cchttps://...").
        paper_url = urljoin(base_url, href)
        if paper_url in seen:
            # The same poster can be linked more than once on the page.
            continue
        seen.add(paper_url)
        paper_links.append(paper_url)
        if len(paper_links) >= max_papers:
            break

    return paper_links

def get_paper_details(paper_url):
    """Fetch one poster page and extract its title and abstract.

    Args:
        paper_url: Absolute URL of the poster page.

    Returns:
        A ``(title, abstract)`` tuple of strings. If an element is missing
        from the page, the placeholder 'Title not found' or
        'Abstract not found' is returned in its place. Returns
        ``(None, None)`` when the page cannot be retrieved.
    """
    try:
        # A timeout keeps a single stalled connection from hanging the run.
        response = requests.get(paper_url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve paper: {paper_url} ({exc})")
        return None, None
    if response.status_code != 200:
        print(f"Failed to retrieve paper: {paper_url}")
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract title
    title_tag = soup.find('h2', class_='card-title main-title text-center')
    title = title_tag.text.strip() if title_tag else 'Title not found'

    # Extract abstract (first <p> inside the abstract container)
    abstract = 'Abstract not found'
    abstract_div = soup.find('div', id='abstract_details')
    abstract_paragraph = abstract_div.find('p') if abstract_div else None
    if abstract_paragraph:
        abstract = abstract_paragraph.text.strip()
        # Drop only the leading "Abstract:" label; a blanket replace() would
        # also delete the phrase if it appears inside the abstract itself.
        label = 'Abstract:'
        if abstract.startswith(label):
            abstract = abstract[len(label):].strip()

    return title, abstract

def scrape_neurips(
    base_url="https://nips.cc/virtual/2022/papers.html?filter=titles&search=",
    output_file="nips2022.csv",
    max_papers=250,
    delay=1,
):
    """Scrape poster titles and abstracts and write them to a CSV file.

    Called with no arguments, this uses the same listing URL, output file,
    paper limit and delay that were previously hard-coded.

    Args:
        base_url: Listing page that links to the individual poster pages.
        output_file: Path of the CSV file to (over)write. Columns are
            "Title", "Abstract", "URL".
        max_papers: Maximum number of posters to scrape.
        delay: Seconds to pause between poster requests; 0 disables the pause.

    Returns:
        None. Progress is printed and the results are written to
        ``output_file``.
    """
    # Step 1: Get all paper links
    paper_links = get_paper_links(base_url, max_papers)
    total = len(paper_links)
    print(f"Found {total} papers.")

    # Step 2: Scrape details for each paper
    skipped = 0
    with open(output_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Abstract", "URL"])

        for idx, paper_url in enumerate(paper_links, start=1):
            print(f"Scraping paper {idx}/{total}: {paper_url}")
            title, abstract = get_paper_details(paper_url)
            if title is None and abstract is None:
                # The fetch failed; do not emit a row with empty fields.
                skipped += 1
            else:
                writer.writerow([title, abstract, paper_url])
            # Pause between requests to avoid overloading the server; there
            # is nothing left to throttle after the final paper.
            if delay and idx < total:
                time.sleep(delay)

    if skipped:
        print(f"Skipped {skipped} paper(s) that could not be retrieved.")
    print(f"Scraping complete. Data saved to {output_file}.")

# Start the scrape only when this code is executed directly (as a script or
# a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    scrape_neurips()

Found 250 papers.
Scraping paper 1/250: https://nips.cc/virtual/2022/poster/55706
Scraping paper 2/250: https://nips.cc/virtual/2022/poster/55756
Scraping paper 3/250: https://nips.cc/virtual/2022/poster/55749
Scraping paper 4/250: https://nips.cc/virtual/2022/poster/55761
Scraping paper 5/250: https://nips.cc/virtual/2022/poster/55716
Scraping paper 6/250: https://nips.cc/virtual/2022/poster/55690
Scraping paper 7/250: https://nips.cc/virtual/2022/poster/55630
Scraping paper 8/250: https://nips.cc/virtual/2022/poster/55769
Scraping paper 9/250: https://nips.cc/virtual/2022/poster/55772
Scraping paper 10/250: https://nips.cc/virtual/2022/poster/55745
Scraping paper 11/250: https://nips.cc/virtual/2022/poster/55750
Scraping paper 12/250: https://nips.cc/virtual/2022/poster/55724
Scraping paper 13/250: https://nips.cc/virtual/2022/poster/55652
Scraping paper 14/250: https://nips.cc/virtual/2022/poster/55657
Scraping paper 15/250: https://nips.cc/virtual/2022/poster/55766
Scraping paper 1

In [None]:
import pandas as pd

In [None]:
# Read the scraped CSV back into a DataFrame and preview its first rows.
df = pd.read_csv("nips2022.csv")
df.head()

Unnamed: 0,Title,Abstract,URL
0,SurDis: A Surface Discontinuity Dataset for We...,"According to World Health Organization, there ...",https://nips.cc/virtual/2022/poster/55706
1,MOMA-LRG: Language-Refined Graphs for Multi-Ob...,"Video-language models (VLMs), large models pre...",https://nips.cc/virtual/2022/poster/55756
2,VLMbench: A Compositional Benchmark for Vision...,Benefiting from language flexibility and compo...,https://nips.cc/virtual/2022/poster/55749
3,AnimeRun: 2D Animation Visual Correspondence f...,Visual correspondence of 2D animation is the c...,https://nips.cc/virtual/2022/poster/55761
4,Pythae: Unifying Generative Autoencoders in Py...,"In recent years, deep generative models have a...",https://nips.cc/virtual/2022/poster/55716
