In [1]:
from bs4 import BeautifulSoup
import logging
import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

In [2]:
# URL of the dblp table-of-contents page for ICML 2024.
# NOTE(review): despite the "template" name this is a fixed URL with no placeholders.
link_template = 'https://dblp.org/db/conf/icml/icml2024.html'
def get_title_author(link, timeout=30):
    """Scrape paper titles and author strings from a dblp proceedings page.

    Parameters
    ----------
    link : str
        URL of the page to download.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without it a stalled connection would block the notebook forever.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(titles, authors)`` as parallel lists; ``authors[i]`` is the
        comma-joined name string belonging to ``titles[i]``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    resp = requests.get(link, proxies=None, timeout=timeout)
    # Fail loudly on an error page instead of silently returning empty lists.
    resp.raise_for_status()

    soup = BeautifulSoup(resp.content, 'html.parser')
    uls = soup.find_all('ul', class_='publ-list')
    titles, authors = [], []
    # The first 'publ-list' is skipped -- presumably it does not hold papers;
    # TODO confirm against the page markup.
    for ul in uls[1:]:
        # All paper entries directly listed under this <ul>.
        articles = ul.find_all('li', class_='entry inproceedings')
        print(f"Found {len(articles)} articles in one ul.")
        for li in articles:
            title_tag = li.find('span', class_='title')
            if title_tag is None:
                # Malformed entry without a title span: skip it rather than
                # crash on `.text` of None.
                continue
            author_list = [a.text for a in li.find_all('span', itemprop='name')]

            titles.append(title_tag.text)
            # The last itemprop='name' span is dropped -- presumably it is the
            # title span rather than an author; verify against the markup.
            authors.append(", ".join(author_list[:-1]))

    return titles, authors

In [3]:
# Download and parse the proceedings page (performs a live HTTP request).
titles, authors = get_title_author(link_template)
# NOTE(review): this prints both complete lists; consider previewing a slice
# (e.g. the first few items) to keep the notebook output readable.
print(titles, authors)

Found 144 articles in one ul.
Found 191 articles in one ul.
Found 2275 articles in one ul.
['Probabilistic Inference in Language Models via Twisted Sequential Monte Carlo.', 'Position: Open-Endedness is Essential for Artificial Superhuman Intelligence.', 'Stop Regressing: Training Value Functions via Classification for Scalable Deep RL.', 'Improving Transformers with Dynamically Composable Multi-Head Attention.', 'Learning Useful Representations of Recurrent Neural Network Weight Matrices.', 'Evolution of Heuristics: Towards Efficient Automatic Algorithm Design Using Large Language Model.', 'SceneCraft: An LLM Agent for Synthesizing 3D Scenes as Blender Code.', 'Doubly Robust Causal Effect Estimation under Networked Interference via Targeted Learning.', 'Emergent Equivariance in Deep Ensembles.', 'Evaluation of LLMs on Syntax-Aware Code Fill-in-the-Middle Tasks.', 'Position: Automatic Environment Shaping is the Next Frontier in RL.', 'Online Matching with Stochastic Rewards: Provable B

In [4]:
# Sanity check: both lists should have the same length, one entry per paper.
print(len(titles), len(authors))


2610 2610


In [5]:
def graph_mask(title, keywords=None):
    """Return True when ``title`` mentions a graph-learning keyword.

    Matching is a case-insensitive *substring* test, so a keyword such as
    ``'graph'`` also matches words that merely contain it (e.g. "paragraph").

    Parameters
    ----------
    title : str
        Paper title to inspect.
    keywords : iterable of str, optional
        Keywords to look for. Defaults to the built-in graph-learning list.

    Returns
    -------
    bool
        True if at least one keyword occurs in the lower-cased title.
    """
    if keywords is None:
        # The broad entries ('graph', 'gnn') already subsume the more specific
        # ones; the full list is kept to document the intended topics.
        keywords = (
            'graph neural networks', 'gnns', 'gnn', 'graph',
            'graph learning', 'graph embedding', 'network embedding',
        )
    lowered = title.lower()
    # Builtin any() short-circuits on the first hit and yields a native bool.
    return any(kw.lower() in lowered for kw in keywords)

In [6]:
# Write every graph-related paper to a Markdown-style list: the bold title on
# one line, the italic author string indented beneath it.
with open('2024-ICML.txt', 'w', encoding='utf8') as fw:
    for paper_title, paper_authors in zip(titles, authors):
        if not graph_mask(paper_title):
            continue  # not a graph paper
        fw.write(f'1. **{paper_title}**\n\n')
        fw.write(f'    *{paper_authors}*\n\n')