# Scan accepted papers to find authors to invite

In [1]:
import re
import os
from collections import defaultdict
import requests
import time
from bs4 import BeautifulSoup
import pickle

from tqdm.auto import tqdm
from dataclasses import dataclass

import unidecode  # used to handle names with unicode characters

## Load papers

In [2]:
@dataclass
class Paper:
    conference: str
    abstract: str
    authors: list
    title: str
    url: str

In [3]:
with open("2019-2021-icml-iclr-neurips-papers.pkl", "rb") as f:
    all_papers = pickle.load(f)

In [4]:
len(all_papers)

8415

## Find relevant titles / abstracts

In [5]:
dgm_abstract_keywords = [
    r"(normal\w* )flows?",
    r"vaes?(\s|[\.,;:])",
    r"variational autoencoder",
    r"gans?(\s|[\.,;:])",
    r"generative adversarial",
    r"generative model\w*",
#     r"probabilistic model\w*",
    r"autoencoder",
    r"latent variable",
]

In [6]:
relevant_papers = []
for paper in all_papers:
    abstract = paper.abstract.replace("\n", " ").lower()
    has_keywords = any(re.search(k, abstract) for k in dgm_abstract_keywords)
    if has_keywords:
        relevant_papers.append(paper)
    
    del paper, abstract

In [7]:
len(relevant_papers)

992

## Find authors (sometimes names are published with/without accents so I'm removing all accents)

In [8]:
def standardize_author_name(name):
    return unidecode.unidecode(name).lower()

In [9]:
authors_to_papers = defaultdict(list)
for paper in relevant_papers:
    for a in paper.authors:
        authors_to_papers[standardize_author_name(a)].append(paper)

In [10]:
first_authors_to_papers = defaultdict(list)
for paper in relevant_papers:
    for a in paper.authors[:1]:
        first_authors_to_papers[standardize_author_name(a)].append(paper)

## Select authors to invite

### Option 1: people with >= 2 first author papers

In [None]:
selected = [a for a, v in first_authors_to_papers.items() if len(v) >= 2]
for i, a in enumerate(selected):
    print(f"{i+1} {a.title():<25s} link to a paper: {authors_to_papers[a][0].url}")

### Option 2: people with names on >= 3 papers (any authorship position)

In [None]:
selected = [a for a, v in authors_to_papers.items() if len(v) >= 3]
for i, a in enumerate(selected):
    print(f"{i+1} {a.title():<25s} link to a paper: {authors_to_papers[a][0].url}")