In [1]:
from pathlib import Path
from collections import defaultdict, Counter

In [2]:
# Read the whole bibliography into one string. The encoding is given explicitly
# so the result does not depend on the platform's locale encoding.
# NOTE(review): assumes the file is UTF-8 -- adjust if it was exported otherwise.
raw_data = Path("./bibliography.bib").read_text(encoding="utf-8")


In [3]:
def _split_entries(text):
    """Return the text of every BibTeX entry in ``text`` without its leading "@".

    An entry starts at a line whose first non-blank character is "@". An "@"
    anywhere else (e.g. an e-mail address inside a field value) does not start
    a new entry. Anything before the first entry is dropped.
    """
    chunks = []
    for line in text.split("\n"):
        stripped = line.lstrip()
        if stripped.startswith("@"):
            chunks.append([stripped[1:]])
        elif chunks:
            chunks[-1].append(line)
    return ["\n".join(chunk) for chunk in chunks]


def _parse_entry(entry):
    """Parse one entry (as returned by ``_split_entries``) into a dict.

    The dict holds 'ref_type' (text before the first "{"), 'id' (text between
    the first "{" and the following "," or "{") and one item per
    ``key = value`` line after the first line. Values are stripped of
    surrounding whitespace, commas, braces and quotes.

    Lines without "=" are treated as the continuation of the previous field and
    appended to its value, separated by a single space. This is a line-based
    heuristic, not a full BibTeX parser: fields on the header line are ignored
    and a continuation line that itself contains "=" is read as a new field.

    Returns None when the entry contains no "{", i.e. has no citation key.
    """
    head, brace, rest = entry.partition("{")
    if not brace:
        return None
    item = {
        'ref_type': head.strip(),
        'id': rest.split("{")[0].split(",")[0].strip(),
    }
    last_key = None
    for line in entry.split("\n")[1:]:
        line = line.strip()
        # Skip blank lines, the closing brace of the entry and comment lines.
        if not line or line == '}' or line[0] == '%':
            continue
        key, equals, value = line.partition("=")
        if equals:
            last_key = key.strip()
            item[last_key] = value.strip().strip(",").strip("{").strip("}").strip('"').strip("'").strip()
        elif last_key is not None:
            extra = line.strip(",").strip("{").strip("}").strip('"').strip("'").strip()
            if extra:
                item[last_key] = f'{item[last_key]} {extra}'.strip()
    return item


# Map citation key -> parsed entry. Malformed entries are reported and skipped
# instead of aborting the cell; a repeated citation key is reported and the
# later entry replaces the earlier one.
entries = {}
for entry in _split_entries(raw_data):
    item = _parse_entry(entry)
    if item is None:
        print('#################### ERROR ####################')
        print(entry.split('\n')[0])
        print('entry has no "{" and therefore no citation key; skipped')
        continue
    if item['id'] in entries:
        print(f'WARNING: duplicate citation key {item["id"]!r}; keeping the last occurrence')
    entries[item['id']] = item


In [None]:
# Gather, for every field name, the list of values it takes across all entries.
# The citation key ('id') is left out.
variations = defaultdict(list)
for record in entries.values():
    for field, content in record.items():
        if field != 'id':
            variations[field].append(content)

# For each field, list its values by descending frequency. A value is printed
# when the field has fewer than 10 distinct values, or when it is among the
# first 10 and occurs more than once; everything else is summarised in a
# single "remaining" line (number of distinct values, total occurrences).
for field, contents in variations.items():
    tally = Counter(contents)
    print(f'# {field}')
    few_distinct = len(tally) < 10
    hidden_values = 0
    hidden_total = 0
    for rank, (content, freq) in enumerate(tally.most_common()):
        if few_distinct or (rank < 10 and freq > 1):
            print(f'{rank+1}. {content}: {freq}')
        else:
            hidden_values += 1
            hidden_total += freq
    if hidden_values > 0:
        print(f'remaining: {hidden_values} ({hidden_total})')
    print()

In [None]:
# Print every distinct value, in sorted order, together with its number of
# occurrences for the selected fields.
for name in ['journal', 'booktitle', 'publisher']:
    print(f'# {name}')
    # Count once per field instead of rescanning the list for every distinct
    # value. .get() keeps a field that no entry uses from being inserted into
    # the defaultdict as an empty list.
    value_counts = Counter(variations.get(name, []))
    for x in sorted(value_counts):
        print(x, value_counts[x])
    print()


In [6]:
# Open a Google Scholar title search in the browser for every arXiv entry.
def open_arxiv_link(bib_entries=None, open_url=None):
    """Open a Google Scholar search for the title of every arXiv entry.

    An entry counts as an arXiv entry when its 'archivePrefix' field equals
    'arXiv'. Each search URL is printed before it is opened. Entries without a
    title are reported and skipped.

    Args:
        bib_entries: Mapping of citation key -> entry dict. Defaults to the
            notebook-level ``entries``.
        open_url: Callable invoked with each search URL. Defaults to
            ``webbrowser.open``. To use a specific browser executable, pass
            e.g. ``lambda url: subprocess.run([browser_path, url])``.
    """
    import urllib.parse
    import webbrowser

    if bib_entries is None:
        bib_entries = entries
    if open_url is None:
        open_url = webbrowser.open

    for entry in bib_entries.values():
        if entry.get('archivePrefix') != 'arXiv':
            continue
        title = entry.get('title')
        if not title:
            print(f'skipping {entry.get("id")}: no title')
            continue
        url = f'https://scholar.google.de/scholar?q={urllib.parse.quote(title)}'
        print(url)
        open_url(url)

# open_arxiv_link()