In [19]:
import bs4
import requests
import re
from urllib.parse import urljoin
from collections import OrderedDict
import json

In [20]:
base_url = 'https://www.bger.ch/index/federal/federal-inherit-template/federal-richter/'
mydict = lambda: OrderedDict(mydict)
results = mydict()

TypeError: 'function' object is not iterable

In [None]:
def getGender(url: str):
    r = requests.get(url)
    data = bs4.BeautifulSoup(r.text, "html.parser")  # parse html
    if data.find(id="maincontent"):
        text = str(data.find(id="maincontent").contents)
    else:
        text= str(data.contents)
    if re.search(r'[Rr]ichterin|[Eée]lue', text):
        return 'f'
    else:
        return 'm'

In [None]:
def bundesrichter_handler(url: str):
    r = requests.get('https://www.plaedoyer.ch/artikel/artikeldetail/parteiangehoerigkeit-der-bundesrichter/')
    data = bs4.BeautifulSoup(r.text, "html.parser")  # parse html
    rows = data.find_all('tr')
    people_db = []
    for row in rows:
        if row.find('td').string and len(row.find_all('td'))>1:
            people_db.append({'name': row.find('td').string, 'party': row.find_all('td')[1].string})
    url = urljoin(base_url, url)
    r = requests.get(url)  # get starting page
    data = bs4.BeautifulSoup(r.text, "html.parser")  # parse html
    divs = data.find_all('ul', class_='article-paragraf')
    persons = []
    for div in divs:
        links = div.find_all("a")  # find all links
        for link in links:
            name = re.sub(r' \(.*\)', "", link.string)
            gender = getGender(urljoin(base_url, link.get('href')))
            persons.append({'name': name, 'gender': gender})
    for person in persons:
        for comp in people_db: 
            if set(person['name'].split()) == set(comp['name'].split()):
                person['party'] = comp['party']
    return persons

In [None]:
def nebenamtliche_richter_handler(url: str):
    r = requests.get('https://www.plaedoyer.ch/artikel/artikeldetail/parteiangehoerigkeit-der-bundesrichter/')
    data = bs4.BeautifulSoup(r.text, "html.parser")  # parse html
    rows = data.find_all('tr')
    people_db = []
    for row in rows:
        if row.find('td').string and len(row.find_all('td'))>1:
            people_db.append({'name': row.find('td').string, 'party': row.find_all('td')[1].string})
    url = urljoin(base_url, url)
    r = requests.get(url)  # get starting page
    data = bs4.BeautifulSoup(r.text, "html.parser")  # parse html
    main = data.find(id="maincontent")
    list_points = main.find_all('li')
    persons = []
    for list_point in list_points:
        name = re.sub(r',.*', "", list_point.string)
        persons.append({'name': name})
    for person in persons:
        for comp in people_db: 
            if set(person['name'].split()) == set(comp['name'].split()):
                person['party'] = comp['party']
    return persons

In [None]:
def gerichtsschreiber_handler(url: str):
    url = urljoin(base_url, url)
    r = requests.get(url)  # get starting page
    data = bs4.BeautifulSoup(r.text, "html.parser")  # parse html
    main = data.find(id="maincontent")
    list_points = main.find_all('li')
    persons = []
    for list_point in list_points:
        name = re.sub(r',.*', "", list_point.string)
        persons.append({'name': name})
    return persons

In [None]:
def ehemalige_bundesrichter(url: str):
    url = urljoin(base_url, url)
    r = requests.get(url)  # get starting page
    data = bs4.BeautifulSoup(r.text, "html.parser")  # parse html
    main = data.find(id="maincontent")
    list_points = main.find('tbody').find_all('tr')
    persons = []
    for list_point in list_points:
        tds = list_point.find_all('td')
        javascript = tds[0].find('a')['onclick']
        link = re.search('\'.*?\'', javascript).group(0)
        gender = getGender(urljoin(base_url, link[1:-1]))
        name = tds[0].find('a').text.strip() + " " + tds[1].text.strip()
        persons.append({'name': name, 'gender': gender})
    return persons

In [None]:
page_objects = [
    {
        'url': 'federal-richter-bundesrichter.htm',
        'handler': bundesrichter_handler,
        'category': 'judges',
        'sub_category': 'federal_judge',
        'id': 'Bundesrichter'
    },
    {
        'url': 'federal-richter-nebenamtlicherichter.htm',
        'handler': nebenamtliche_richter_handler,
        'category': 'judges',
        'sub_category': 'deputy_federal_judge',
        'id': 'Nebenamtliche Richter'
    },
    {
        'url': 'federal-richter-gerichtsschreiber.htm',
        'handler': gerichtsschreiber_handler,
        'category': 'clerks',
        'sub_category': 'federal_clerk',
        'id': 'Gerichtsschreiber'
    },
    {
        'url': '/index/federal/federal-inherit-template/federal-status/federal-richter-altebundesrichter/federal-richter-altebundesrichter-liste.htm',
        'handler': ehemalige_bundesrichter,
        'category': 'judges',
        'sub_category': 'federal_judge',
        'id': 'ehemalige Bundesrichter'
    }
]

In [None]:
for obj in page_objects:
    persons = obj['handler'](obj['url'])
    results[obj['category']][obj['sub_category']][obj['id']] = persons

In [26]:
for category in results:
    for subcategory in results[category]:
        for cat_id in results[category][subcategory]:
            results[category][subcategory][cat_id] = sorted(results[category][subcategory][cat_id], key=lambda x:x['name'])
        

[{'name': 'Abrecht Theodor', 'gender': 'm'}, {'name': 'Aemisegger Heinz', 'gender': 'm'}, {'name': 'Aepli Arnold Otto', 'gender': 'm'}, {'name': 'Aeschlimann Arthur', 'gender': 'm'}, {'name': 'Affolter Albert', 'gender': 'm'}, {'name': 'Affolter Hans', 'gender': 'm'}, {'name': 'Allemann-Reichle Albert', 'gender': 'm'}, {'name': 'Allet Alexis', 'gender': 'm'}, {'name': 'Andermatt Josef', 'gender': 'm'}, {'name': 'Anderwert Fridolin', 'gender': 'm'}, {'name': 'Antognini Fulvio', 'gender': 'm'}, {'name': 'Arnold Eduard', 'gender': 'm'}, {'name': 'Attenhofer Karl', 'gender': 'm'}, {'name': 'Bachmann Jakob Huldreich', 'gender': 'm'}, {'name': 'Bachtler Fritz', 'gender': 'm'}, {'name': 'Berenstein Alexandre', 'gender': 'm'}, {'name': 'Betschart Gerold', 'gender': 'm'}, {'name': 'Bezzola Andrea', 'gender': 'm'}, {'name': 'Bianchi Sergio', 'gender': 'm'}, {'name': 'Bigler-Eggenberg Margrith', 'gender': 'f'}, {'name': 'Blocher Eugen', 'gender': 'm'}, {'name': 'Blumer Johann Jakob', 'gender': 'm

In [None]:
f = open("legal_info/personal_information.json", "w")
f.write(json.dumps(results, indent=2))
f.close()