## Imports

In [1]:
from os import environ as ENV
from time import sleep
import logging
import re
from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Extract

### URLs

In [2]:
# Index page of the judiciary member lists; passed to scrape_judge_urls to
# discover the URL of each individual list.
BASE_URL = "https://www.judiciary.uk/about-the-judiciary/who-are-the-judiciary/list-of-members-of-the-judiciary/"

In [3]:
def scrape_judge_urls(web_url: str) -> dict[str, str]:
    """Map the name of each list of judges to the URL of its page.

    Fetches ``web_url``, collects every ``<a class="card__link">`` element
    and derives a name from the link markup, dropping a leading "List of "
    when present.

    Args:
        web_url: Address of the index page that links to each list.

    Returns:
        A dict of list name to the link's ``href``. An empty dict is
        returned when the request fails; the error is logged.
    """

    try:
        response = requests.get(web_url, timeout=10)
        # Treat HTTP error statuses (4xx/5xx) as failures instead of
        # silently parsing an error page.
        response.raise_for_status()
    except requests.RequestException as error:
        logging.error("Error fetching URL: %s", error)
        return {}

    soup = BeautifulSoup(response.content, 'html.parser')

    # Captures the trailing run of link text that starts at a space followed
    # by a capitalised word (optionally parenthesised), up to the closing tag.
    name_pattern = re.compile(r"(( \(?[A-Z][a-z]*.*\)?)+)</a>")

    judge_dict = {}
    for link in soup.find_all("a", class_="card__link"):
        url = link.get("href")
        if url is None:
            # An anchor without a target cannot be scraped later; skip it.
            continue

        match = name_pattern.search(str(link))
        # Fall back to the plain link text rather than crashing when the
        # markup does not fit the pattern.
        raw_name = match.group(2) if match else link.get_text()

        # removeprefix drops the exact "List of " prefix; str.lstrip would
        # strip any of those *characters* and mangle names such as "Lord ...".
        name = raw_name.strip().removeprefix("List of ").strip()
        judge_dict[name] = url

    return judge_dict

In [4]:
# Scrape the index page; the result is keyed by list name and looked up
# below (e.g. judge_dict["Bench Chairs"]) to get each list's URL.
judge_dict = scrape_judge_urls(BASE_URL)

In [5]:
# Notebook display of the scraped name -> URL mapping.
judge_dict

{'Bench Chairs': 'https://www.judiciary.uk/about-the-judiciary/who-are-the-judiciary/list-of-members-of-the-judiciary/bench-chairmen-list/',
 'District Judges (Magistrates’ Courts)': 'https://www.judiciary.uk/about-the-judiciary/who-are-the-judiciary/list-of-members-of-the-judiciary/dj-mags-ct-list/',
 'Diversity and Community Relations Judges (DCRJs)': 'https://www.judiciary.uk/about-the-judiciary/who-are-the-judiciary/list-of-members-of-the-judiciary/diversity-and-community-relations-judges-list/',
 'Diversity and Community Relations Magistrates': 'https://www.judiciary.uk/about-the-judiciary/who-are-the-judiciary/list-of-members-of-the-judiciary/diversity-community-and-relations-magistrates/',
 'Judge Advocates General': 'https://www.judiciary.uk/about-the-judiciary/who-are-the-judiciary/list-of-members-of-the-judiciary/jag-list/',
 'Circuit Judges': 'https://www.judiciary.uk/about-the-judiciary/who-are-the-judiciary/list-of-members-of-the-judiciary/circuit-judge-list/',
 'District 

### Bench Chairs

In [6]:
def scrape_BC(web_url: str) -> pd.DataFrame:
    """Scrape the Bench Chairs page into one row per bench.

    Each ``<table class="govuk-table">`` is paired, by position, with the
    ``<h2 class="wp-block-heading">`` at the same index, whose text is used
    as the region. Within a table, the non-header cells are consumed two at
    a time as (bench, name).

    Args:
        web_url: Address of the Bench Chairs list page.

    Returns:
        A DataFrame with columns ``region``, ``bench`` and ``name``. When the
        request fails the error is logged and an empty DataFrame with the
        same columns is returned.
    """
    columns = ["region", "bench", "name"]

    try:
        response = requests.get(web_url, timeout=10)
        # Treat HTTP error statuses (4xx/5xx) as failures instead of
        # silently parsing an error page.
        response.raise_for_status()
    except requests.RequestException as error:
        logging.error("Error fetching URL: %s", error)
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): assumes the i-th heading labels the i-th table - confirm
    # against the page if extra headings are ever added.
    regions = [heading.text
               for heading in soup.find_all("h2", class_="wp-block-heading")]

    rows = []
    for t, table in enumerate(soup.find_all("table", class_="govuk-table")):
        # Header cells are the ones wrapping their text in <strong>. Test for
        # the tag itself: a substring check on the markup would also discard
        # data cells such as a name containing "strong" (e.g. "Armstrong").
        cells = [cell.text
                 for cell in table.find_all("td", class_="govuk-table__cell")
                 if cell.find("strong") is None]

        # More tables than headings leaves the region unknown, not a crash.
        region = regions[t] if t < len(regions) else None

        # Stop before a dangling last cell so an odd count cannot overrun.
        for c in range(0, len(cells) - 1, 2):
            rows.append({"region": region,
                         "bench": cells[c],
                         "name": cells[c + 1]})

    # Passing the columns keeps the schema stable even when no rows matched.
    return pd.DataFrame(rows, columns=columns)

In [7]:
# Scrape the Bench Chairs list using the URL found on the index page.
BC = scrape_BC(judge_dict["Bench Chairs"])

In [8]:
# First whitespace-separated token of the name.
BC["first_name"] = BC["name"].str.split().str[0]

In [9]:
# Second whitespace-separated token of the name.
# NOTE(review): for names with more than two tokens this picks the middle
# token, and for single-token names it yields NaN - confirm this is intended.
BC["last_name"] = BC["name"].str.split().str[1]

In [10]:
# The combined name column is dropped once it has been split into
# first_name / last_name.
BC = BC.drop(columns=["name"])

In [11]:
# Notebook display of the transformed Bench Chairs table.
BC

Unnamed: 0,region,bench,first_name,last_name
0,London,Central London,Maeve,Bromwich
1,London,East London,Tina,Hayhow
2,London,North East London,Robert,Chambers
3,London,North London,Gwilwyn,Wright
4,London,North West London,Valerie,Crookes
...,...,...,...,...
70,Wales,Ceredigion & Pembrokeshire,Jennie,Robson
71,Wales,Montgomeryshire,Stephen,Pembroke
72,Wales,North East Wales,Ceri,Hughes
73,Wales,North Central Wales,Hilary,Owen


### DJMC

In [16]:
def scrape_DJMC(web_url: str) -> pd.DataFrame:
    """Scrape the District Judges (Magistrates' Courts) page into a table.

    All non-header ``<td class="govuk-table__cell">`` cells on the page are
    consumed three at a time as (judge, circuit, appointment).

    Args:
        web_url: Address of the District Judges (Magistrates' Courts) list.

    Returns:
        A DataFrame with columns ``judge``, ``circuit`` and ``appointment``.
        When the request fails the error is logged and an empty DataFrame
        with the same columns is returned.
    """
    columns = ["judge", "circuit", "appointment"]

    try:
        response = requests.get(web_url, timeout=10)
        # Treat HTTP error statuses (4xx/5xx) as failures instead of
        # silently parsing an error page.
        response.raise_for_status()
    except requests.RequestException as error:
        logging.error("Error fetching URL: %s", error)
        return pd.DataFrame(columns=columns)

    soup = BeautifulSoup(response.content, 'html.parser')

    # Header cells are the ones wrapping their text in <strong>. Test for the
    # tag itself: a substring check on the markup would also discard data
    # cells such as a judge whose name contains "strong" (e.g. "Armstrong"),
    # shifting every later row.
    cells = [cell.text
             for cell in soup.find_all("td", class_="govuk-table__cell")
             if cell.find("strong") is None]

    # Stop before an incomplete trailing group so a cell count that is not a
    # multiple of three cannot overrun the list.
    rows = [{"judge": cells[c],
             "circuit": cells[c + 1],
             "appointment": cells[c + 2]}
            for c in range(0, len(cells) - 2, 3)]

    # Passing the columns keeps the schema stable even when no rows matched.
    return pd.DataFrame(rows, columns=columns)

In [17]:
# Scrape and display the District Judges (Magistrates' Courts) list; the
# result is shown by the notebook but not assigned to a variable.
scrape_DJMC(judge_dict["District Judges (Magistrates’ Courts)"])

Unnamed: 0,judge,circuit,appointment
0,District Judge (MC) Allen-Khimani,Midlands,08-06-19
1,District Judge (MC) Apted,North West,09-01-23
2,District Judge (MC) Austin,South West,14-04-23
3,District Judge (MC) Barnes,Midlands,06-02-23
4,District Judge (MC) Barron,South East,13-10-11
...,...,...,...
132,District Judge (MC) Wilkinson,Midlands,20-07-09
133,District Judge (MC) Williams,South East,03-01-22
134,District Judge (MC) Wilson,South East,01-11-21
135,District Judge (MC) Young,North East,16-03-20


## Transform

## Load