In [24]:
from typing import NamedTuple
from datetime import datetime

class AircraftModel(NamedTuple):
    """One aircraft model record, as assembled by the scraper in this file."""

    # Designator code read from the first column of the designator table.
    model_id: str
    # Link text of the model's article in the third column of that table.
    model_name: str
    # Infobox value found under "Manufacturer" or "Design group".
    manufacturer: str
    # Populated from the infobox "First flight" entry (not a market release).
    release_date: datetime

In [27]:
import requests
from bs4 import BeautifulSoup
from typing import Iterable
import unicodedata

def datetime_string_datetime(s: str) -> datetime:
    """Parse a date that is written out in words into a ``datetime``.

    Two layouts are tried in order: day-first (``"28 October 1972"``) and
    month-first (``"October 28, 1972"``).

    Args:
        s: The date text to parse.

    Returns:
        The parsed date as a naive ``datetime``.

    Raises:
        ValueError: If ``s`` matches neither layout.
    """
    formats = ("%d %B %Y", "%B %d, %Y")
    # Named ``fmt`` rather than ``format`` so the builtin is not shadowed.
    for fmt in formats:
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
    # Falling off the loop would implicitly return None, which contradicts the
    # annotated return type and only fails later, far from the real cause.
    raise ValueError(f"unrecognised date {s!r}; expected one of {formats}")
    

def get_date_and_manufacturer(url: str) -> tuple[datetime, str]:
    """Fetch an aircraft article and read two values from its infobox.

    Args:
        url: Address of the article page to download.

    Returns:
        ``(first_flight_date, manufacturer)``, where the manufacturer is the
        text of the cell following the "Manufacturer" or "Design group"
        label and the date is parsed from the cell following "First flight".

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no infobox, or an expected label or its
            value cell is missing.
    """
    # A timeout keeps one stalled connection from hanging the whole scrape.
    ret = requests.get(url, timeout=30)
    ret.raise_for_status()

    soup = BeautifulSoup(ret.content, "lxml")
    infobox = soup.find("table", class_="infobox")
    if infobox is None:
        raise ValueError(f"no infobox table found at {url}")
    body = infobox.find("tbody")
    if body is None:
        # No explicit <tbody> in the markup: search the table itself.
        body = infobox

    def value_cell(labels):
        """Return the <td> following the first tag whose text is in labels."""
        label = body.find(lambda t: t.get_text(strip=True) in labels)
        cell = label.find_next("td") if label is not None else None
        if cell is None:
            raise ValueError(f"no infobox entry for {labels} at {url}")
        return cell

    manufacturer = value_cell(("Manufacturer", "Design group")).get_text(strip=True)

    # Only the first child node of the cell is used; it is cut at the first
    # newline and ';', and anything up to a ':' prefix is dropped.
    raw = value_cell(("First flight",)).contents[0]
    date = unicodedata.normalize(
        "NFKD", raw.split("\n")[0].split(";")[0].split(":")[-1].strip()
    )
    date = datetime_string_datetime(date)

    return date, manufacturer
    
def get_aircraft_model() -> Iterable[AircraftModel]:
    """Yield Boeing and Airbus models from the aircraft type designator list.

    Downloads the designator list, walks the rows of its first ``wikitable``
    and, for every row whose third-column link text starts with "Boeing" or
    "Airbus", follows that link to look up the manufacturer and first-flight
    date.

    Yields:
        One ``AircraftModel`` per matching table row.

    Raises:
        requests.HTTPError: If the list page answers with an error status.
        ValueError: If the list page contains no ``wikitable``.
    """
    # Imported here so the function is self-contained with respect to the
    # file's import block.
    from urllib.parse import urljoin

    base_url = "https://en.wikipedia.org/"
    # A timeout keeps a stalled connection from hanging the scrape.
    ret = requests.get(
        "https://en.wikipedia.org/wiki/List_of_aircraft_type_designators",
        timeout=30,
    )
    ret.raise_for_status()

    soup = BeautifulSoup(ret.content, "lxml")
    table = soup.find("table", class_="wikitable")
    if table is None:
        raise ValueError("no wikitable found on the designator list page")
    body = table.find("tbody")
    if body is None:
        # No explicit <tbody> in the markup: iterate the table's rows directly.
        body = table

    for row in body.find_all("tr"):
        cols = row.find_all("td")
        # Header rows have no <td>; short rows lack the model column.
        if len(cols) < 3:
            continue

        # Without a link there is no article to read the details from.
        link = cols[2].find("a")
        if link is None or not link.get("href"):
            continue

        model_id = cols[0].get_text(strip=True)
        model_name = link.get_text(strip=True)
        if not model_name.startswith(("Boeing", "Airbus")):
            continue
        # urljoin avoids the doubled slash that plain concatenation produces
        # when the href is root-relative ("/wiki/...").
        model_url = urljoin(base_url, link["href"])
        date, manufacturer = get_date_and_manufacturer(model_url)
        yield AircraftModel(
            model_id=model_id,
            model_name=model_name,
            manufacturer=manufacturer,
            release_date=date,
        )

# Translation table: a single quote is doubled and an ampersand gets a
# leading backslash (presumably for the SQL client's substitution
# variables -- confirm against the tool that runs the generated script).
trans = str.maketrans({"'": "''", "&": r"\&"})


def make_sql_nice(s):
    """Return ``s`` with the characters in ``trans`` escaped."""
    return s.translate(trans)


# Print one ``INTO AIRCRAFT_MODEL ... VALUES`` clause per scraped model.
# NUMBER_OF_SEAT is always written as the literal 1.
for aircraft_model in get_aircraft_model():
    quoted_id = make_sql_nice(aircraft_model.model_id)
    quoted_name = make_sql_nice(aircraft_model.model_name)
    quoted_manufacturer = make_sql_nice(aircraft_model.manufacturer)
    release_day = format(aircraft_model.release_date, "%Y-%m-%d")
    print(
        "    INTO AIRCRAFT_MODEL (AIRCRAFT_MODEL_ID, MODEL_NAME, MODEL_MANUFACTURER, NUMBER_OF_SEAT, RELEASE_DATE) "
        f"VALUES ('{quoted_id}', '{quoted_name}', '{quoted_manufacturer}', 1, "
        f"TO_DATE('{release_day}', 'YYYY-MM-DD'))"
    )

    INTO AIRCRAFT_MODEL (AIRCRAFT_MODEL_ID, MODEL_NAME, MODEL_MANUFACTURER, NUMBER_OF_SEAT, RELEASE_DATE) VALUES ('A19N', 'Airbus A319neo', 'Airbus', 1, TO_DATE('2014-09-25', 'YYYY-MM-DD'))
    INTO AIRCRAFT_MODEL (AIRCRAFT_MODEL_ID, MODEL_NAME, MODEL_MANUFACTURER, NUMBER_OF_SEAT, RELEASE_DATE) VALUES ('A20N', 'Airbus A320neo', 'Airbus', 1, TO_DATE('2014-09-25', 'YYYY-MM-DD'))
    INTO AIRCRAFT_MODEL (AIRCRAFT_MODEL_ID, MODEL_NAME, MODEL_MANUFACTURER, NUMBER_OF_SEAT, RELEASE_DATE) VALUES ('A21N', 'Airbus A321neo', 'Airbus', 1, TO_DATE('2016-02-09', 'YYYY-MM-DD'))
    INTO AIRCRAFT_MODEL (AIRCRAFT_MODEL_ID, MODEL_NAME, MODEL_MANUFACTURER, NUMBER_OF_SEAT, RELEASE_DATE) VALUES ('A306', 'Airbus A300-600', 'Airbus', 1, TO_DATE('1972-10-28', 'YYYY-MM-DD'))
    INTO AIRCRAFT_MODEL (AIRCRAFT_MODEL_ID, MODEL_NAME, MODEL_MANUFACTURER, NUMBER_OF_SEAT, RELEASE_DATE) VALUES ('A30B', 'Airbus A300B2, A300B4 and A300C4', 'Airbus', 1, TO_DATE('1972-10-28', 'YYYY-MM-DD'))
    INTO AIRCRAFT_MODEL (AIRCRA