In [191]:
import gazpacho

In [192]:
URL = "https://en.wikipedia.org/wiki/List_of_world_records_in_swimming"

In [193]:
html = gazpacho.get(URL)

In [194]:
soup = gazpacho.Soup(html)

In [195]:
tables = soup.find("table")

In [196]:
RECORDS = (0, 1, 3, 4)
COURSES = ("LC Men", "LC Women", "SC Men", "SC Women")

In [197]:
list(zip(COURSES, RECORDS))

[('LC Men', 0), ('LC Women', 1), ('SC Men', 3), ('SC Women', 4)]

In [198]:
for table, course in zip(tables, COURSES):
    print(f"Course: {course}")
    for row in table.find("tr", mode="all")[1:]:
        columns = row.find("td", mode="all")
        event = columns[0].text
        time = columns[1].text
        print(f"{event} -> {time}")
    print()

Course: LC Men
50m freestyle -> 20.91
100m freestyle -> 46.40
200m freestyle -> 1:42.00
400m freestyle -> 3:39.96
800m freestyle -> 7:32.12
1500m freestyle -> 14:30.67
50m backstroke -> 23.55
100m backstroke -> 51.60
200m backstroke -> 1:51.92
50m breaststroke -> 25.95
100m breaststroke -> 56.88
200m breaststroke -> 2:05.48
50m butterfly -> 22.27
100m butterfly -> 49.45
200m butterfly -> 1:50.34
200m individual medley -> 1:52.69
400m individual medley -> 4:02.50
4 × 100 m freestyle relay -> 3:08.24
4 × 200 m freestyle relay -> 6:58.55
4 × 100 m medley relay -> 3:26.78

Course: LC Women
50m freestyle -> 23.61
100m freestyle -> 51.71
200m freestyle -> 1:52.23
400m freestyle -> 3:54.18
800m freestyle -> 8:04.12
1500m freestyle -> 15:20.48
50m backstroke -> 26.86
100m backstroke -> 57.13
200m backstroke -> 2:03.14
50m breaststroke -> 29.16
100m breaststroke -> 1:04.13
200m breaststroke -> 2:17.55
50m butterfly -> 24.43
100m butterfly -> 54.60
200m butterfly -> 2:01.81
200m individual medle

In [199]:
records = {}
for table, course in zip(tables, COURSES):
    records[course] = {}
    for row in table.find("tr", mode="all")[1:]:
        columns = row.find("td", mode="all")
        event = columns[0].text
        time = columns[1].text
        records[course][event] = time

In [200]:
print(records.keys())

dict_keys(['LC Men', 'LC Women', 'SC Men', 'SC Women'])


In [201]:
records["SC Women"]["100m backstroke"]

'48.16'

In [202]:
records = {}

tables = soup.find("table")

for idx, course in zip(RECORDS, COURSES):
    table = tables[idx]
    records[course] = {}
    for row in table.find("tr")[1:]:
        columns = row.find("td")
        event = columns[0].text.strip()
        time = columns[1].text.strip()
        records[course][event] = time


In [203]:
print( records["LC Men"]["100m butterfly"] )
print( records["LC Women"]["100m butterfly"] )
print( records["SC Men"]["100m butterfly"] )
print( records["SC Women"]["100m butterfly"])

49.45
54.60
47.68
52.71


In [204]:
conversions = {
    "Free": "freestyle",
    "Back": "backstroke",
    "Breast": "breaststroke",
    "Fly": "butterfly",
    "IM": "individual medley"
}

In [205]:
event = "Mike-15-100m-Fly.txt"
*_, distance, stroke = event.removesuffix(".txt").split("-")
lookup = f"{distance} {conversions[stroke]}"
lookup

'100m butterfly'

In [206]:
for course in records.keys():
    print( records[course][lookup] )

49.45
54.60
47.68
52.71


In [207]:
records = {}

# for table, course in zip(tables, COURSES):
#     records[course] = {}
for idx, course in zip(RECORDS, COURSES):
    table = tables[idx]
    records[course] = {}
    for row in table.find("tr", mode="all")[1:]:
        columns = row.find("td", mode="all")
        event = columns[0].text
        time = columns[1].text
        if "relay" not in event:
            records[course][event] = time

In [208]:
records["LC Men"]

{'50m freestyle': '20.91',
 '100m freestyle': '46.40',
 '200m freestyle': '1:42.00',
 '400m freestyle': '3:39.96',
 '800m freestyle': '7:32.12',
 '1500m freestyle': '14:30.67',
 '50m backstroke': '23.55',
 '100m backstroke': '51.60',
 '200m backstroke': '1:51.92',
 '50m breaststroke': '25.95',
 '100m breaststroke': '56.88',
 '200m breaststroke': '2:05.48',
 '50m butterfly': '22.27',
 '100m butterfly': '49.45',
 '200m butterfly': '1:50.34',
 '200m individual medley': '1:52.69',
 '400m individual medley': '4:02.50'}

In [209]:
import json

In [210]:
with open("records.json", "w") as jf:
    json.dump(records, jf)

In [211]:
text = "Darius-13-100m-IM.txt"

In [212]:
def event_lookup(text):
    conversions = {
        "Free": "freestyle",
        "Back": "backstroke",
        "Breast": "breaststroke",
        "Fly": "butterfly",
        "IM": "individual medley"
    }

    *_, distance, stroke = text.removesuffix(".txt").split("-")
    
    return f"{distance} {conversions[stroke]}"


In [213]:
import json

In [214]:
with open("records.json") as f:
    records = json.load(f)

In [215]:
records["LC Men"][event_lookup("Darius-13-100m-Fly.txt")]

'49.45'