In [4]:
# Fetch the qualifying result for one race and print one driver name per line.
year, race = '2024', 'Emilia Romagna'
p = QualiParser()
grid = p.get_grid(year, race)
for x in grid:
    # print() already applies str() to its argument.
    print(x)



Verstappen
Piastri
Norris
Leclerc
Sainz
Russell
Tsunoda
Hamilton
Ricciardo
Hulkenberg
Perez
Ocon
Stroll
Albon
Gasly
Bottas
Zhou
Magnussen
Alonso
Sargeant


In [3]:
# Print the name of every race the parser finds for the 2024 season, one per line.
p = QualiParser()
races = p.get_races(2024)
for r in races:
    print(r)

Bahrain
Saudi Arabia
Australia
Japan
China
Miami
Emilia Romagna
Monaco
Canada
Spain
Austria
Great Britain
Hungary
Belgium
Netherlands
Italy
Azerbaijan
Singapore
United States
Mexico
Brazil
Las Vegas
Qatar
Abu Dhabi


In [1]:
import unicodedata
import requests
from collections import OrderedDict
from bs4 import BeautifulSoup


class QualiParser(object):
    """Scrape qualifying results from the formula1.com results archive.

    The list of race links for a season is downloaded once per parser
    instance and cached; every ``get_grid`` / ``get_full`` call downloads the
    qualifying page of the requested race.
    """

    # Index of the table cell (<td>) whose <span> children hold the driver's
    # name parts.
    _DRIVER_CELL = 3

    def __init__(self, timeout=30):
        """Create a parser.

        Args:
            timeout: Seconds handed to ``requests.get`` for every download, so
                a stalled connection raises instead of hanging the kernel.
        """
        self._root = 'https://www.formula1.com'
        self._timeout = timeout
        # Season (as a string) -> OrderedDict of race name -> race link.
        self._race_links = {}
        # Exact-match replacements applied to a name after accents have been
        # removed.
        # NOTE(review): presumably the site lists this driver family-name
        # first, so the "surname" span holds the given name - confirm.
        self._translation = {
            "Guanyu": "Zhou"
        }

    def get_grid(self, year, race):
        """Return one normalised name per row of the qualifying table.

        The name is the text of the second <span> in each row's driver cell
        (the part ``get_full`` reports as the surname), in table order.

        Args:
            year: Season, as an int or a string (e.g. ``2024`` or ``'2024'``).
            race: Race name exactly as returned by ``get_races``.

        Raises:
            KeyError: ``race`` is not one of the season's races.
            ValueError: the qualifying page contains no results table.
        """
        return self._get_grid_from_link(self._quali_url(year, race))

    def get_full(self, year, race):
        """Return ``(forename, surname)`` tuples, one per qualifying table row.

        Both parts go through ``_normalise``, so the translation table is
        applied to each of them.
        NOTE(review): with the current translation table a row whose spans
        are ("Zhou", "Guanyu") comes back as ("Zhou", "Zhou") - confirm
        whether that is intended.

        Args and Raises: same as ``get_grid``.
        """
        return self._get_full_from_link(self._quali_url(year, race))

    def get_races(self, year):
        """Return the season's race names in the order the site lists them."""
        return list(self._get_race_links(year).keys())

    def _quali_url(self, year, race):
        """Build the absolute URL of the qualifying page for ``race``."""
        links = self._get_quali_links(year)
        try:
            link = links[race]
        except KeyError:
            # Same exception type as a plain dict lookup, but say what is valid.
            raise KeyError("no race %r in season %s; known races: %s"
                           % (race, year, ', '.join(links)))
        return self._root + link

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed with ``html.parser``.

        Raises:
            requests.HTTPError: the server answered with an error status.
        """
        response = requests.get(url, timeout=self._timeout)
        # Fail here rather than parsing an error page and failing later on a
        # missing element.
        response.raise_for_status()
        # Decode as UTF-8 instead of whatever charset requests inferred.
        response.encoding = 'utf-8'
        return BeautifulSoup(response.text, 'html.parser')

    def _get_race_links(self, year):
        """Return (and cache) an OrderedDict of race name -> race link."""
        # 2024 and '2024' must share a single cache entry.
        key = str(year)
        if key not in self._race_links:
            url = "%s/en/results.html/%s/races.html" % (self._root, key)
            soup = self._fetch_soup(url)
            links = OrderedDict()
            for anchor in soup.find_all(attrs={"data-name": "meetingKey"}):
                if anchor.span is None:
                    continue
                # Strip before comparing so the "All" filter sees the same
                # text that would be stored as the key.
                name = anchor.span.text.strip()
                if name != "All":
                    links[name] = anchor['href']
            self._race_links[key] = links
        return self._race_links[key]

    def _get_quali_links(self, year):
        """Return race name -> link with the race-result page swapped for qualifying."""
        return {k: v.replace('race-result.html', 'qualifying.html')
                for (k, v) in self._get_race_links(year).items()}

    def _normalise(self, s):
        """Trim ``s``, drop combining accent marks, then apply the translation table."""
        decomposed = unicodedata.normalize('NFD', s.strip())
        # After NFD decomposition accents are separate 'Mn' (non-spacing mark)
        # characters, so filtering them out leaves the base letters.
        norm = ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')
        return self._translation.get(norm, norm)

    def _table_from_link(self, url):
        """Return the ``resultsarchive-table`` element of the page at ``url``.

        Raises:
            ValueError: the page has no such table, or the table has no <tbody>.
        """
        soup = self._fetch_soup(url)
        table = soup.find(attrs={"class": "resultsarchive-table"})
        if table is None or table.tbody is None:
            raise ValueError("no results table found at %s" % url)
        return table

    def _driver_spans(self, table):
        """Return, for each body row, the list of <span> elements in the driver cell."""
        return [row('td')[self._DRIVER_CELL]('span') for row in table.tbody('tr')]

    def _get_grid_from_link(self, url):
        """Return the normalised second name span of every row at ``url``."""
        table = self._table_from_link(url)
        return [self._normalise(spans[1].text) for spans in self._driver_spans(table)]

    def _get_full_from_link(self, url):
        """Return normalised ``(forename, surname)`` tuples for every row at ``url``."""
        table = self._table_from_link(url)
        return [(self._normalise(spans[0].text), self._normalise(spans[1].text))
                for spans in self._driver_spans(table)]



In [None]:
# Scratch cell: download one qualifying results page and display its raw HTML.
url = 'https://www.formula1.com/en/results.html/2016/races/954/united-states/qualifying.html'
r = requests.get(url)
# Decode the body as UTF-8 regardless of the charset requests inferred.
r.encoding = 'utf-8'
r.text