## <u>  FIA Importing Code </u> 

### <u>  Imports </u> 

In [40]:
import os
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup

### <u> FIA Documents </u> 

In [41]:
def races_year(races_dict, year, year_string):
    """Build the FIA documents-page URL for every race held in a given season.

    Args:
        races_dict: Mapping of race name to the collection of seasons it ran in.
        year: Season marker looked up in each value of ``races_dict``.
        year_string: Season slug inserted into the URL after ``season-``.

    Returns:
        A list of ``(race, url)`` tuples, in ``races_dict`` iteration order,
        for the races whose seasons contain ``year``.
    """
    url_template = (
        "https://www.fia.com/documents/championships/"
        "fia-formula-one-world-championship-14/season/season-{}/event/{}%20Grand%20Prix"
    )
    selected = []
    for name, seasons in races_dict.items():
        if year not in seasons:
            continue
        # Spaces in the race name are percent-encoded by hand for the URL path.
        encoded_name = name.replace(" ", "%20")
        selected.append((name, url_template.format(year_string, encoded_name)))
    return selected

def make_dir(folder_location):
    """Create ``folder_location`` if it does not already exist.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    Args:
        folder_location: Path of the directory to create.
    """
    # makedirs + exist_ok avoids both the "parent is missing" failure of
    # os.mkdir and the check-then-create race of a separate exists() test.
    os.makedirs(folder_location, exist_ok=True)

def pdf_get(races_list, year, directory = '../Downloaded Data/'):
    """Download every PDF linked from each race's documents page.

    Files are saved under ``directory + year + "/" + race``, named after the
    last path segment of the PDF link.

    Args:
        races_list: Iterable of ``(race, url)`` pairs, as built by ``races_year``.
        year: Season folder name as a string, e.g. "2019".
        directory: Base output directory; it is concatenated directly with
            ``year``, so it should end with a path separator.
    """
    year_dir = directory + year
    make_dir(year_dir)
    for race, url in races_list:
        race_dir = year_dir + "/" + race
        make_dir(race_dir)
        # A timeout keeps a stalled connection from hanging the whole run.
        page = requests.get(url, timeout=30)
        soup = BeautifulSoup(page.text, "html.parser")
        for link in soup.select("a[href$='.pdf']"):
            href = link['href']
            # Download before opening the output file so a failed request
            # does not leave an empty .pdf behind.
            pdf = requests.get(urljoin(url, href), timeout=30)
            if not pdf.ok:
                # Do not save an HTTP error page under a .pdf name.
                print("Skipping " + href + " (HTTP " + str(pdf.status_code) + ")")
                continue
            filename = os.path.join(race_dir, href.split('/')[-1])
            with open(filename, 'wb') as f:
                f.write(pdf.content)

### <u>  FIA Event Timing Documents </u> 

In [42]:
def race_urls(year, race_code):
    """Collect the event-timing page URLs for each race of one season.

    Fetches the FIA archive page for ``race_code``, keeps the links that point
    at Formula One championship event pages containing ``year``, groups them
    under the race event page that precedes them, and retains only the links
    containing "eventtiming".

    Args:
        year: Season as a string, e.g. "2019".
        race_code: Value appended to ``?season=`` in the archive URL, e.g. "971".

    Returns:
        A dict mapping race slug (e.g. "austrian-grand-prix") to a list of
        event-timing URLs. A list may be empty when no such link was found.
    """
    site = "https://www.fia.com"
    season_prefix = (
        site + "/championship/events/fia-formula-one-world-championship/season-" + year + "/"
    )

    # A timeout keeps a stalled connection from hanging the call forever.
    response = requests.get(site + "/f1-archives?season=" + race_code, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")

    links = []
    for anchor in soup.find_all('a'):
        link = anchor.get('href')
        # Anchors without an href yield None; skip them instead of crashing
        # on the substring tests below.
        if not link:
            continue
        if "/events/fia-formula-one-world-championship" not in link:
            continue
        if site not in link:
            link = site + link
        if year not in link:
            continue
        # Absolute links to the admin host were prefixed above; dropping the
        # admin origin leaves a regular www.fia.com URL.
        links.append(link.replace("https://admin.fia.com", ""))

    # Walk the links in page order: a race event page starts a new group and
    # every following link is attached to it. Links seen before the first
    # race page are collected under the placeholder key "season".
    grouped = {}
    key = "season"
    for link in links:
        if season_prefix in link:
            key = link.replace(season_prefix, '')
            grouped[key] = [link]
        else:
            grouped.setdefault(key, []).append(link)

    urls = {
        race: [link for link in group if "eventtiming" in link]
        for race, group in grouped.items()
    }
    # The placeholder group may not exist (e.g. no leading links, or no links
    # at all), so remove it without raising.
    urls.pop("season", None)

    # Hard-coded per-season overrides.
    # NOTE(review): presumably these races are linked differently on the
    # archive page — confirm they are still needed.
    if year == "2019":
        urls.update({'austrian-grand-prix': ['https://www.fia.com/eventtiming-information']})
    if year == "2020":
        urls.update({'formula-1-70th-anniversary-grand-prix': ['https://www.fia.com/events/fia-formula-one-world-championship/season-2020/formula-1-70th-anniversary-grand-prix']})
        # Tolerate the truncated slug being absent from the scraped page.
        urls.pop('formula-1-70th-anniversary-grand', None)
    return urls

def drop_empty_folders(directory):
    """Remove every empty directory at or below ``directory``.

    The tree is walked bottom-up, so a directory that only contained empty
    sub-directories is removed too once its children are gone. ``directory``
    itself is removed if it ends up empty.

    Args:
        directory: Root of the tree to clean up.
    """
    for dirpath, _dirnames, _filenames in os.walk(directory, topdown=False):
        # The names yielded by os.walk were captured before any child was
        # removed, so re-list the directory to see its current contents.
        if not os.listdir(dirpath):
            os.rmdir(dirpath)
            
def pdf_timings_get(urls, year, directory = '../Downloaded Data/'):
    """Download the PDFs linked from each race's event-timing page.

    Only the first URL of each race is fetched. Files are saved under
    ``directory + year + "/" + <race name>``, where the race name is derived
    from the slug. Empty folders under the season directory are removed at
    the end.

    Args:
        urls: Mapping of race slug to a list of page URLs, as returned by
            ``race_urls``.
        year: Season folder name as a string, e.g. "2019".
        directory: Base output directory; it is concatenated directly with
            ``year``, so it should end with a path separator.
    """
    year_dir = directory + year
    make_dir(year_dir)
    for slug, page_urls in urls.items():
        # A race may have no event-timing link at all; skip it instead of
        # failing on page_urls[0].
        if not page_urls:
            continue
        page_url = page_urls[0]
        # Turn the slug into a folder name: strip "-0"/"-1" and the
        # "-grand-prix" suffix, then title-case the remaining words.
        race = slug.replace("-0","").replace("-1","").replace("-grand-prix","").replace("-"," ").title()
        race_dir = year_dir + '/' + race
        make_dir(race_dir)
        # A timeout keeps a stalled connection from hanging the whole run.
        page = requests.get(page_url, timeout=30)
        soup = BeautifulSoup(page.text, "html.parser")
        for link in soup.select("a[href$='.pdf']"):
            href = link['href']
            # Download before opening the output file so a failed request
            # does not leave an empty .pdf behind.
            pdf = requests.get(urljoin(page_url, href), timeout=30)
            if not pdf.ok:
                # Do not save an HTTP error page under a .pdf name.
                print("Skipping " + href + " (HTTP " + str(pdf.status_code) + ")")
                continue
            filename = os.path.join(race_dir, href.split('/')[-1])
            with open(filename, 'wb') as f:
                f.write(pdf.content)
    drop_empty_folders(year_dir)

### <u> Obtaining the PDFs </u> 

In [43]:
# Race name -> two-digit seasons in which the race appears (19 = 2019, etc.).
# races_year() turns each name into "<name> Grand Prix" when building the
# documents-page URL, and returns races in this dict's iteration order.
races_dict = {
    'Abu Dhabi': [19, 20, 21],
    'Australian': [19, 20],
    'Austrian': [19, 20, 21],
    'Azerbaijan': [19, 21],
    'Bahrain': [19, 20, 21],
    'Belgian': [19, 20, 21],
    'Brazilian': [19, 21],
    'British': [19, 20, 21],
    'Dutch': [21],
    'Canadian': [19],
    'Chinese': [19],
    'Eifel': [20],
    'Emilia Romagna': [20, 21],
    'Formula 1 70th Anniversary': [20],
    'French': [19, 21],
    'German': [19],
    'Hungarian': [19, 20, 21],
    'Italian': [19, 20, 21],
    'Japanese': [19],
    'Mexican': [19, 21],
    'Monaco': [19, 21],
    'Portuguese': [20, 21],
    'Qatar': [21],
    'Russian': [19, 20, 21],
    'Sakhir': [20],
    'Saudi Arabia': [21],
    'Singapore': [19],
    'Spanish': [19, 21],
    'Styrian': [20, 21],
    'Turkish': [20, 21],
    'Tuscan': [20],
    'United States': [19, 21]}

# For each season: build the documents-page URLs (pure string work) and scrape
# the event-timing page URLs (race_urls performs an HTTP request when called).
# The download calls are left commented out; uncomment them to fetch the PDFs.

# 2019 season: URL slug "2019-971", archive code "971".
races_19 = races_year(races_dict, 19, "2019-971")
race_urls_2019 = race_urls("2019", "971")
# pdf_get(races_19, '2019')
# pdf_timings_get(race_urls_2019, "2019")

# 2020 season: URL slug "2020-1059", archive code "1059".
races_20 = races_year(races_dict, 20, "2020-1059")
race_urls_2020 = race_urls("2020", "1059")
# pdf_get(races_20, '2018')
# NOTE(review): '2018' above looks like a typo for '2020' (it would save the
# 2020 documents into a "2018" folder) — confirm before uncommenting.
# pdf_timings_get(race_urls_2020, "2020")

# 2021 season: URL slug "2021-1108", archive code "1108".
races_21 = races_year(races_dict, 21, "2021-1108")
race_urls_2021 = race_urls("2021", "1108")
# pdf_get(races_21, '2021')
# pdf_timings_get(race_urls_2021, "2021")