In [18]:
import os
import time
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [19]:
# One attribute filter per group table (A-F); each dict is used to look up a
# <table> element by its HTML id.
ids = [
    {"id": f"results20246761{group_letter}_overall"}
    for group_letter in "ABCDEF"
]

In [20]:
# Competition page that the cells below download and parse for the group
# tables and the per-team links.
url = "https://fbref.com/en/comps/676/European-Championship-Stats"

In [21]:
# Accumulator for the parsed group tables (one DataFrame per group).
dataframes = list()

In [22]:
# Download the competition page once and parse every group table from that
# single response. Passing the URL to pd.read_html inside the loop would
# download the same page once per group.
page_response = requests.get(url, timeout=30)
page_response.raise_for_status()

# Rebuild the list from scratch so that re-running this cell does not append
# the same tables a second time.
dataframes = []
for attr in ids:
    try:
        # Wrap the markup in StringIO: recent pandas versions deprecate
        # passing a literal HTML string to read_html.
        tables = pd.read_html(StringIO(page_response.text), attrs=attr)
    except ValueError:
        # read_html raises ValueError (it does not return an empty list) when
        # no table matches the requested attributes.
        print(f"No table found for {attr['id']}")
        continue
    dataframes.append(tables[0])

# Stack the group tables into one frame with a fresh 0..n-1 index.
result = pd.concat(dataframes, ignore_index=True)

In [23]:
# Fetch the competition page and parse it with BeautifulSoup so the team
# links can be read from the raw markup.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error (such as 429 Too Many Requests) instead of
# parsing an error page, which would silently yield no tables and no links.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

In [24]:
# Accumulator for the absolute team-page URLs.
squad_links = list()

In [26]:
# Start from an empty list on every run so that re-executing this cell does
# not append the same links again (which would make the list longer than
# `result` and break the column assignment below).
squad_links = []

for attr in ids:
    table = soup.find('table', attrs=attr)
    if table is None:
        continue
    for row in table.find_all('tr'):
        squad_cell = row.find('td', {'data-stat': 'team'})
        if squad_cell is None:
            # No team cell in this row (presumably a header row) - not a data row.
            continue
        anchor = squad_cell.find('a')
        # Emit exactly one entry per data row, using None when the team cell
        # has no link, so later rows keep their position instead of shifting up.
        # NOTE(review): assumes the rows pd.read_html produced for `result`
        # correspond one-to-one with these <tr> data rows - confirm.
        squad_links.append(f"https://fbref.com{anchor['href']}" if anchor else None)

missing_links = len(result) - len(squad_links)
if missing_links < 0:
    raise ValueError(
        f"Extracted {len(squad_links)} squad links for {len(result)} table rows; "
        "the links cannot be aligned with `result`."
    )
# Pad with None so the list length matches the frame.
squad_links.extend([None] * missing_links)

result['Squad Link'] = squad_links

In [28]:
# Wrap the collected links in a pandas Series for inspection.
squad_links_pd = pd.Series(data=squad_links)

In [29]:
# Bare expression: shows the Series of team links as this cell's output.
squad_links_pd

0     https://fbref.com/en/squads/c1e40422/Germany-M...
1     https://fbref.com/en/squads/b4ac5e97/Hungary-M...
2     https://fbref.com/en/squads/602d3994/Scotland-...
3     https://fbref.com/en/squads/81021a70/Switzerla...
4     https://fbref.com/en/squads/7b08e376/Croatia-M...
5     https://fbref.com/en/squads/998c5958/Italy-Men...
6     https://fbref.com/en/squads/b561dd30/Spain-Men...
7     https://fbref.com/en/squads/b44b9eb7/Albania-M...
8     https://fbref.com/en/squads/29a4e4af/Denmark-M...
9     https://fbref.com/en/squads/1862c019/England-M...
10    https://fbref.com/en/squads/1d6f5c9b/Serbia-Me...
11    https://fbref.com/en/squads/6b9f868f/Slovenia-...
12    https://fbref.com/en/squads/d5121f10/Austria-M...
13    https://fbref.com/en/squads/b1b36dcd/France-Me...
14    https://fbref.com/en/squads/5bb5024a/Netherlan...
15    https://fbref.com/en/squads/8912dcf0/Poland-Me...
16    https://fbref.com/en/squads/361422b9/Belgium-M...
17    https://fbref.com/en/squads/7def9493/Roman

In [30]:
import os

In [31]:
# Folder that receives the downloaded team pages; it is created on first run
# and an already-existing folder is accepted.
output_dir = 'team_html_files'
os.makedirs(name=output_dir, exist_ok=True)

In [33]:
# Download every team page and store it as <team>.html inside `output_dir`.

# Pause between consecutive requests so the loop does not fire them
# back-to-back at the site.
# NOTE(review): pick the delay from the site's current bot-traffic policy - confirm.
request_delay_seconds = 6
request_timeout_seconds = 30

request_made = False
# zip() pairs each link with its team name by position and stops at the
# shorter sequence, so a link list longer than `result` cannot raise KeyError.
for i, (team_name, link) in enumerate(zip(result['Squad'], squad_links)):
    if not link:
        print(f"No link found for {i}")
        continue

    if request_made:
        time.sleep(request_delay_seconds)
    request_made = True

    try:
        response = requests.get(link, timeout=request_timeout_seconds)
    except requests.RequestException as exc:
        # A network failure for one team should not abort the remaining downloads.
        print(f"Failed to download {link}: {exc}")
        continue

    if response.status_code != 200:
        print(f"Failed to download {link}")
        continue

    # Spaces in the squad name become dashes to form the file name.
    filename = f"{team_name.replace(' ', '-')}.html"
    filepath = os.path.join(output_dir, filename)
    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(response.text)
    print(f"Saved {filename} to {filepath}")

Saved de-Germany.html to team_html_files\de-Germany.html
Saved hu-Hungary.html to team_html_files\hu-Hungary.html
Saved sct-Scotland.html to team_html_files\sct-Scotland.html
Saved ch-Switzerland.html to team_html_files\ch-Switzerland.html
Saved hr-Croatia.html to team_html_files\hr-Croatia.html
Saved it-Italy.html to team_html_files\it-Italy.html
Saved es-Spain.html to team_html_files\es-Spain.html
Saved al-Albania.html to team_html_files\al-Albania.html
Saved dk-Denmark.html to team_html_files\dk-Denmark.html
Saved eng-England.html to team_html_files\eng-England.html
Saved rs-Serbia.html to team_html_files\rs-Serbia.html
Saved si-Slovenia.html to team_html_files\si-Slovenia.html
Saved at-Austria.html to team_html_files\at-Austria.html
Saved fr-France.html to team_html_files\fr-France.html
Saved nl-Netherlands.html to team_html_files\nl-Netherlands.html
Saved pl-Poland.html to team_html_files\pl-Poland.html
Saved be-Belgium.html to team_html_files\be-Belgium.html
Saved ro-Romania.html