# Libraries

In [None]:
import os
import time
from datetime import date, timedelta
from getpass import getuser

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

# Elo ratings: European Championships

In [None]:


# Scrape the UEFA Elo ratings table from international-football.net for the
# day before each reference date below, and save all snapshots to elo_eu.xlsx.

# European Championship reference dates (the snapshot is taken one day earlier).
# NOTE(review): the opening matches appear to have been played one day AFTER
# each date listed here (e.g. Euro 2024 kicked off on 14 June), which would put
# the snapshot two days before kickoff -- confirm this is intended.
competitions = [
    {"year": 1988, "month": 6, "day": 9},
    {"year": 1992, "month": 6, "day": 9},
    {"year": 1996, "month": 6, "day": 7},
    {"year": 2000, "month": 6, "day": 9},
    {"year": 2004, "month": 6, "day": 11},
    {"year": 2008, "month": 6, "day": 6},
    {"year": 2012, "month": 6, "day": 7},
    {"year": 2016, "month": 6, "day": 9},
    {"year": 2021, "month": 6, "day": 10},  # Postponed to 2021 due to COVID-19
    {"year": 2024, "month": 6, "day": 13},
]

# One DataFrame per competition; concatenated once scraping is finished
all_data = []

# Set up the Selenium WebDriver
driver = webdriver.Chrome()  # Ensure the ChromeDriver is installed and in PATH

try:
    for competition in competitions:
        # Use real date arithmetic so that a reference date on the 1st of a
        # month rolls back to the last day of the previous month instead of
        # producing the invalid day 0.
        snapshot = date(
            competition["year"], competition["month"], competition["day"]
        ) - timedelta(days=1)
        year, month, day = snapshot.year, snapshot.month, snapshot.day

        # Construct the URL and open the page
        url = f"https://www.international-football.net/elo-ratings-table?year={year}&month={month}&day={day}&confed=UEFA"
        driver.get(url)

        # Fixed pause so the JavaScript-rendered tables have time to appear
        time.sleep(5)

        # Find all tbody elements
        tbodys = driver.find_elements(By.TAG_NAME, "tbody")
        print(f"For {year}-{month}-{day}, found {len(tbodys)} <tbody> elements.")

        teams = []
        elo_ratings = []

        # The first <tbody> is deliberately skipped; only the later ones are read
        for tbody in tbodys[1:]:
            for row in tbody.find_elements(By.TAG_NAME, "tr"):
                cols = row.find_elements(By.TAG_NAME, "td")
                if len(cols) >= 3:  # Rows with fewer than three cells carry no rating
                    teams.append(cols[1].text.strip())  # Second column: team name
                    elo_ratings.append(cols[2].text.strip())  # Third column: Elo rating

        # Make an empty scrape visible instead of silently saving no rows
        if not teams:
            print(f"Warning: no ratings scraped for {year}-{month}-{day} ({url})")

        # Scalars (year/month/day) are broadcast across all scraped rows
        all_data.append(pd.DataFrame({
            "year": year,
            "month": month,
            "day": day,
            "team": teams,
            "elo_rating": elo_ratings,
        }))
finally:
    # Always release the browser, even if a page load or lookup raised
    driver.quit()

# Concatenate all data into a single DataFrame
final_df = pd.concat(all_data, ignore_index=True)

# Save the DataFrame to the specified directory (Windows-style user path)
user = getuser()  # Dynamically get the current username
file_path = os.path.join(f"C:\\Users\\{user}\\Documents\\GitHub\\tiebreak_wc\\data\\in", "elo_eu.xlsx")

# Ensure the directory exists
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save the DataFrame to the specified file path in Excel format
final_df.to_excel(file_path, index=False)

print(f"DataFrame saved to {file_path}")


For 1988-6-8, found 5 <tbody> elements.
For 1992-6-8, found 6 <tbody> elements.
For 1996-6-6, found 6 <tbody> elements.
For 2000-6-8, found 7 <tbody> elements.
For 2004-6-10, found 7 <tbody> elements.
For 2008-6-5, found 7 <tbody> elements.
For 2012-6-6, found 7 <tbody> elements.
For 2016-6-8, found 7 <tbody> elements.
For 2021-6-9, found 7 <tbody> elements.
For 2024-6-12, found 7 <tbody> elements.
DataFrame saved to C:\Users\aldi\Documents\GitHub\tiebreak_wc\data\in\elo_eu.xlsx


# Elo ratings: World Cup

In [23]:
# Scrape the world (confed=FIFA) Elo ratings table from
# international-football.net for the day before each World Cup start date
# below, and save all snapshots to elo_wc.xlsx.

# World Cup start dates
competitions = [
    {"year": 1986, "month": 5, "day": 31},
    {"year": 1990, "month": 6, "day": 8},
    {"year": 1994, "month": 6, "day": 17},
    {"year": 1998, "month": 6, "day": 10},
    {"year": 2002, "month": 5, "day": 31},
    {"year": 2006, "month": 6, "day": 9},
    {"year": 2010, "month": 6, "day": 11},
    {"year": 2014, "month": 6, "day": 12},
    {"year": 2018, "month": 6, "day": 14},
    {"year": 2022, "month": 11, "day": 20},
]

# One DataFrame per competition; concatenated once scraping is finished
all_data = []

# Set up the Selenium WebDriver
driver = webdriver.Chrome()  # Ensure the ChromeDriver is installed and in PATH

try:
    for competition in competitions:
        # Use real date arithmetic so that a start date on the 1st of a month
        # rolls back to the last day of the previous month instead of
        # producing the invalid day 0.
        snapshot = date(
            competition["year"], competition["month"], competition["day"]
        ) - timedelta(days=1)
        year, month, day = snapshot.year, snapshot.month, snapshot.day

        # Construct the URL and open the page
        url = f"https://www.international-football.net/elo-ratings-table?year={year}&month={month}&day={day}&confed=FIFA"
        driver.get(url)

        # Fixed pause so the JavaScript-rendered tables have time to appear
        time.sleep(5)

        # Find all tbody elements
        tbodys = driver.find_elements(By.TAG_NAME, "tbody")
        print(f"For {year}-{month}-{day}, found {len(tbodys)} <tbody> elements.")

        teams = []
        elo_ratings = []

        # The first <tbody> is deliberately skipped; only the later ones are read
        for tbody in tbodys[1:]:
            for row in tbody.find_elements(By.TAG_NAME, "tr"):
                cols = row.find_elements(By.TAG_NAME, "td")
                if len(cols) >= 3:  # Rows with fewer than three cells carry no rating
                    teams.append(cols[1].text.strip())  # Second column: team name
                    elo_ratings.append(cols[2].text.strip())  # Third column: Elo rating

        # Make an empty scrape visible instead of silently saving no rows
        if not teams:
            print(f"Warning: no ratings scraped for {year}-{month}-{day} ({url})")

        # Scalars (year/month/day) are broadcast across all scraped rows
        all_data.append(pd.DataFrame({
            "year": year,
            "month": month,
            "day": day,
            "team": teams,
            "elo_rating": elo_ratings,
        }))
finally:
    # Always release the browser, even if a page load or lookup raised
    driver.quit()

# Concatenate all data into a single DataFrame
final_df = pd.concat(all_data, ignore_index=True)

# Save the DataFrame to the specified directory (Windows-style user path)
user = getuser()  # Dynamically get the current username
file_path = os.path.join(f"C:\\Users\\{user}\\Documents\\GitHub\\tiebreak_wc\\data\\in", "elo_wc.xlsx")

# Ensure the directory exists
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save the DataFrame to the specified file path in Excel format
final_df.to_excel(file_path, index=False)

print(f"DataFrame saved to {file_path}")


For 1986-5-30, found 17 <tbody> elements.
For 1990-6-7, found 18 <tbody> elements.
For 1994-6-16, found 21 <tbody> elements.
For 1998-6-9, found 22 <tbody> elements.
For 2002-5-30, found 22 <tbody> elements.
For 2006-6-8, found 22 <tbody> elements.
For 2010-6-10, found 22 <tbody> elements.
For 2014-6-11, found 22 <tbody> elements.
For 2018-6-13, found 23 <tbody> elements.
For 2022-11-19, found 23 <tbody> elements.
DataFrame saved to C:\Users\aldi\Documents\GitHub\tiebreak_wc\data\in\elo_wc.xlsx
