In [None]:
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Scrape the 2024 ratings table from eloratings.net and save it as a CSV in the
# user's Downloads folder.

# Set up Chrome WebDriver
downloads_folder = os.path.join(os.path.expanduser("~"), "Downloads")
chromedriver_path = os.path.join(downloads_folder, "chromedriver")  # Adjust for Windows: add ".exe"
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service)

url = 'https://eloratings.net/2024'
csv_path = os.path.join(downloads_folder, "elo_ratings_2024.csv")

# Column names used when the scraped table has exactly this many columns
columns = [
    "Rank", "Team", "Rating", "Average Rank", "Average Rating",
    "1 Year Rank Change", "1 Year Rating Change",
    "Total Matches", "Home", "Away", "Neutral",
    "Wins", "Losses", "Draws", "Goals For", "Goals Against"
]

# try/finally guarantees the browser is closed even when scraping fails part-way
try:
    # Open the page
    driver.get(url)

    # Wait for the grid-canvas div to load
    try:
        grid_canvas = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CLASS_NAME, "grid-canvas"))
        )
    except Exception as e:
        print(f"Error: {e}")
        # Re-raise instead of calling exit(): exit() would end the whole
        # interpreter/kernel session, not just this cell.
        raise

    # Scroll to the bottom repeatedly until the page height stops growing
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Wait for more rows to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Extract rows within the grid-canvas
    rows = grid_canvas.find_elements(By.CLASS_NAME, "slick-row")

    # Parse the text of every cell, keeping only rows that have at least one cell
    data = []
    for row in rows:
        cells = row.find_elements(By.CLASS_NAME, "slick-cell")
        row_data = [cell.text.strip() for cell in cells]
        if row_data:
            data.append(row_data)

    # max() on an empty list raises an opaque ValueError; fail with a clear message instead
    if not data:
        raise RuntimeError(f"No table rows were found at {url}; nothing to save.")

    # Pad short rows with None so every row has the same number of columns
    max_columns = max(len(row) for row in data)
    for row in data:
        row.extend([None] * (max_columns - len(row)))

    # Fall back to generic column names if the table width is not the expected one
    if len(columns) != max_columns:
        columns = [f"Column {i+1}" for i in range(max_columns)]

    # Create a DataFrame and save it to CSV in the Downloads folder
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(csv_path, index=False)
finally:
    # Close the driver
    driver.quit()

print(f"Data extraction complete. Saved to '{csv_path}'.")


Data extraction complete. Saved to 'C:\Users\ALESSANDRO\Downloads\elo_ratings_2024.csv'.


In [3]:
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from getpass import getuser

# Scrape the eloratings.net ratings table for every year in `years` and save the
# combined result as a single CSV.

# Get the current user's name dynamically
user = getuser()

# Set up the path for saving the CSV
# NOTE(review): this assumes a Windows profile located at C:\Users\<user>;
# consider deriving it from os.path.expanduser("~") — confirm before changing.
output_folder = f"C:\\Users\\{user}\\Documents\\GitHub\\tiebreak_wc\\data\\in"
os.makedirs(output_folder, exist_ok=True)  # Ensure the directory exists
output_path = os.path.join(output_folder, "elo_ratings.csv")

# Column names used when a year's table has exactly this many columns
# (the year itself is added as one extra trailing column)
base_columns = [
    "Rank", "Team", "Rating", "Average Rank", "Average Rating",
    "1 Year Rank Change", "1 Year Rating Change",
    "Total Matches", "Home", "Away", "Neutral",
    "Wins", "Losses", "Draws", "Goals For", "Goals Against"
]


def _scrape_year_rows(driver, year):
    """Load the eloratings.net page for ``year`` and return its table rows.

    Args:
        driver: The Selenium WebDriver used to load the page.
        year: The year whose page (``https://eloratings.net/<year>``) is opened.

    Returns:
        A list of rows, each a list of stripped cell texts (rows without any
        cell are dropped), or ``None`` if the grid did not appear within
        20 seconds (the error is printed).
    """
    # Open the page for the specified year
    driver.get(f'https://eloratings.net/{year}')

    # Wait for the grid-canvas div to load
    try:
        grid_canvas = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CLASS_NAME, "grid-canvas"))
        )
    except Exception as e:
        print(f"Error loading data for year {year}: {e}")
        return None

    # Scroll to the bottom repeatedly until the page height stops growing
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Wait for more rows to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Collect the text of every cell, row by row
    table_rows = []
    for row in grid_canvas.find_elements(By.CLASS_NAME, "slick-row"):
        cells = row.find_elements(By.CLASS_NAME, "slick-cell")
        row_data = [cell.text.strip() for cell in cells]
        if row_data:  # Only keep non-empty rows
            table_rows.append(row_data)
    return table_rows


# Set up Chrome WebDriver
downloads_folder = os.path.join(os.path.expanduser("~"), "Downloads")
chromedriver_path = os.path.join(downloads_folder, "chromedriver.exe")  # Adjust for Windows: add ".exe"
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service)

# Define the range of years
years = range(1986, 2025)  # From 1986 to 2024

# One DataFrame per successfully scraped year; concatenated once at the end.
# If the run is interrupted, the frames collected so far remain in this list.
year_frames = []

# try/finally guarantees the browser is closed even if the loop is interrupted
# (e.g. by KeyboardInterrupt) or a year raises unexpectedly
try:
    for year in years:
        print(f"Collecting data for year {year}...")

        data = _scrape_year_rows(driver, year)
        if data is None:
            # The grid never loaded; the error was already printed
            continue
        if not data:
            # max() below would raise on an empty list
            print(f"No rows found for year {year}; skipping.")
            continue

        # Pad short rows BEFORE appending the year so the year always lands
        # in the same (last) column
        max_columns = max(len(row) for row in data)
        for row in data:
            row.extend([None] * (max_columns - len(row)))
            row.append(year)

        # Use the descriptive names when the width matches, generic names otherwise
        if max_columns == len(base_columns):
            columns = base_columns + ["Year"]
        else:
            columns = [f"Column {i+1}" for i in range(max_columns + 1)]

        year_frames.append(pd.DataFrame(data, columns=columns))

    # Build the combined frame in one pass instead of re-concatenating every iteration
    if year_frames:
        all_data = pd.concat(year_frames, ignore_index=True)
    else:
        all_data = pd.DataFrame()

    # Save the final DataFrame to the specified directory
    all_data.to_csv(output_path, index=False)
finally:
    # Close the driver
    driver.quit()

print(f"Data extraction complete. Saved to '{output_path}'.")


Collecting data for year 1986...
Collecting data for year 1987...
Collecting data for year 1988...
Collecting data for year 1989...
Collecting data for year 1990...
Collecting data for year 1991...
Collecting data for year 1992...
Collecting data for year 1993...
Collecting data for year 1994...
Collecting data for year 1995...
Collecting data for year 1996...
Collecting data for year 1997...
Collecting data for year 1998...


KeyboardInterrupt: 