In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import pandas as pd

# Player-list page scraped by default.
DEFAULT_PLAYERS_URL = "https://gol.gg/players/list/season-S15/split-ALL/tournament-LCK%20Spring%202024/"

# CSS selector identifying the statistics table in the page HTML.
PLAYERS_TABLE_SELECTOR = "table.table_list.playerslist.tablesaw.trhover.tablesaw-swipe.tablesaw-sortable"


def _rows_to_records(table_rows):
    """Turn a header row plus data rows into ``{first_cell: {header: value}}``.

    Args:
        table_rows: List of non-empty lists of cell strings; the first list
            is treated as the header row.

    Returns:
        A dict keyed by each data row's first cell. Each value maps the
        remaining header names to that row's cell text. Returns an empty
        dict when there is no data row after the header.
    """
    if len(table_rows) < 2:
        return {}
    header_row = table_rows[0]
    # zip() stops at the shorter sequence, so rows with fewer cells than the
    # header simply omit the trailing columns instead of raising IndexError.
    return {
        cells[0]: dict(zip(header_row[1:], cells[1:]))
        for cells in table_rows[1:]
    }


def get_role_data(role="TOP", url=DEFAULT_PLAYERS_URL, wait_seconds=5):
    """Scrape the player table for one role from a gol.gg player-list page.

    Opens a Chrome session, writes ``role`` into the page's hidden
    ``hiddenfieldrole`` input, submits the ``FilterForm`` form, waits a fixed
    delay, and parses the resulting HTML table.

    Args:
        role: Value written into the hidden role field (the notebook uses
            "TOP", "JUNGLE", "MID", "SUPPORT" and "ADC").
        url: Page to load before applying the role filter.
        wait_seconds: Seconds to sleep after submitting the form so the
            filtered page can load.

    Returns:
        A dict mapping the first cell of every data row to a dict of
        ``{column header: cell text}`` for the remaining columns. Empty when
        the table is missing or has no data rows (a message is printed in
        either case).

    Raises:
        Any exception raised by Selenium while loading the page or locating
        the form elements propagates to the caller; the browser is closed
        first.
    """
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
    try:
        # Loading the page happens inside the try block so that a failed
        # navigation still reaches driver.quit() and does not leak a browser.
        driver.get(url)

        # Set the hidden filter input via JavaScript; it is not user-editable.
        hidden_role = driver.find_element(By.ID, "hiddenfieldrole")
        driver.execute_script("arguments[0].value = arguments[1];", hidden_role, role)

        driver.find_element(By.ID, "FilterForm").submit()

        # NOTE(review): fixed delay kept from the original; an explicit wait
        # on the reloaded page would be more robust than sleeping.
        time.sleep(wait_seconds)

        page_html = driver.page_source
    finally:
        driver.quit()

    # Parsing only needs the captured HTML, so it runs after the browser closes.
    soup = BeautifulSoup(page_html, 'html.parser')
    table = soup.select_one(PLAYERS_TABLE_SELECTOR)
    if not table:
        print("Table not found for role:", role)
        return {}

    table_rows = []
    for tr in table.find_all('tr'):
        cell_texts = [cell.get_text(strip=True) for cell in tr.find_all(['td', 'th'])]
        if cell_texts:  # skip rows without any td/th cells
            table_rows.append(cell_texts)

    if len(table_rows) <= 1:
        print("No valid data rows found for role:", role)
        return {}

    return _rows_to_records(table_rows)

In [3]:
# Scrape every role in turn; the generator is consumed left to right, so the
# requests are issued in the listed order and bound to the matching names.
top_data, jg_data, mid_data, support_data, bot_data = (
    get_role_data(role=role_code)
    for role_code in ("TOP", "JUNGLE", "MID", "SUPPORT", "ADC")
)

In [4]:
import os

# Write one CSV per role into ./data, creating the directory if needed.
os.makedirs('data', exist_ok=True)

# Output file stem -> scraped {row key: {column header: value}} mapping.
roles_dict = {
    'top_data': top_data,
    'jg_data': jg_data,
    'mid_data': mid_data,
    'support_data': support_data,
    'bot_data': bot_data
}

for role_name, role_dict in roles_dict.items():
    # The outer keys are the row keys (first table column), so orient="index"
    # yields one row per key with the headers as columns. The previous
    # pd.DataFrame(role_dict) put the row keys in the columns and the headers
    # in the index, which index=False then discarded from the file.
    df = pd.DataFrame.from_dict(role_dict, orient="index")
    # Keep the index in the CSV: it carries the row keys.
    # "Player" presumes the first table column holds player names — confirm.
    df.to_csv(f'data/{role_name}.csv', index_label="Player")