In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time
import pandas as pd
import re

In [3]:
def _extract_table_rows(table):
    """Return the stripped cell texts of every non-empty <tr> in a parsed table."""
    table_data = []
    for row in table.find_all('tr'):
        cell_texts = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
        if cell_texts:
            table_data.append(cell_texts)
    return table_data


def _rows_to_records(table_data, year, include_season):
    """Turn scraped rows (header row first) into a dict keyed by '<first cell> <year>'."""
    headers = table_data[0]
    records = {}
    for cells in table_data[1:]:
        row_dict = {"Season": year} if include_season else {}
        # zip() stops at the shorter sequence, so a short row just omits its
        # trailing columns instead of raising IndexError.
        row_dict.update(zip(headers[1:], cells[1:]))
        records[cells[0] + " " + year] = row_dict
    return records


def get_league_data(player="player", season="ALL", year="S14", role="TOP"):
    """Scrape the gol.gg player or team list, filtered to LCK, into a nested dict.

    A Chrome session is started, the list page for the requested season/split
    is opened, "LCK" is entered into the league filter and the page is
    refreshed. For player queries the hidden role field is set to ``role``
    and the filter form is submitted again. The resulting table is then parsed
    from the page source.

    Args:
        player: ``"player"`` selects the players list; any other value selects
            the teams list.
        season: Value placed in the ``split-...`` segment of the URL.
        year: Value placed in the ``season-...`` segment of the URL (e.g.
            ``"S14"``); also appended to every row key.
        role: Role written into the hidden role field. Only used when
            ``player == "player"``.

    Returns:
        dict: ``{"<first cell> <year>": {header: cell_text, ...}}``. Player
        rows additionally carry a ``"Season"`` entry set to ``year``. The dict
        is empty when the table is missing or contains no data rows (a message
        is printed in both cases).

    Raises:
        Any exception raised by Selenium (driver start-up, navigation, element
        lookup) propagates to the caller; the browser is closed first whenever
        it was successfully started.
    """
    if player == "player":
        website = f"https://gol.gg/players/list/season-{year}/split-{season}/tournament-ALL/"
    else:
        website = f"https://gol.gg/teams/list/season-{year}/split-{season}/tournament-ALL/"
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

    extracted_data = {}

    try:
        # Navigation happens inside the try block so that a failed page load
        # still reaches driver.quit() and does not leak the browser process.
        driver.get(website)

        # Wait a bit for the page to load fully
        time.sleep(3)
        # Type "LCK" into the Leagues input (Selectize control)
        league_input = driver.find_element(By.CSS_SELECTOR, ".selectize-control.multi .selectize-input input")
        league_input.send_keys("LCK")
        time.sleep(1)
        league_input.send_keys(Keys.ENTER)  # finalize selection
        time.sleep(2)

        # Click the "Refresh" button to apply the LCK filter
        refresh_button = driver.find_element(By.ID, "btn_refresh")
        refresh_button.click()
        time.sleep(5)
        if player == "player":
            # Set the role filter through the hidden form field
            hidden_role = driver.find_element(By.ID, "hiddenfieldrole")
            driver.execute_script("arguments[0].value = arguments[1];", hidden_role, role)

            # Submit the form again to filter by role
            form = driver.find_element(By.ID, "FilterForm")
            form.submit()
            time.sleep(5)

        # Parse the updated page with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Locate the table containing the wanted data
        table = soup.select_one("table.table_list.playerslist.tablesaw.trhover.tablesaw-swipe.tablesaw-sortable")
        if table:
            table_data = _extract_table_rows(table)
            # The first row is the header; at least one more row is needed
            if len(table_data) > 1:
                extracted_data = _rows_to_records(table_data, year, player == "player")
            else:
                print("No valid data rows found.")
        else:
            print("Could not find the table.")

    finally:
        # Close the browser
        driver.quit()

    return extracted_data

In [4]:
# Scrape the team list once per season (S6 through S14) and merge every
# per-season result into a single dict.
teams_dict = {}
for season in range(6, 15):
    year = "S" + str(season)
    season_teams = get_league_data("team", "ALL", year)
    teams_dict.update(season_teams)

In [10]:
# Each teams_dict key ends with a season tag (e.g. " S14"); strip it to get
# the bare team name and put that name in a "Team" column ahead of the stats.
season_suffix = re.compile(r'\s+S\d+$')
teams = [
    {"Team": season_suffix.sub('', team_key), **stats}
    for team_key, stats in teams_dict.items()
]

# One DataFrame row per record
df_teams = pd.DataFrame(teams)

# Write the CSV; index=False leaves out the index column
df_teams.to_csv("teams_stats.csv", index=False)

In [17]:
# Roles to query for each season
roles = ["TOP", "JUNGLE", "MID", "ADC", "SUPPORT"]
players_dict = {}

# One scrape per (season, role) pair for S6 through S14; all results are
# merged into players_dict.
for season in range(6, 15):
    year = "S" + str(season)
    for role in roles:
        role_players = get_league_data("player", "ALL", year, role)
        players_dict.update(role_players)

In [18]:
# Each players_dict key ends with a season tag (e.g. " S14"); strip it to get
# the bare player name and put that name in a "Player" column ahead of the stats.
season_suffix = re.compile(r'\s+S\d+$')
players = [
    {"Player": season_suffix.sub('', player_key), **stats}
    for player_key, stats in players_dict.items()
]

# One DataFrame row per record
df_players = pd.DataFrame(players)

# Write the CSV; index=False leaves out the index column
df_players.to_csv("players_stats.csv", index=False)

In [19]:
def collect_player(season, year):
    """Fetch player data for every role of one season/split.

    Calls ``get_league_data("player", season, year, role)`` once for each of
    TOP, JUNGLE, MID, ADC and SUPPORT, in that order.

    Args:
        season: Passed through as the ``season`` argument of ``get_league_data``.
        year: Passed through as the ``year`` argument of ``get_league_data``.

    Returns:
        list: The five per-role results, in the role order given above.
    """
    positions = ("TOP", "JUNGLE", "MID", "ADC", "SUPPORT")
    return [get_league_data("player", season, year, position) for position in positions]