In [1]:
import ast
import json
import os
import re

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [10]:
### CLASSES ###

# class to hold individual player information
class Player:
    """Container for everything collected about one tournament player.

    Parameters
    ----------
    player_id : int
        Unique ID assigned by the caller (not a pokemonID).
    name : str
        Display name of the player.
    link : str
        Stored as ``decklist_link``.
    classification : int
        Stored unchanged as ``classification``.

    All remaining attributes start from the defaults set below and are
    expected to be overwritten by later processing steps.
    """

    def __init__(self, player_id, name, link, classification):
        self.id = player_id  # unique ID, in case it is useful later (not pokemonID)
        self.name = name

        self.decklist_link = link
        self.classification = classification
        self.CP = 0
        self.Day2 = False

        # Round results keyed by round label: {'R1': ['Opponent name', 'W'], ...}.
        # The earlier list-typed initialisation was a dead store (it was
        # overwritten immediately) and has been removed.
        # NOTE(review): the default below is a sample entry, not real data --
        # confirm whether an empty dict would be the better default.
        self.round_data = {'R1': ['Jane Doe', 'W']}
        self.score = [0, 0, 0]  # W, L, T

        self.decklist = []
        self.pokemonlist = []
        self.archetype = 'Other'
        self.variant = 'Lost Kyogre'  # RIP

In [12]:
### FUNCTIONS ###

# get soup from URL
def GetWebpage(URL, timeout=30):
    """Download a web page and return it parsed with BeautifulSoup.

    Parameters
    ----------
    URL : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Passed to ``requests.get``; without it a stalled server would block
        the notebook indefinitely. Defaults to 30 seconds.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the built-in ``html.parser``.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts, or (via ``raise_for_status``) a
        4xx/5xx response.
    """
    page = requests.get(URL, timeout=timeout)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    page.raise_for_status()
    return BeautifulSoup(page.content, "html.parser")

# scrape tournament data
def GetPlayerData(tournament_soup):
    """Build the list of Masters players from a parsed roster page.

    Every ``<tr>`` after the first (treated as the header) is read as one
    player. Cell 4 of the row is the division; cells 1 and 2 are joined with
    a space to form the player name (presumably first and last name).

    Parameters
    ----------
    tournament_soup : bs4.BeautifulSoup
        Parsed roster page.

    Returns
    -------
    tuple
        ``(player_list, master_counter, senior_counter, junior_counter)``
        where ``player_list`` holds one ``Player`` per Masters row and the
        counters give the number of rows seen for each division.
    """
    # Slicing instead of pop(0) so a page without any table rows yields an
    # empty result rather than an IndexError.
    rows = list(tournament_soup.find_all('tr'))[1:]

    # counters for number of players in each division
    master_counter = 0
    senior_counter = 0
    junior_counter = 0

    # list for Masters players
    player_list = []

    for i, row in enumerate(rows):  # i counts every row, including JR/SR
        player_info = row.find_all('td')
        if len(player_info) < 5:
            # Not a player row (e.g. spacer or malformed row): nothing to read.
            continue

        # Compare against the cell's text. `'Masters' in tag` would test
        # membership among the tag's children, which only matches when the
        # cell holds exactly that string with no surrounding whitespace.
        division = player_info[4].text.strip()
        if 'Masters' in division:
            master_counter += 1
        elif 'Senior' in division:
            senior_counter += 1
            continue
        elif 'Junior' in division:
            junior_counter += 1
            continue
        else:
            # Unknown division: previously fell through and was stored as a
            # Masters player without being counted.
            continue

        # continue gathering information for Masters players only
        player_name = player_info[1].text.strip() + " " + player_info[2].text.strip()

        # player.id corresponds to the row number (i.e. player number including JR/SR)
        player_list.append(Player(i, player_name, "", -1))

    return player_list, master_counter, senior_counter, junior_counter

def ExportData(player_list, filename):
    """Write all players to an Excel workbook, one row per player.

    Parameters
    ----------
    player_list : list
        Objects exposing the ``Player`` attributes read below.
    filename : str or path-like
        Destination passed to ``DataFrame.to_excel``. When it is a path, the
        parent folder is created if it does not exist yet.

    Notes
    -----
    The DataFrame index is written as the first column (pandas default);
    this is kept so the layout of the produced file stays unchanged.
    """
    columns = ['ID', 'Name', 'Link', 'Classification', 'CP', 'Day 2',
               'Rounds', 'Score',
               'Decklist', 'Pokemon List', 'Archetype', 'Variant']

    data_list = [
        [player.id, player.name, player.decklist_link, player.classification,
         player.CP, player.Day2,
         player.round_data, player.score,
         player.decklist, player.pokemonlist, player.archetype, player.variant]
        for player in player_list
    ]
    dataframe = pd.DataFrame(data_list, columns=columns)

    # A fresh checkout has no output folder yet; create it so the export does
    # not fail after a long scraping run.
    if isinstance(filename, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(filename))
        if parent:
            os.makedirs(parent, exist_ok=True)

    dataframe.to_excel(filename)
    
def ImportData(filename):
    """Rebuild the list of ``Player`` objects from an exported workbook.

    Parameters
    ----------
    filename : str or path-like
        Workbook to read with ``pandas.read_excel``. It must contain the
        columns 'ID', 'Name', 'Link', 'Classification', 'CP', 'Day 2',
        'Rounds', 'Score', 'Decklist', 'Pokemon List', 'Archetype', 'Variant'.

    Returns
    -------
    list
        One ``Player`` per row of the sheet, in sheet order.

    Raises
    ------
    KeyError
        If one of the expected columns is missing.
    ValueError, SyntaxError
        If a list/dict cell does not contain a plain Python literal.
    """
    imported_data = pd.read_excel(filename)

    def parse_literal(cell):
        # Lists and dicts are stored as their text representation.
        # ast.literal_eval only accepts Python literals, so a tampered or
        # shared workbook cannot execute code the way eval() would.
        # NOTE(review): the numpy round-trip is kept from the original
        # implementation; it may coerce mixed-type nested lists to a common
        # type -- confirm whether downstream code relies on that.
        return np.array(ast.literal_eval(cell)).tolist()

    def as_text(cell):
        # Empty strings are read back from Excel as NaN; restore them.
        return "" if pd.isna(cell) else cell

    imported_players = []
    # Columns are addressed by header name rather than by position, so the
    # result does not depend on whether the index column was written.
    for row in imported_data.to_dict('records'):
        # __init__(self, player_id, name, link, classification)
        new_player = Player(int(row['ID']), as_text(row['Name']),
                            as_text(row['Link']), int(row['Classification']))
        new_player.CP = int(row['CP'])
        new_player.Day2 = bool(row['Day 2'])
        new_player.round_data = parse_literal(row['Rounds'])
        new_player.score = parse_literal(row['Score'])
        new_player.decklist = parse_literal(row['Decklist'])
        new_player.pokemonlist = parse_literal(row['Pokemon List'])
        new_player.archetype = as_text(row['Archetype'])
        new_player.variant = as_text(row['Variant'])
        imported_players.append(new_player)
    return imported_players

In [16]:
# webpage of tournament roster
# Downloads and parses the roster page; `soup_roster` is the input of the
# roster-scraping cell that follows.
soup_roster = GetWebpage("https://rk9.gg/roster/EU01wICdQN8zZclF7NTW")

In [17]:
### Scrape roster data ###
# Extract the Masters players and the per-division head counts from the roster.
player_list, master_counter, senior_counter, junior_counter = GetPlayerData(soup_roster)

# Short summary so the scrape can be sanity-checked at a glance.
print("Tournament data retrieved!")
print("---------------------------------")

print(f"Master players: {master_counter}")
print(f"Senior players: {senior_counter}")
print(f"Junior players: {junior_counter}")
print(f"Total players: {master_counter + senior_counter + junior_counter}")
print("---------------------------------")

Tournament data retrieved!
---------------------------------
Master players: 3354
Senior players: 327
Junior players: 279
Total players: 3960
---------------------------------


In [18]:
####################### DO NOT USE UNLESS SCRAPING WAS JUST DONE #######################
### Export player list ###
# Saves the current player_list so the next sections can be run without
# repeating the scraping all over again (\approx 1 hour...).
# Running this cell overwrites the workbook with whatever player_list holds now.

filename = "Tournament data - EUIC2025/PlayerList.xlsx"

ExportData(player_list, filename) # Export to excel
#########################################################################################

In [19]:
### Import saved data ###
filename = "Tournament data - EUIC2025/PlayerList.xlsx"
# Importing is a little awkward: list/dict columns come back from Excel as text
# and have to be converted again. This should be cleaned up later, but for now
# the decklist is all that matters.
player_list = ImportData(filename)

# Quick look at the first imported player to confirm the round trip worked.
print(
    player_list[0].name,
    player_list[0].round_data,
    player_list[0].score,
    player_list[0].archetype,
    player_list[0].variant,
    sep="\n",
)

Alejandro Chacon
{'R1': ['Jane Doe', 'W']}
[0, 0, 0]
Other
Lost Kyogre


In [24]:
# Print the name of every player whose deck was classified as the archetype
# below (handy for validating the deck selection conditions).
for player in player_list:
    if player.archetype != 'Gardevoir ex':
        continue
    print(player.name)


In [26]:
# webpage of tournament pairings
# The query string selects which pairings are shown; change its values to
# load another page (pod=2&rnd=2 etc.).
soup_pairings = GetWebpage("https://rk9.gg/pairings/EU01wICdQN8zZclF7NTW?pod=2&rnd=1") # pod=2&rnd=2 etc.

In [27]:
# Exploratory probe: collect every <div> with id "cell-2-1-0-1" from the pairings page.
# NOTE(review): the recorded output shows many different players sharing this
# same id, so it does not identify a single cell; the full dump is also very
# long -- consider printing len(find_list) and a short slice instead.
find_list = soup_pairings.find_all('div', id="cell-2-1-0-1")
print(find_list)

[<div class="col-5 text-center player player1 winner" id="cell-2-1-0-1"><span class="name">Ben<br/> Shannon [UK]<br/></span> (1-0-0) 3 pts <br/></div>, <div class="col-5 text-center player player1" id="cell-2-1-0-1"><span class="name">Tyler<br/> Hammond [UK]<br/></span> (0-1-0) 0 pts <br/></div>, <div class="col-5 text-center player player1" id="cell-2-1-0-1"><span class="name">Jordan<br/> Peacock [UK]<br/></span> (0-1-0) 0 pts <br/></div>, <div class="col-5 text-center player player1" id="cell-2-1-0-1"><span class="name">Maximilien<br/> Mairey [FR]<br/></span> (0-1-0) 0 pts <br/></div>, <div class="col-5 text-center player player1" id="cell-2-1-0-1"><span class="name">Giuseppe<br/> Ametrano [IT]<br/></span> (0-1-0) 0 pts <br/></div>, <div class="col-5 text-center player player1" id="cell-2-1-0-1"><span class="name">Benjamin<br/> Rabaiotti [UK]<br/></span> (0-1-0) 0 pts <br/></div>, <div class="col-5 text-center player player1" id="cell-2-1-0-1"><span class="name">Sacha<br/> Scoppa<br/