In [19]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [30]:
### CLASSES ###

# class to hold individual player information
class Player:
    """Record describing one tournament entrant.

    The constructor stores its arguments unchanged:
    ``player_id`` -> ``id``, ``name`` -> ``name``, ``link`` -> ``decklist_link``
    and ``classification`` -> ``classification``. Every other attribute starts
    from a fixed default and is expected to be filled in after construction.
    """

    def __init__(self, player_id, name, link, classification):
        # identity (the id is a unique ID for this notebook, not a pokemonID)
        self.id, self.name = player_id, name

        # values handed in by the caller
        self.decklist_link, self.classification = link, classification

        # tournament result defaults
        self.CP, self.Day2 = 0, False

        # deck defaults; the list displays create fresh lists for every instance
        self.decklist, self.pokemonlist = [], []
        self.archetype, self.variant = 'Other', 'Lost Kyogre'  # RIP

In [21]:
### FUNCTIONS ###

# get soup from URL
def GetWebpage(URL, timeout=30):
    """Download ``URL`` and return the page parsed with BeautifulSoup.

    Parameters
    ----------
    URL : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Passed to ``requests.get`` as ``timeout`` (seconds). Without a
        timeout a stalled server would block this call indefinitely.

    Returns
    -------
    BeautifulSoup
        The response body parsed with the ``"html.parser"`` backend.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts, or an HTTP error status.
    """
    page = requests.get(URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
    page.raise_for_status()
    return BeautifulSoup(page.content, "html.parser")

# scrape tournament data
def GetPlayerData(tournament_soup):
    """Collect the Masters players from a parsed roster page and count each division.

    Every ``<tr>`` after the first (header) row is treated as one player.
    Within a row, cell 1 and cell 2 are joined into the player's name and
    cell 4 is read as the division label.

    Parameters
    ----------
    tournament_soup
        Parsed page exposing ``find_all('tr')``; each row must expose
        ``find_all('td')`` and each cell a ``.text`` string
        (e.g. the BeautifulSoup object returned by ``GetWebpage``).

    Returns
    -------
    tuple
        ``(player_list, master_counter, senior_counter, junior_counter)``.
        ``player_list`` holds one ``Player`` per Masters row, created with the
        row's position (header excluded) as id, an empty decklist link and
        classification ``-1``.

    Notes
    -----
    Rows with fewer than five cells, or whose division cell mentions none of
    'Masters', 'Senior' or 'Junior', are skipped. They are neither counted
    nor added to ``player_list``, so ``len(player_list) == master_counter``.
    """
    # Slice off the header row; unlike pop(0) this does not raise on a page without rows.
    rows = tournament_soup.find_all('tr')[1:]

    # Insertion order doubles as match priority: Masters, then Senior, then Junior.
    division_counts = {'Masters': 0, 'Senior': 0, 'Junior': 0}
    player_list = []

    for i, row in enumerate(rows):
        player_info = row.find_all('td')
        if len(player_info) < 5:
            continue  # malformed/short row: there is no division cell to read

        # Compare against the cell's text. A membership test on the cell object itself
        # checks the element's child nodes, which only matches when a child string is
        # exactly the label (no surrounding whitespace or markup).
        division_text = player_info[4].text.strip()
        division = next((label for label in division_counts if label in division_text), None)
        if division is None:
            continue  # unknown division: do not silently treat it as Masters

        division_counts[division] += 1
        if division != 'Masters':
            continue

        # continue gathering information for Masters players only
        player_name = player_info[1].text.strip() + " " + player_info[2].text.strip()
        player_list.append(Player(i, player_name, "", -1))

    return (player_list, division_counts['Masters'],
            division_counts['Senior'], division_counts['Junior'])

def ExportData(player_list, filename):
    """Write one spreadsheet row per player to ``filename``.

    Parameters
    ----------
    player_list : iterable of Player
        Players to export.
    filename : str
        Destination handed to ``DataFrame.to_excel``.

    Notes
    -----
    ``to_excel`` is called with its default arguments, so the DataFrame index
    is written as well. ``ImportData`` reads the columns by position and
    expects that extra leading column.
    """
    # Player attribute read for each spreadsheet column, in column order.
    attribute_for_column = (
        ('ID', 'id'),
        ('Name', 'name'),
        ('Link', 'decklist_link'),
        ('Classification', 'classification'),
        ('CP', 'CP'),
        ('Day 2', 'Day2'),
        ('Decklist', 'decklist'),
        ('Pokemon List', 'pokemonlist'),
        ('Archetype', 'archetype'),
        ('Variant', 'variant'),
    )
    headers = [header for header, _ in attribute_for_column]
    records = [[getattr(entry, attribute) for _, attribute in attribute_for_column]
               for entry in player_list]

    pd.DataFrame(records, columns=headers).to_excel(filename)
    
def ImportData(filename):
    """Rebuild the list of ``Player`` objects from a spreadsheet written by ``ExportData``.

    Columns are read by position. Column 0 is skipped (presumably the index
    column that ``ExportData`` writes by default; verify if the export changes).
    Columns 1-10 are, in order: ID, Name, Link, Classification, CP,
    Day 2, Decklist, Pokemon List, Archetype, Variant.

    Parameters
    ----------
    filename : str
        Path handed to ``pandas.read_excel``.

    Returns
    -------
    list of Player
        One object per spreadsheet row, in file order.

    Raises
    ------
    ValueError, SyntaxError, TypeError
        If a Decklist / Pokemon List cell does not contain a Python literal
        (raised by ``ast.literal_eval``), or a numeric cell cannot be converted.
    """
    import ast  # function-scope import: nothing else in the notebook needs it

    def parse_list_cell(cell):
        # SECURITY: this used to be eval(cell), which executes whatever expression the
        # spreadsheet contains. literal_eval only accepts Python literals (lists, strings,
        # numbers, ...), which is all a stringified list needs.
        # The numpy round trip is kept from the original so element types come out exactly
        # as they did before.
        return np.array(ast.literal_eval(cell)).tolist()

    # could use some cleaning, especially for data types
    rows = pd.read_excel(filename).values.tolist()

    imported_players = []
    for row in rows:
        # Player.__init__(self, player_id, name, link, classification)
        new_player = Player(int(row[1]), row[2], row[3], int(row[4]))
        new_player.CP = int(row[5])
        new_player.Day2 = bool(row[6])
        new_player.decklist = parse_list_cell(row[7])
        new_player.pokemonlist = parse_list_cell(row[8])
        new_player.archetype = row[9]
        new_player.variant = row[10]
        imported_players.append(new_player)
    return imported_players

In [6]:
# Roster page of the tournament being analysed
ROSTER_URL = "https://rk9.gg/roster/EU01wICdQN8zZclF7NTW"
soup_roster = GetWebpage(ROSTER_URL)

In [22]:
### Scrape roster data ###
player_list, master_counter, senior_counter, junior_counter = GetPlayerData(soup_roster)

# Summary lines, printed in this order between two separator rules
separator = "---------------------------------"
division_totals = (
    ("Master players: ", master_counter),
    ("Senior players: ", senior_counter),
    ("Junior players: ", junior_counter),
    ("Total players: ", master_counter + senior_counter + junior_counter),
)

print("Tournament data retrieved!")
print(separator)
for label, total in division_totals:
    print(label + str(total))
print(separator)

Tournament data retrieved!
---------------------------------
Master players: 3361
Senior players: 327
Junior players: 280
Total players: 3968
---------------------------------


In [23]:
####################### DO NOT RUN UNLESS SCRAPING WAS JUST DONE #######################
### Export player list ###
# Saves the scraped player list so the following sections can start from the file
# instead of running the scraping all over again (about 1 hour).
# The path is the same one the import cell reads from.
filename = "Tournament data - EUIC2025/PlayerList.xlsx"

ExportData(player_list, filename=filename)  # export to Excel
#########################################################################################

In [65]:
### Import saved data ###
filename = "Tournament data - EUIC2025/PlayerList.xlsx"
# Importing is not fully clean yet: list-valued columns are stored as text and other
# columns may need type fixes. For now only the decklist matters.
player_list = ImportData(filename)

# Quick sanity check on the first imported player: name, archetype, variant (one per line)
first_player = player_list[0]
print(first_player.name, first_player.archetype, first_player.variant, sep="\n")

Alejandro Chacon
Other
Lost Kyogre


In [66]:
# Print the name of every player on one archetype
# (handy for validating the deck selection conditions)
for entrant in player_list:
    if entrant.archetype != 'Gardevoir ex':
        continue
    print(str(entrant.name))


Alessio Cefola
Shauna Smith
Merlin Bresinski
Matteo Spalletta
Cyrielle Danel
Soo-Bin Audouy
Richard Yannow
Rene Ess
Nina Eisenblätter
Berry Agricola
Arthur Kerbiquet
Jory Koot


In [67]:
# Calculate metashare
# Each deck name maps to
# [number of players with deck, number of players in Day 2 with deck, total CP won by deck]
archetype_names = [
    'Charizard Pidgeot', 'Ancient Box', 'Archaludon ex', 'Banette Gardevoir', 'Ceruledge ex',
    'Regidrago VSTAR', 'Dragapult Dusknoir', 'Dragapult Pidgeot', 'Dragapult Iron Thorns',
    'DragaZard ex', 'Gardevoir ex', 'Poison Terapagos', 'Lost Zone Box', 'Roaring Moon ex',
    'Pidgeot Control', 'Snorlax Stall', 'Lugia Archeops', 'Gholdengo ex', 'Miraidon ex',
    'Raging Bolt', 'Gouging Fire', 'Wall', 'Terapagos Dusknoir', 'Palkia', 'Other',
]
# a separate [0, 0, 0] list per deck, in the order listed above
archetypes = {deck_name: [0, 0, 0] for deck_name in archetype_names}

# compile info for each deck
for player in player_list:
    deck_stats = archetypes[player.archetype]
    deck_stats[0] += 1              # number of players in Day 1 with deck
    if player.Day2:
        # to double check the Day 2 cut, print player.name here for a known classification
        deck_stats[1] += 1          # number of players in Day 2 with deck
    deck_stats[2] += player.CP      # total CP won by deck

# print info as "name; players; day 2 players; CP" (easy to copy to excel, could just export later)
for arch, stats in archetypes.items():
    print("; ".join(str(part) for part in (arch, stats[0], stats[1], stats[2])))

Charizard Pidgeot; 2; 0; 0
Ancient Box; 1; 0; 0
Archaludon ex; 12; 0; 0
Banette Gardevoir; 0; 0; 0
Ceruledge ex; 5; 0; 0
Regidrago VSTAR; 1; 0; 0
Dragapult Dusknoir; 16; 0; 0
Dragapult Pidgeot; 0; 0; 0
Dragapult Iron Thorns; 0; 0; 0
DragaZard ex; 1; 0; 0
Gardevoir ex; 12; 0; 0
Poison Terapagos; 3; 0; 0
Lost Zone Box; 3; 0; 0
Roaring Moon ex; 2; 0; 0
Pidgeot Control; 0; 0; 0
Snorlax Stall; 2; 0; 0
Lugia Archeops; 6; 0; 0
Gholdengo ex; 11; 0; 0
Miraidon ex; 10; 0; 0
Raging Bolt; 5; 0; 0
Gouging Fire; 1; 0; 0
Wall; 1; 0; 0
Terapagos Dusknoir; 0; 0; 0
Palkia; 1; 0; 0
Other; 3266; 0; 0
