In [1]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [2]:
### CLASSES ###

# class to hold individual player information
class Player:
    def __init__(self, player_id, name, link, classification, decklist, deck):
        self.id = player_id # unique ID, in case it is useful later (not pokemonID)
        
        self.name = name
        self.decklist_link = link
        self.classification = classification
        
        self.decklist = decklist # not complete decklist, only pokemon
        self.deck = deck


In [3]:
### FUNCTIONS ###

# get decklist from URL
def GetDecklist(URL):
    soup_player = GetWebpage(URL)
    find_list = soup_player.find_all('li', attrs={'class': 'pokemon', 'data-language': "EN"}) # find all HTML list 'li' elements with EN pokemon cards
    #print(find_list)

    pokemon_list = [] # list to hold all pokemon inside a given player's decklist
    
    for element in find_list:
        pokemon_list.append(element['data-cardname'])

    #print(pokemon_list)
    
    return pokemon_list

# get soup from URL
def GetWebpage(URL):
    page = requests.get(URL)
    soup = BeautifulSoup(page.content, "html.parser")
    return soup

In [4]:
print(GetDecklist("https://rk9.gg/decklist/public/ME01maemuAlswkbwNWfw/G1lXdlFZx40bq9PUmp1W"))
print(GetDecklist("https://rk9.gg/decklist/public/ME01maemuAlswkbwNWfw/0M3KDh1lYB2elENzXaua")) # player id = 2


['Flutter Mane', 'Great Tusk', 'Roaring Moon', 'Pecharunt ex', 'Iron Bundle', 'Munkidori', 'Budew', 'Radiant Greninja']
['Dreepy', 'Drakloak', 'Dragapult ex', 'Dragapult ex', 'Budew', 'Duskull', 'Duskull', 'Dusclops', 'Dusknoir', 'Fezandipiti ex', 'Lumineon V', 'Radiant Alakazam', 'Manaphy']


In [5]:
# webpage of tournament roster
soup_tournament = GetWebpage("https://rk9.gg/roster/ME01maemuAlswkbwNWfw")

In [6]:
### Scrap tournament data ###
find_list = soup_tournament.find_all('tr') # get all players from the player table (i.e. table rows)
find_list.pop(0) # pop header row

# counters for number of players in each division
master_counter = 0
senior_counter = 0 
junior_counter = 0

# list for Masters players
player_list = []

for i in range(len(find_list)): # loop through players (i.e. table rows)
    player_info = find_list[i].find_all('td') # get player data from the row (i.e. table element)
    
    # check player division
    if 'Masters' in player_info[4]:
        master_counter += 1
    elif 'Senior' in player_info[4]:
        senior_counter += 1
        continue
    elif 'Junior' in player_info[4]:
        junior_counter += 1
        continue

    # continue gathering information for Masters players only
    player_name = player_info[1].text + " " + player_info[2].text.strip()
    player_link = player_info[5].find_all('a')[0]['href']
    try: 
        player_classification = int(player_info[6].text)
    except:
        print('Error retrieving player classification for: ' + player_name)
        player_classification = -1

    #print(player_name)
    #print(player_link)
    #print(player_classification)

    player_list.append(Player(i, player_name, player_link, player_classification, '', 'Lost Kyogre')) # RIP

print("Tournament data retrieved!")
print("---------------------------------")

print("Master players: " + str(master_counter))
print("Senior players: " + str(senior_counter))
print("Junior players: " + str(junior_counter))
print("Total players: " + str(master_counter + senior_counter + junior_counter))
print("---------------------------------")

Error retrieving player classification for: raul perez
Error retrieving player classification for: Cesar Cubias
Tournament data retrieved!
---------------------------------
Master players: 1149
Senior players: 55
Junior players: 42
Total players: 1246
---------------------------------


In [7]:
### Get decklist information ###
url_base = "https://rk9.gg"

print("Gathering decklist data...")

for player in player_list:
    player.decklist = GetDecklist(url_base + player.decklist_link)
    print("- ID = " + str(player.id))

Gathering decklist data...
- ID = 0
- ID = 1
- ID = 2
- ID = 3
- ID = 4
- ID = 5
- ID = 6
- ID = 7
- ID = 8
- ID = 9
- ID = 10
- ID = 11
- ID = 12
- ID = 13
- ID = 14
- ID = 15
- ID = 16
- ID = 17
- ID = 18
- ID = 19
- ID = 21
- ID = 22
- ID = 23
- ID = 24
- ID = 25
- ID = 26
- ID = 27
- ID = 28
- ID = 29
- ID = 30
- ID = 31
- ID = 32
- ID = 33
- ID = 34
- ID = 35
- ID = 36
- ID = 37
- ID = 38
- ID = 39
- ID = 40
- ID = 41
- ID = 42
- ID = 44
- ID = 45
- ID = 46
- ID = 47
- ID = 48
- ID = 49
- ID = 50
- ID = 52
- ID = 53
- ID = 54
- ID = 55
- ID = 56
- ID = 58
- ID = 59
- ID = 60
- ID = 61
- ID = 62
- ID = 63
- ID = 64
- ID = 65
- ID = 66
- ID = 67
- ID = 68
- ID = 69
- ID = 70
- ID = 72
- ID = 73
- ID = 74
- ID = 76
- ID = 77
- ID = 78
- ID = 79
- ID = 80
- ID = 81
- ID = 82
- ID = 83
- ID = 85
- ID = 86
- ID = 87
- ID = 89
- ID = 90
- ID = 91
- ID = 92
- ID = 93
- ID = 94
- ID = 95
- ID = 96
- ID = 97
- ID = 98
- ID = 99
- ID = 100
- ID = 102
- ID = 103
- ID = 104
- ID = 105
- ID = 1

- ID = 816
- ID = 817
- ID = 818
- ID = 819
- ID = 820
- ID = 821
- ID = 822
- ID = 824
- ID = 825
- ID = 826
- ID = 827
- ID = 828
- ID = 829
- ID = 831
- ID = 832
- ID = 834
- ID = 835
- ID = 836
- ID = 837
- ID = 838
- ID = 839
- ID = 840
- ID = 841
- ID = 842
- ID = 843
- ID = 844
- ID = 845
- ID = 846
- ID = 847
- ID = 848
- ID = 849
- ID = 850
- ID = 851
- ID = 852
- ID = 854
- ID = 855
- ID = 856
- ID = 857
- ID = 858
- ID = 859
- ID = 860
- ID = 861
- ID = 862
- ID = 863
- ID = 864
- ID = 865
- ID = 866
- ID = 867
- ID = 868
- ID = 869
- ID = 870
- ID = 871
- ID = 872
- ID = 873
- ID = 874
- ID = 875
- ID = 876
- ID = 877
- ID = 878
- ID = 879
- ID = 880
- ID = 881
- ID = 882
- ID = 883
- ID = 884
- ID = 885
- ID = 886
- ID = 887
- ID = 888
- ID = 889
- ID = 890
- ID = 891
- ID = 892
- ID = 893
- ID = 894
- ID = 895
- ID = 896
- ID = 897
- ID = 898
- ID = 899
- ID = 900
- ID = 902
- ID = 903
- ID = 904
- ID = 905
- ID = 906
- ID = 907
- ID = 908
- ID = 909
- ID = 910
- ID = 912

In [13]:
### Export and save data ###
# this is so the next sections can be run without running the scrapping all over again (\approx 1 hour...)

filename = "Tournament data - day 1/ImportedData_Merida2025.xlsx"

data_list = []
for player in player_list:
    temp = [player.id, player.name, player.decklist, player.deck, player.classification]
    data_list.append(temp)

dataframe = pd.DataFrame(data_list, columns=['ID', 'Name', 'Decklist', 'Deck', 'Classification'])
dataframe.to_excel(filename)

In [63]:
####################### START OF ANALYSIS #######################
# Get archetype from decklist
def GetArchetype(pokemonList):     
    # Parse through the deck possibilities (updated: 13/02/2025)
    if 'Charizard ex' in pokemonList and 'Pidgeot ex' in pokemonList:
        #print("Found a Zard player!")
        return 'Charizard Pidgeot'
    
    elif 'Roaring Moon' in pokemonList and 'Roaring Moon ex' not in pokemonList:
        return 'Ancient Box'
    
    elif 'Archaludon ex' in pokemonList:
        return 'Archaludon ex'
    
    elif 'Banette ex' in pokemonList and 'Gardevoir ex' in pokemonList:
        return 'Banette Gardevoir'
    
    elif 'Ceruledge ex' in pokemonList:
        return 'Ceruledge ex'
    
    elif 'Regidrago VSTAR' in pokemonList and 'Regidrago V' in pokemonList:
        return 'Regidrago VSTAR'
    
    elif 'Dragapult ex' in pokemonList and 'Dusknoir' in pokemonList:
        return 'Dragapult Dusknoir'
    
    elif 'Dragapult ex' in pokemonList and 'Pidgeot ex' in pokemonList:
        return 'Dragapult Pidgeot'
    
    elif 'Dragapult ex' in pokemonList and 'Iron Thorns ex' in pokemonList:
        return 'Dragapult Iron Thorns'
    
    elif 'Dragapult ex' in pokemonList and 'Charizard ex' in pokemonList:
        return 'DragaZard ex'
    
    elif 'Gardevoir ex' and ('Scream Tail' in pokemonList or 'Drifloon' in pokemonList):
        return 'Gardevoir ex'
    
    elif 'Terapagos ex' and 'Brute Bonnet' in pokemonList:
        return 'Poison Terapagos'
    
    elif 'Comfey' in pokemonList and 'Radiant Greninja' in pokemonList:
        return 'Lost Zone Box'
    
    elif 'Roaring Moon ex' in pokemonList and 'Roaring Moon' in pokemonList:
        return 'Roaring Moon ex'#'Ancient Moon'
    
    elif 'Pidgeot ex' in pokemonList and 'Wellspring Mask Ogerpon ex' in pokemonList:
        return 'Pidgeot Control'
    
    elif 'Snorlax' in pokemonList:
        return 'Snorlax Stall'
    
    elif 'Lugia VSTAR' in pokemonList:
        return 'Lugia Archeops'
    
    elif 'Gholdengo ex' in pokemonList:
        return 'Gholdengo ex'
    
    elif 'Miraidon ex' in pokemonList:
        return 'Miraidon ex'
    
    elif 'Raging Bolt ex' in pokemonList:
        return 'Raging Bolt'
    
    elif 'Iron Thorns ex' in pokemonList:
        return 'Iron Thorns ex'
    
    elif 'Terapagos ex' and 'Dusknoir' in pokemonList:
        return 'Terapagos Dusknoir'
    
    elif 'Origin Forme Palkia VSTAR' in pokemonList:
        return 'Palkia'
    
    return 'Other'

# get CP from classification
# need to add kicker check to make this transversal to all tournament sizes
def GetCP(rank):
    max_rank = 512
    if rank > max_rank:
        return 0
    elif rank > 512:
        return 20
    elif rank > 256:
        return 40
    elif rank > 128:
        return 60
    elif rank > 64:
        return 80
    elif rank > 32:
        return 100
    elif rank > 16:
        return 125
    elif rank > 8:
        return 160
    elif rank > 4:
        return 280
    elif rank > 2:
        return 300
    elif rank > 1:
        return 325
    elif rank > 0:
        return 350
    else:
        print("That is a strange classification you got there")
        return 0

In [64]:
### Import saved data ###
filename = "Tournament data - day 1/ImportedData_Merida2025.xlsx"
imported_data = pd.read_excel(filename)
imported_data = imported_data.values.tolist()

imported_players = []
for player in imported_data:
    # __init__(self, player_id, name, link, classification, decklist, deck)
    imported_players.append(Player(player[1], player[2], '', player[5], player[3], player[4]))
    
print(imported_data[0])    
print(imported_players[0].decklist)

[0, 0, 'BENJAMIN RUIZ', "['Budew', 'Dreepy', 'Drakloak', 'Dragapult ex', 'Dragapult ex', 'Duskull', 'Dusclops', 'Dusknoir', 'Fezandipiti ex', 'Klefki', 'Radiant Alakazam', 'Bloodmoon Ursaluna ex']", 'Lost Kyogre', 183]
['Budew', 'Dreepy', 'Drakloak', 'Dragapult ex', 'Dragapult ex', 'Duskull', 'Dusclops', 'Dusknoir', 'Fezandipiti ex', 'Klefki', 'Radiant Alakazam', 'Bloodmoon Ursaluna ex']


In [73]:
# 'deck name':[number of players with deck, number of players in Day 2 with deck, total CP won by deck]
# CP won should be added to the Player class (even without scrapping again) but I am lazy
# same thing for who got Day 2
archetypes = {'Charizard Pidgeot':[0,0,0], 'Ancient Box':[0,0,0], 'Archaludon ex':[0,0,0], 'Banette Gardevoir':[0,0,0], 'Ceruledge ex':[0,0,0],\
              'Regidrago VSTAR':[0,0,0], 'Dragapult Dusknoir':[0,0,0], 'Dragapult Pidgeot':[0,0,0], 'Dragapult Iron Thorns':[0,0,0],\
              'DragaZard ex':[0,0,0], 'Gardevoir ex':[0,0,0], 'Poison Terapagos':[0,0,0], 'Lost Zone Box':[0,0,0], 'Roaring Moon ex':[0,0,0],\
              'Pidgeot Control':[0,0,0], 'Snorlax Stall':[0,0,0], 'Lugia Archeops':[0,0,0], 'Gholdengo ex':[0,0,0], 'Miraidon ex':[0,0,0],\
              'Raging Bolt':[0,0,0], 'Iron Thorns ex':[0,0,0], 'Terapagos Dusknoir':[0,0,0], 'Palkia':[0,0,0], 'Other':[0,0,0]}

# define all archetypes from decklist info
for player in imported_players:
    player.deck = GetArchetype(player.decklist)
    archetypes[player.deck][0] += 1 # number of players in Day 1 with deck 
    archetypes[player.deck][2] += GetCP(player.classification) # total CP won by deck
    if player.classification < 184 and player.classification > 0:
        #if player.classification == 184:
            #print(player.name) # confirming I did the Day 2 kicker correctly (double check with Pokedata/Limitless)
        archetypes[player.deck][1] += 1 # number of players in Day 2 with deck
        

#print(imported_players[0].decklist)
#print(imported_players[0].deck)

# print info (easy to copy to excel, could just export later)
for arch in archetypes:
    print(str(arch) + "; " + str(archetypes[arch][0]) + "; " + str(archetypes[arch][1]) + "; " + str(archetypes[arch][2]))

That is a strange classification you got there
That is a strange classification you got there
Charizard Pidgeot; 98; 4; 1280
Ancient Box; 14; 0; 80
Archaludon ex; 66; 10; 1580
Banette Gardevoir; 2; 0; 0
Ceruledge ex; 48; 5; 1400
Regidrago VSTAR; 35; 3; 700
Dragapult Dusknoir; 220; 32; 6145
Dragapult Pidgeot; 0; 0; 0
Dragapult Iron Thorns; 16; 0; 280
DragaZard ex; 28; 3; 700
Gardevoir ex; 156; 33; 4860
Poison Terapagos; 29; 4; 1040
Lost Zone Box; 16; 1; 325
Roaring Moon ex; 28; 1; 340
Pidgeot Control; 2; 2; 225
Snorlax Stall; 15; 5; 680
Lugia Archeops; 112; 32; 4325
Gholdengo ex; 28; 4; 720
Miraidon ex; 67; 21; 3505
Raging Bolt; 81; 16; 2145
Iron Thorns ex; 7; 1; 160
Terapagos Dusknoir; 25; 2; 320
Palkia; 20; 0; 280
Other; 36; 4; 725


In [67]:
# loop to look at individual decks (nice to validate deck selection conditions)
for player in imported_players:
    if player.deck == 'Other':
        print(str(player.id) + " " + str(player.decklist))

print(imported_players[742].name)

83 ['Miraidon', 'Fezandipiti ex', 'Mew ex', 'Iron Bundle', 'Iron Crown ex', 'Iron Hands ex']
142 ['Deino', 'Zweilous', 'Hydreigon ex', 'Pidgey', 'Pidgey', 'Pidgeotto', 'Pidgeot ex', 'Budew', 'Radiant Jirachi', 'Rotom V', 'Fezandipiti ex', 'Pecharunt ex', 'Lumineon V', 'Munkidori']
154 ['Cinderace ex', 'Terapagos ex', 'Cinderace ex', 'Terapagos ex', 'Hoothoot', 'Hoothoot', 'Noctowl', 'Noctowl', 'Scorbunny', 'Bouffalant', 'Fan Rotom', 'Fan Rotom', 'Pidgeot ex', 'Fezandipiti ex', 'Pidgey']
219 ['Tinkatink', 'Tinkatuff', 'Tinkatuff', 'Tinkaton ex', 'Tinkaton ex', 'Tinkaton', 'Rotom V', 'Radiant Jirachi', 'Fezandipiti ex', 'Luxray', 'Dunsparce', 'Dunsparce', 'Dudunsparce']
234 ['Regice', 'Regirock', 'Regidrago', 'Regieleki', 'Regigigas', 'Regigigas', 'Registeel', 'Radiant Greninja', 'Squawkabilly ex', 'Pikachu ex', 'Terapagos ex', 'Cornerstone Mask Ogerpon ex']
248 ['Arceus V', 'Arceus VSTAR', 'Giratina V', 'Giratina VSTAR', 'Bidoof', 'Bibarel', 'Fezandipiti ex', 'Skwovet', 'Iron Leaves ex'