In [1]:
import requests
import os
from functools import cached_property
from collections import defaultdict
import json
from dotenv import load_dotenv
load_dotenv() # includes the CHALLONGE_API_KEY variable

True

# Define code to download and parse Wingspan tournaments from Challonge

In [37]:
def challonge_get(url, retries=3):
    """Fetch ``url`` from Challonge and return the decoded JSON body.

    The API key is read from the ``CHALLONGE_API_KEY`` environment variable
    and sent as the ``api_key`` query parameter.

    Args:
        url: Full URL of the JSON endpoint to request.
        retries: Number of extra attempts made after the first failure;
            values below zero are treated as zero.

    Returns:
        The parsed JSON document, or ``None`` once every attempt has failed.

    Raises:
        KeyError: If ``CHALLONGE_API_KEY`` is not set. Retrying cannot fix a
            missing key, so it is reported immediately.
    """
    params = {'api_key': os.environ['CHALLONGE_API_KEY']}
    headers = {
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
    }
    for _attempt in range(max(retries, 0) + 1):
        try:
            # The timeout keeps one stalled connection from hanging the notebook.
            response = requests.get(url, params=params, headers=headers, timeout=30)
            return response.json()
        except (requests.RequestException, ValueError):
            # Only network and JSON-decoding failures are retried;
            # KeyboardInterrupt and programming errors propagate to the caller.
            print(f"failed to get {url}")
    return None

# Challonge account names; search results owned by anyone else are ignored.
valid_owners = {
    'Bargoff',
    'ChopYouUp',
    'ElsieGlen',
    'FlanHigh',
    'FloatingLakes',
    'Gtrudat',
    'Lone_Eider7',
    'ScaredofBirds',
    'deVisme',
}


class Game:
    """One game of a match: positional scores and players plus match metadata."""

    def __init__(self, scores, players, completed_at, winner):
        # scores[i] is the score of players[i].
        self.scores, self.players = scores, players
        self.completed_at, self.winner = completed_at, winner

    @property
    def score_dict(self):
        """Map each player to their score, pairing the two lists by position."""
        return dict(zip(self.players, self.scores))
        
class Match:
    """A Challonge match wrapping the raw ``match`` attribute dict."""

    def __init__(self, attrs):
        self.attrs = attrs

    @property
    def scores(self):
        """Per-game score pairs parsed from the ``scores_csv`` attribute.

        ``"97-86,81-103"`` becomes ``[[97, 86], [81, 103]]``.

        Raises:
            ValueError: If ``scores_csv`` is missing, empty, or contains
                anything other than ``-``-separated integers.
        """
        raw = self.attrs.get('scores_csv')
        try:
            return [[int(s) for s in scores.split('-')]
                    for scores in raw.split(',')]
        except (AttributeError, ValueError) as exc:
            # Fail with context rather than dropping into a debugger and
            # returning None, which made `games` crash with an unrelated error.
            raise ValueError(
                f"match {self.attrs.get('id')!r} has unparseable scores_csv {raw!r}"
            ) from exc

    @property
    def players(self):
        """The two participant ids, player 1 first."""
        return [self.attrs['player1_id'], self.attrs['player2_id']]

    @property
    def completed_at(self):
        """The ``completed_at`` attribute, or ``None`` when absent."""
        return self.attrs.get('completed_at')

    @property
    def winner(self):
        """The ``winner_id`` attribute, or ``None`` when absent."""
        return self.attrs.get('winner_id')

    @property
    def games(self):
        """One ``Game`` per score pair; each carries the match-level players,
        completion time and winner."""
        return [Game(scores, self.players, self.completed_at, self.winner) for scores in self.scores]
        
class Tournament:
    """A Challonge tournament backed by its raw ``tournament`` attribute dict.

    Participants and matches are downloaded with ``challonge_get`` on first
    access and then cached on the instance.
    """

    def __init__(self, attrs):
        self.attrs = attrs

    @classmethod
    def search(cls, max=1000):
        """Return tournaments found through Challonge's search endpoint.

        Only results owned by an account in ``valid_owners`` are loaded, and
        tournaments whose name starts with "test" (any case) are dropped.
        ``max`` is sent as the ``per`` query parameter.
        """
        url = f"https://challonge.com/search/tournaments.json?filters%5Bgame_id%5D=215240&per={max}"
        owned = []
        for entry in challonge_get(url)['collection']:
            if entry['owner'] in valid_owners:
                owned.append(entry)
        loaded = list(map(lambda entry: cls.by_url(entry['link']), owned))
        return [tourney for tourney in loaded
                if not tourney.name.lower().startswith('test')]

    @classmethod
    def by_url(cls, url):
        """Load a tournament from its public page URL (looked up via ``<url>.json``)."""
        page = challonge_get(f"{url}.json")
        return cls.by_id(page['tournament']['id'])

    @classmethod
    def by_id(cls, id):
        """Load a tournament from the v1 API by its numeric id."""
        endpoint = f"https://api.challonge.com/v1/tournaments/{id}.json"
        return cls(challonge_get(endpoint)['tournament'])

    @property
    def name(self):
        """Tournament name with surrounding whitespace removed."""
        return self.attrs['name'].strip()

    @property
    def id(self):
        """The ``id`` attribute."""
        return self.attrs['id']

    @property
    def url(self):
        """Public challonge.com URL built from the ``url`` attribute."""
        return f"https://challonge.com/{self.attrs['url']}"

    @property
    def date(self):
        """The ``started_at`` attribute."""
        return self.attrs['started_at']

    @cached_property
    def participants(self):
        """Raw participant dicts, downloaded once per instance."""
        endpoint = f"https://api.challonge.com/v1/tournaments/{self.id}/participants.json"
        return [entry['participant'] for entry in challonge_get(endpoint)]

    @cached_property
    def participants_by_id(self):
        """Map participant ids, and any ``group_player_ids``, to participant names."""
        lookup = {}
        for participant in self.participants:
            lookup[participant['id']] = participant['name']
            if 'group_player_ids' not in participant:
                continue
            for group_id in participant['group_player_ids']:
                # A group id must not collide with an id that is already mapped.
                assert group_id not in lookup
                lookup[group_id] = participant['name']
        return lookup

    @cached_property
    def matches(self):
        """``Match`` objects for every match whose state is ``complete``."""
        endpoint = f"https://api.challonge.com/v1/tournaments/{self.id}/matches.json"
        completed = []
        for entry in challonge_get(endpoint):
            if entry['match']['state'] == 'complete':
                completed.append(Match(entry['match']))
        return completed

    @property
    def games(self):
        """All games of all completed matches, in match order."""
        collected = []
        for match in self.matches:
            collected.extend(match.games)
        return collected

    @cached_property
    def scores_by_participant(self):
        """Map each participant name to the list of scores from their games."""
        collected = defaultdict(list)
        for game in self.games:
            for pid, score in game.score_dict.items():
                collected[self.participants_by_id[pid]].append(score)
        return collected

    @cached_property
    def as_json(self):
        """JSON-serialisable summary: id, name, url, date and one entry per game.

        NOTE(review): ``winner`` comes from ``game.winner``, which ``Match``
        fills with the match-level ``winner_id`` -- it looks like this is the
        match winner rather than the winner of the individual game; confirm.
        """
        def name_of(pid):
            return self.participants_by_id[pid]

        def describe(game):
            return {
                'completed_at': game.completed_at,
                'winner': name_of(game.winner),
                'scores': {
                    name_of(game.players[0]): game.scores[0],
                    name_of(game.players[1]): game.scores[1],
                },
            }

        summary = {
            'id': self.id,
            'name': self.name,
            'url': self.url,
            'date': self.date,
        }
        summary['games'] = [describe(game) for game in self.games]
        return summary
        

In [38]:
# Load one known tournament by id as a smoke test and show its name.
t = Tournament.by_id(13589704)
t.name

'Wingspan World Cup Regionals - Asia'

In [39]:
# Show the JSON-ready summary of the sample tournament.
t.as_json

{'id': 13589704,
 'name': 'Wingspan World Cup Regionals - Asia',
 'url': 'https://challonge.com/7ldxp0zx',
 'date': '2023-10-14T01:34:11.774-07:00',
 'games': [{'completed_at': '2023-10-19T14:20:35.234-04:00',
   'winner': 'mike316',
   'scores': {'mike316': 97, 'thebangzats': 86}},
  {'completed_at': '2023-10-15T08:17:17.903-04:00',
   'winner': 'Zeemokung',
   'scores': {'MrSaber': 85, 'Zeemokung': 87}},
  {'completed_at': '2023-11-03T07:58:38.909-04:00',
   'winner': 'Sim17',
   'scores': {'Sim17': 94, 'otsukuuyan': 85}},
  {'completed_at': '2023-10-14T15:47:23.445-04:00',
   'winner': 'Salmonskinroll',
   'scores': {'WillyPizza': 87, 'Salmonskinroll': 93}},
  {'completed_at': '2023-10-15T16:22:06.521-04:00',
   'winner': 'gdhar67 (GasLighter)',
   'scores': {'smelly_cat (smellysmellycat)': 85, 'gdhar67 (GasLighter)': 90}},
  {'completed_at': '2023-10-14T12:47:51.979-04:00',
   'winner': 'smelly_cat (smellysmellycat)',
   'scores': {'Salmonskinroll': 89, 'smelly_cat (smellysmellycat

# Fetch tournaments

In [40]:
# Download every matching tournament; Tournament.search issues requests for each result it keeps.
tourneys = Tournament.search()

In [41]:
# Convert every tournament to its JSON summary; tournaments that cannot be
# converted are set aside in bad_tourneys instead of aborting the run.
tourney_json = []
bad_tourneys = []
for t in tourneys:
    try:
        tourney_json.append(t.as_json)
    except Exception as exc:
        # `Exception` rather than a bare except, so interrupting the kernel
        # during the download is not swallowed; the reason is printed so a
        # skipped tournament can be investigated.
        print(f"skipping {t.name!r}: {exc!r}")
        bad_tourneys.append(t)

In [42]:
# Peek at the raw attributes of one match.
tourneys[1].matches[0].__dict__

{'attrs': {'id': 343585905,
  'tournament_id': 13718089,
  'state': 'complete',
  'player1_id': 26013359,
  'player2_id': 26013366,
  'player1_prereq_match_id': None,
  'player2_prereq_match_id': None,
  'player1_is_prereq_match_loser': False,
  'player2_is_prereq_match_loser': False,
  'winner_id': 26013366,
  'loser_id': 26013359,
  'started_at': '2023-11-12T10:08:48.380-06:00',
  'created_at': '2023-11-12T10:08:47.486-06:00',
  'updated_at': '2023-11-17T22:15:51.316-06:00',
  'identifier': 'A',
  'has_attachment': False,
  'round': 1,
  'player1_votes': None,
  'player2_votes': None,
  'group_id': 4875707,
  'attachment_count': None,
  'scheduled_time': None,
  'location': None,
  'underway_at': None,
  'optional': False,
  'rushb_id': None,
  'completed_at': '2023-11-17T22:15:51.338-06:00',
  'suggested_play_order': 1,
  'forfeited': None,
  'open_graph_image_file_name': None,
  'open_graph_image_content_type': None,
  'open_graph_image_file_size': None,
  'prerequisite_match_ids_c

# Normalize user names (there are a lot of near-duplicates :/)

In [43]:
# Every distinct player name that appears in any game's score table, sorted.
users = sorted({
    user
    for t in tourney_json
    for game in t['games']
    for user in game['scores']
})

In [44]:
import re

def normalize_name(name):
    """Collapse a display name to a lowercase, space-free comparison key.

    Everything from the first ``(``, `` -``, tab, `` |`` or ``#`` onward is
    dropped; underscores, spaces and the ``ign: `` marker are then removed.
    """
    key = name.lower()
    for separator in ('(', ' -', '\t', ' |', '#'):
        key = key.split(separator)[0]
    # Order matters: underscores become spaces first, so "ign:_x" still
    # matches the "ign: " marker before all spaces are removed.
    for old, new in (("_", " "), ("ign: ", ""), (" ", "")):
        key = key.replace(old, new)
    return key.strip()

# Raw display name -> normalised comparison key, for every name in `users`.
normalized_user_names = { name: normalize_name(name) for name in users }

In [45]:
from collections import defaultdict

# Number of games played under each normalised name.
user_counts = defaultdict(int)

for tourney in tourney_json:
    for game in tourney['games']:
        for raw_name in game['scores']:
            user_counts[normalized_user_names[raw_name]] += 1

In [46]:
from difflib import get_close_matches
import numpy as np

# Suggest merges between near-duplicate normalised names: within each group of
# close matches, every name points at the one with the most games.
# NOTE(review): `substitutions` is not referenced by the later cells visible
# here (they use `substitutions2`); presumably it served as a candidate list.
substitutions = {}

# Sorted so the result is reproducible: iteration order of a set of strings
# can differ between kernel sessions, and a later iteration may overwrite an
# entry written by an earlier one.
all_names = sorted(user_counts)

for name in all_names:
    other_names = [n for n in all_names if n != name]
    matches = get_close_matches(name, other_names, cutoff=0.75)

    if matches:
        names = [name] + matches
        counts = [user_counts[n] for n in names]
        idx = np.argmax(counts)
        for n in names:
            if n != names[idx]:
                substitutions[n] = names[idx]

In [47]:
# Merges applied to normalised names: variant -> name it is folded into.
# NOTE(review): a few keys look mis-encoded (mojibake); confirm they match the
# names as they actually appear in the downloaded data.
#
# Candidate pairs that were considered but are left disabled:
#   taykay/tayray, rokb/rob, robin/rob, dagger/digger,
#   wingsplaingaming/wingspanner, bluethroat/bluethroat4life,
#   eurasiannuthatch/corsicannuthatch, team23/team20, sandro/andreko,
#   jeastthebeast+malue/jeastthebeast, eurasianhobby/eurasianjay,
#   ltrudat/gtrudat, scaredofdiamonds/scaredofbirds, wingsplain/wingspanner,
#   commonmoorhen/commonraven, spartafnfan13+wayward/spartanfan13,
#   merisabear/mrsaber, mtrooster/ronster, avery/av0ry,
#   white-backedwoodpecker/blackwoodpecker, groovenbeast/groovenstein,
#   eurasiansparrowhawk/eurasiantreesparrow,
#   eurasiannutcracker/eurasiannuthatch
substitutions2 = {
    'sleephead': 'sleepyhead',
    'tay-ray': 'tayray',
    'Å¼yrafðŸ¦’': 'Å¼yraf',
    'zyraf': 'Å¼yraf',
    'thecomedian91': 'thec0median',
    'thec0median91': 'thec0median',
    'irek': 'ireku',
    'no-m': 'no~m',
    'nom': 'no~m',
    'wingspanner2': 'wingspanner',
    'jeasthebeast': 'jeastthebeast',
    'elsapoguapobot': 'elsapoguapo',
    'dozo': 'dozi',
    'theflash04': 'theflash',
    'seechristy': 'seechristine',
    'stormchaser20': 'stormchaser',
    'ronster': 'ronster7',
    'ooievaarðŸ‡³ðŸ‡±': 'ooievaar',
    'spartafnfan13': 'spartanfan13',
    'falblingius': 'falblinger',
    'nedmund13': 'nedmund',
    'nedmud13': 'nedmund',
    'chuck': 'chuckaus',
    'mothertucker': 'mothertuckers',
    'yippecahier': 'yippeecahier',
}

In [48]:
# Apply the substitutions2 overrides to the normalised names; names without an override keep their normalised form.
normalized_user_names2 = { k: substitutions2.get(v, v) for k, v in normalized_user_names.items() }

In [49]:
# Invert the mapping: normalised name -> every raw spelling that maps to it.
names_matching_normalized = defaultdict(list)
for raw_name, normalized in normalized_user_names2.items():
    names_matching_normalized[normalized].append(raw_name)

In [50]:
from collections import Counter

# Number of games played under each raw (un-normalised) name.
overall_counts = Counter(
    user
    for t in tourney_json
    for game in t['games']
    for user in game['scores']
)

In [51]:
# For each normalised name, use its most frequently seen raw spelling as the
# canonical one; on a tie the spelling listed first wins.
canonical_names = {
    normalized_name: max(variants, key=lambda variant: overall_counts[variant])
    for normalized_name, variants in names_matching_normalized.items()
}

In [52]:
# Rewrite every game so player names use their canonical spelling. The input
# dicts in tourney_json are copied, not modified.
normalized_tourneys = []
for t in tourney_json:
    new_games = []
    for game in t['games']:
        new_game = dict(game)
        new_game['scores'] = {
            canonical_names[normalized_user_names2[raw_name]]: score
            for raw_name, score in game['scores'].items()
        }
        if 'winner' in game:
            # Canonicalise the winner too, so it keeps matching one of the
            # keys in `scores`; an unknown name is left untouched.
            winner = game['winner']
            new_game['winner'] = canonical_names.get(
                normalized_user_names2.get(winner), winner)
        new_games.append(new_game)
    new_tourney = dict(t)
    new_tourney['games'] = new_games
    normalized_tourneys.append(new_tourney)

In [53]:
# Spot-check one tournament after name normalisation.
normalized_tourneys[1]

{'id': 13718089,
 'name': 'Xenopsaris X-Factor',
 'url': 'https://challonge.com/wtds_xx',
 'date': None,
 'games': [{'completed_at': '2023-11-17T22:15:51.338-06:00',
   'winner': 'ChopYouUp',
   'scores': {'lemon': 70, 'ChopYouUp': 133}},
  {'completed_at': '2023-11-17T22:15:51.338-06:00',
   'winner': 'ChopYouUp',
   'scores': {'lemon': 81, 'ChopYouUp': 114}},
  {'completed_at': '2023-11-17T22:15:51.338-06:00',
   'winner': 'ChopYouUp',
   'scores': {'lemon': 0, 'ChopYouUp': 0}},
  {'completed_at': '2023-11-18T10:23:09.923-06:00',
   'winner': 'noravens',
   'scores': {'Orcinus': 105, 'noravens': 107}},
  {'completed_at': '2023-11-18T10:23:09.923-06:00',
   'winner': 'noravens',
   'scores': {'Orcinus': 81, 'noravens': 103}},
  {'completed_at': '2023-11-18T10:23:09.923-06:00',
   'winner': 'noravens',
   'scores': {'Orcinus': 0, 'noravens': 0}},
  {'completed_at': '2023-11-17T10:35:14.124-06:00',
   'winner': 'gtrudat',
   'scores': {'smelly_cat': 99, 'gtrudat': 82}},
  {'completed_at

# Remove tournaments that are not 2-player Wingspan

In [54]:
# Tournaments whose name contains any of these fragments are dropped, as are
# tournaments with no games at all.
excluded_name_fragments = (
    'Joint Jabiru Joust',
    'Tournament Discord Server Image Bird',
    'Toco Toucan Triple Team Tournament',
    'Birds of a Feather',
    'Diamond Firetail Foursomes',
)
normalized_tourneys = [
    t for t in normalized_tourneys
    if not any(fragment in t['name'] for fragment in excluded_name_fragments)
    and len(t['games'])
]

In [55]:
# Keep only games in which every recorded score is above zero.
for tourney in normalized_tourneys:
    kept = []
    for game in tourney['games']:
        if min(game['scores'].values()) > 0:
            kept.append(game)
    tourney['games'] = kept

# Save to JSON

In [56]:
# Write the cleaned tournaments to wingspan_tournaments.json in the working directory.
with open('wingspan_tournaments.json', 'w') as f:
    json.dump(normalized_tourneys, f, indent=2)

# Fix more names

In [57]:
# Reload the saved file so the remaining cells work from the data on disk.
with open('wingspan_tournaments.json', 'r') as f:
    normalized_tourneys = json.load(f)

In [58]:
# Final renames applied to the names stored in the saved file:
# name as stored -> name to use instead.
extra_substitutions = {
    'FlanSwitch': 'Flan',
    'Birdwatchermobile': 'Birdwatcher123',
    'Mliguori16 (Birdwatcher123)': 'Birdwatcher123',
    'Cap': 'Capitalist111#3362 (Cap)',
    'HAyAsIiI (slowPigeon)': 'slowPigeon',
    'Jjlig2mobile2': 'Jjlig2',
    'No~M l IGN: danaran': 'No~M (danaran)',
    'Rafa oakie (oakie)': 'Rafa (oakie)',
    'Shiny (ShinyEmmy)': 'ShinyEmmy',
    'Typhus / rnicolas (Typhus)': 'Typhus',
    'Wingsplain': 'Wingsplain Gaming (ShadowFox)',
    "oakie6439\t(oakie)": 'oakie',
    "nsegal14 (SegalLikeTheBird)": "SegalLikeTheBird",
    "e8v2000 (ScaredofBirds)": "ScaredofBirds",
    "eefdeaardappel (ooievaar) - The Netherlands": "ooievaar",
    "ooievaarðŸ‡³ðŸ‡± (ign moustache)": "ooievaar",
    "Johnlim": "Jlim (Johnlim)",
    "HaveAGoodOne": "Kip (ign: HaveAGoodOne)",
    "cedarwax": "maggie (cedarwax)",
    "mellymelyay": "mcdo0530 (mellymelyay)",
    "nardybux (MonkeyWren)": "MonkeyWren",
    "ScottCS (MortalWings)": "MortalWings",
    "Trung (Stalledbird)": "Stalledbird"
}

for t in normalized_tourneys:
    for g in t['games']:
        # Snapshot the affected keys first: the dict is modified while renaming.
        to_substitute = [user for user in g['scores'].keys() if user in extra_substitutions]
        for user in to_substitute:
            score = g['scores'][user]
            del g['scores'][user]
            g['scores'][extra_substitutions[user]] = score
        # Rename the winner as well, so it keeps matching a key in `scores`.
        if g.get('winner') in extra_substitutions:
            g['winner'] = extra_substitutions[g['winner']]

# Overwrite the saved file with the renamed data.
with open('wingspan_tournaments.json', 'w') as f:
    json.dump(normalized_tourneys, f, indent=2)

In [59]:
# Spot-check one tournament after the extra substitutions.
normalized_tourneys[1]

{'id': 13718089,
 'name': 'Xenopsaris X-Factor',
 'url': 'https://challonge.com/wtds_xx',
 'date': None,
 'games': [{'completed_at': '2023-11-17T22:15:51.338-06:00',
   'winner': 'ChopYouUp',
   'scores': {'lemon': 70, 'ChopYouUp': 133}},
  {'completed_at': '2023-11-17T22:15:51.338-06:00',
   'winner': 'ChopYouUp',
   'scores': {'lemon': 81, 'ChopYouUp': 114}},
  {'completed_at': '2023-11-18T10:23:09.923-06:00',
   'winner': 'noravens',
   'scores': {'Orcinus': 105, 'noravens': 107}},
  {'completed_at': '2023-11-18T10:23:09.923-06:00',
   'winner': 'noravens',
   'scores': {'Orcinus': 81, 'noravens': 103}},
  {'completed_at': '2023-11-17T10:35:14.124-06:00',
   'winner': 'gtrudat',
   'scores': {'smelly_cat': 99, 'gtrudat': 82}},
  {'completed_at': '2023-11-17T10:35:14.124-06:00',
   'winner': 'gtrudat',
   'scores': {'smelly_cat': 92, 'gtrudat': 94}},
  {'completed_at': '2023-11-17T10:35:14.124-06:00',
   'winner': 'gtrudat',
   'scores': {'smelly_cat': 108, 'gtrudat': 119}},
  {'comp