# Lichess Tournament Games

In [None]:
import berserk, pandas, time
from datetime import datetime
from tqdm import tqdm
import pandas as pd
import schedule

# Token file in marisa's folder
# Read the personal Lichess API token. Surrounding whitespace is stripped
# because token files usually end with a newline, which must not end up in
# the Authorization header sent with every request.
with open('lichess.token') as f:
    API_TOKEN = f.read().strip()

# Authenticated session and client shared by every API call in this notebook.
session = berserk.TokenSession(API_TOKEN)
client = berserk.Client(session=session)

In [None]:
# Measure the time span between the earliest and the latest 'finishesAt'
# among the tournaments the API lists as most recently finished.
# Observed result: about 2 hours.
finished_tournaments = client.tournaments.get()['finished']
dates = [tournament['finishesAt'] for tournament in finished_tournaments]
earliest, latest = min(dates), max(dates)
str(latest - earliest)

### Set-returning functions

In [None]:
def get_top_users():
    '''
    Return: Set of usernames of the top-10 players of every speed/variant.

    The API answers with a mapping of perf type -> list of player records.
    Calling set() on that mapping would only collect the perf-type keys, so
    the usernames are extracted from the player records instead.
    '''
    top_by_perf = client.users.get_all_top_10()
    return {
        player['username']
        for players in top_by_perf.values()
        for player in players
    }


def get_latest_tournaments():
    '''
    Collect the ids of the tournaments the API currently lists as finished.

    Return: Set of tournament id's from the past two hours.
    Run every two hours.
    Game data can be immediately extracted.
    '''
    finished = client.tournaments.get()['finished']
    ids = set()
    for tournament in finished:
        ids.add(tournament['id'])
    return ids


def get_tournaments_from_users(users):
    '''
    Gather the ids of all tournaments created by the given users.

    Param: Set of usernames
    Return: Set of tournament id's created by the users
    Game data can be immediately extracted.
    '''
    found_ids = set()
    for username in tqdm(users):
        # One-second pause before every request
        # (presumably to stay within the API rate limits).
        time.sleep(1)
        created = list(client.tournaments.stream_by_creator(username))
        found_ids |= {tournament['id'] for tournament in created}
    return found_ids


def get_users_from_tournaments(t_ids):
    '''
    Param: Set of tournament ids
    Return: Set of usernames of players in the tournament

    Both the white and the black player of every exported game are collected.
    Players without a 'user' record are skipped. Tournaments whose export
    fails (e.g. tournament not found) are skipped as well; names gathered
    before the failure are kept.
    '''
    users = set()
    for t_id in t_ids:
        # One-second pause before every request.
        time.sleep(1)
        try:
            # API throws error if tournament not found
            for game in client.tournaments.export_games(t_id):
                players = game.get('players', {})
                for color in ('white', 'black'):
                    user = players.get(color, {}).get('user')
                    if user and user.get('name'):
                        users.add(user['name'])
        except Exception:
            # Best effort: skip this tournament, but (unlike a bare except)
            # still let KeyboardInterrupt stop a long-running crawl.
            continue
    return users

### Get tournaments using search-approach

In [None]:
# Breadth-first search over the user <-> tournament graph:
# users -> tournaments they created -> players of those tournaments -> ...
# all_users / all_tournaments accumulate everything seen so far, while
# users / tournaments hold only the newly discovered frontier of each round.
all_users = get_top_users()
all_tournaments = set()

users = all_users
tournaments = all_tournaments
min_tournaments_required = 10 #Change as needed

while len(all_tournaments) < min_tournaments_required:
    # Tournaments created by the frontier users that have not been seen yet
    tournaments = get_tournaments_from_users(users).difference(all_tournaments)
    all_tournaments = all_tournaments.union(tournaments)

    # Players of the new tournaments that have not been seen yet
    users = get_users_from_tournaments(tournaments).difference(all_users)
    all_users = all_users.union(users)

    print(len(all_users), len(all_tournaments))
    # TODO: Update csv

    # Nothing new to expand from: stop instead of looping forever
    if not users:
        break

### Get tournaments using timed-intervals

In [None]:
def combiner():
    '''
    Scheduled job: fetch the ids of the most recently finished tournaments.

    Return: Set of tournament ids (ignored by the scheduler, but handy when
    the job is called by hand).
    '''
    latest = get_latest_tournaments()
    # TODO: Update csv
    return latest

schedule.every(2).hours.do(combiner)

while True:
    schedule.run_pending()
    # Sleep between polls; without it the loop spins at full CPU
    time.sleep(1)

### Get games from tournaments (run once the tournament IDs are stored in a CSV)

In [None]:
# Alternative starting point when no tournament ids have been collected yet:
# start_users = get_top_users()
# tournaments = get_tournaments_from_users(start_users)

# Accumulator for every exported game. It has to exist before the loop:
# otherwise the NameError raised by games.append would be swallowed by the
# error handling below and no game would ever be collected.
games = []

for tournament in tqdm(tournaments):
    try:
        # API throws error if tournament not found
        games_by_tournament = client.tournaments.export_games(tournament)
        for game in games_by_tournament:
            # Remember which tournament the game belongs to
            game['tournament_id'] = tournament
            games.append(game)
    except Exception:
        # Best effort: skip tournaments whose export fails, but still allow
        # KeyboardInterrupt to stop the loop.
        continue

### Convert the collected games to a DataFrame

In [None]:
# Flatten the nested game records into one row per game
# (nested keys become dotted column names, e.g. 'players.white.user.name').
games_df = pd.json_normalize(games)

# Games without a winner carry no 'winner' key; make sure the column exists
# even if none of the collected games has one.
if 'winner' not in games_df.columns:
    games_df['winner'] = None

# 1 point for the winner, 0 otherwise.
# NOTE(review): games without a winner score 0 for both players — confirm
# that this is the intended scoring.
games_df['points1'] = (games_df['winner'] == "white").astype(int)
games_df['points2'] = (games_df['winner'] == "black").astype(int)

# Keep only the columns needed downstream, under their final names
games_df = games_df.rename(columns={
    "createdAt": "date_time",
    "players.white.user.name": "player1_name",
    "players.black.user.name": "player2_name"
})[["date_time", "tournament_id", "player1_name", "player2_name", "points1", "points2"]]

# Assign the sorted frame back so that later cells work on the sorted data
games_df = games_df.sort_values(by='date_time').reset_index(drop=True)
games_df

In [None]:
# Timestamp without spaces or colons so the filename is valid on every
# platform (str(datetime.now()) contains both; colons are illegal on Windows).
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
games_df.to_csv('T-lichess-' + timestamp + '.csv', index=False)