In [1]:
from pathlib import Path

import imageio
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
# Notebook-wide display and plotting configuration.
pd.set_option('display.max_rows', 500)
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and 3.8
# removed the old names, so pick whichever spelling this installation provides.
plot_style = 'seaborn-v0_8-dark' if 'seaborn-v0_8-dark' in plt.style.available else 'seaborn-dark'
plt.style.use(plot_style)
print(plt.style.available)

['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']


In [3]:
# Grafana export of lichess.org monitoring; websocket connections are used as a proxy for players.
# NOTE(review): the home-relative path ties the notebook to one machine's checkout layout.
df = pd.read_csv(filepath_or_buffer='~/git/wclichess/wcdata.csv')

In [4]:
# World Cup 2022 fixture list (UTC according to the file name), downloaded from
# https://fixturedownload.com/results/fifa-world-cup-2022
fixtures = pd.read_csv(filepath_or_buffer='fifa-world-cup-2022-UTC.csv')

In [5]:
# Clean the raw Grafana export.
# Forward-fill the gaps left by missing Grafana points; DataFrame.ffill() replaces
# fillna(method='ffill'), which is deprecated since pandas 2.1.
df = df.ffill()
df['Time'] = pd.to_datetime(df['Time'])
# From '60.0 K' string to 60.0 float (the unit suffix after the whitespace is dropped).
# astype(str) keeps the cell re-runnable: once the column is already numeric,
# the .str accessor alone would raise AttributeError.
df['players'] = pd.to_numeric(df['players'].astype(str).str.split().str[0])

In [6]:
# Split each timestamp into a calendar date and an 'HH-MM' time-of-day label.
df = df.assign(
    day=df['Time'].dt.date,
    daytime=df['Time'].dt.strftime('%H-%M'),
)

In [7]:
# Distinct calendar days, in order of first appearance in the data.
observed_days = df['day'].unique()
days = observed_days[4:]  # world cup days: everything from the fifth observed day onwards
standard_day = observed_days[1]  # normal day example: the second observed day

In [8]:
# Build a long-format lookup with one row per (match, team), so that each team
# can be found by the day and kickoff time of its match.
# stack() interleaves Home/Away per match, which keeps the two teams of a match
# adjacent; the merge then re-attaches each match's 'Date' via the original row index.
fixture_lookup = (
    fixtures[['Home Team', 'Away Team']]
    .stack()
    .reset_index()
    .merge(fixtures['Date'], how='inner', left_on='level_0', right_index=True)
    .drop(columns=['level_0', 'level_1'])
    .rename(columns={0: 'Country'})
)
# Parse the date strings once, consistently day-first, and derive both columns
# from the same parsed values (previously 'Time' was parsed without dayfirst).
fixture_datetimes = pd.to_datetime(fixture_lookup['Date'], dayfirst=True)
fixture_lookup['Day'] = fixture_datetimes.dt.date
fixture_lookup['Time'] = fixture_datetimes.dt.strftime('%H-%M')  # same 'HH-MM' format as df.daytime

In [9]:
# Abbreviate the country names that are too long to label neatly on the plots.
short_names = {'Korea Republic': 'Korea', 'Saudi Arabia': 'S Arabia'}
fixture_lookup['Country'] = fixture_lookup['Country'].replace(short_names)

In [10]:
# Hand-tuned layout for every possible kickoff time:
#   (kickoff 'HH-MM', data x position of the dashed marker line, figure x position of its label)
# NOTE(review): the marker x values equal 4 * hour, presumably because the data has
# one sample per 15 minutes starting at midnight; confirm against the Grafana export.
kickoff_layout = [
    ('10-00', 40, 0.42),
    ('13-00', 52, 0.52),
    ('15-00', 60, 0.57),
    ('16-00', 64, 0.60),
    ('19-00', 76, 0.70),
]
# Arguments for plt.plot: a vertical black dashed line spanning y = 40..110 at the kickoff.
kickoffs = {t: ([x, x], [40, 110], 'k--') for t, x, _ in kickoff_layout}
# Horizontal figure coordinate at which the team names for that kickoff are written.
kickoff_labels = {t: label_x for t, _, label_x in kickoff_layout}

In [11]:
# Render one PNG frame per World Cup day: that day's player curve (green) drawn
# over the reference "normal" day (black), with a dashed marker and the team
# names at every kickoff time that has a match on that day.
IMG_DIR = Path('./img')
IMG_DIR.mkdir(parents=True, exist_ok=True)  # savefig does not create missing directories

# The reference curve is the same in every frame, so select it once.
standard_df = df.query('day == @standard_day')[['daytime', 'players']]

for n, day in enumerate(days):
    fig, ax = plt.subplots(figsize=(10, 7))
    day_df = df.query('day == @day')[['daytime', 'players']]
    day_df.plot(x='daytime', y='players', ax=ax, legend=False, color='g')
    standard_df.plot(x='daytime', y='players', ax=ax, legend=False, color='black')
    ax.set_title('Lichess.org Users During Football World Cup Relative to Normal Day', fontsize=16.5)
    # add labels for each kickoff that has at least one match on this day
    for t, line_args in kickoffs.items():
        matches = fixture_lookup.query('Day == @day and Time == @t')
        if not matches.empty:
            ax.plot(*line_args, lw=1, dashes=[2, 2])
            fig.text(kickoff_labels[t], 0.15, matches.Country.to_string(index=False), fontsize='medium')
    # date stamp in the upper-left corner of the figure
    fig.text(0.15, 0.8, str(day), fontsize='xx-large')
    # credit
    fig.text(0.15, 0.15, 'github @michael1241', fontsize='small')
    ax.set_xlabel('Time', fontsize=15)
    ax.set_ylabel('Online Users in 1000s', fontsize=15)
    ax.set_ylim([30, 110])
    fig.savefig(IMG_DIR / f'img_{n}.png', transparent=False)
    plt.close(fig)  # release the figure explicitly; one is created per day

In [15]:
# Assemble the per-day frames into an animated GIF at one frame per second.
# Expects ./img/ to exist (mkdir ./img/ first) and to contain img_0.png, img_1.png, ...
# with one image for every entry in `days`.
frames = [imageio.v2.imread(f'./img/img_{n}.png') for n in range(len(days))]
imageio.mimsave('./img/output.gif', frames, fps = 1)