In [1]:
import json
import pandas as pd

from itertools import product

In [2]:
def _played_time_by_player(min_year, max_year):
    """Sum the 'Time' of every sub-game each player appears in, over the given seasons.

    Returns a pandas Series indexed by the player entries found in the
    'Squad' lists of the squads JSON files (both 'Home' and 'Away' sides).
    """
    totals = {}
    for year in range(min_year, max_year + 1):
        with open(f'../../../Data/results/processed/Serie_A_{year}_squads.json') as f:
            squads = json.load(f)
        for sub_games in squads.values():
            for label, sub_game in sub_games.items():
                # 'Summary' entries are not sub-games and carry no squad to credit.
                if label == 'Summary':
                    continue
                for side in ('Home', 'Away'):
                    for player in sub_game[side]['Squad']:
                        totals[player] = totals.get(player, 0) + sub_game['Time']
    return pd.Series(totals)


def _clean_player_label(label, cod):
    """Strip the marker+code suffixes for ``cod`` and any leading digits from a raw label."""
    name = label
    for marker in ('RP', 'TP', 'R(g)P', 'T(g)P'):
        name = name.replace(f'{marker}{cod}', '')
    # Leading digits are presumably the shirt number (TODO confirm). Guarding on
    # `name` avoids an IndexError when nothing but digits is left.
    while name and name[0].isdigit():
        name = name[1:]
    return name.strip()


def get_top_players(min_year, max_year=None, top=10, min_played_time=450):
    """Return the highest-skilled players of the 'Poisson with players' model.

    Parameters
    ----------
    min_year : int
        First season to consider.
    max_year : int, optional
        Last season to consider (inclusive). Defaults to ``min_year``.
    top : int
        Number of players to return.
    min_played_time : int or float
        Minimum accumulated 'Time' (presumably minutes; TODO confirm) a player
        needs in the squads files to be eligible.

    Returns
    -------
    dict
        ``{player_cod: {'name': [...], 'clubs': [...]}}`` for the ``top``
        eligible players ranked by the 'mean' column of the results file.
        The lists hold the distinct cleaned names and clubs found in the games
        files, in first-seen order.
    """
    if max_year is None:
        max_year = min_year

    played_time_per_player = _played_time_by_player(min_year, max_year)

    # Single-season and multi-season fits only differ in the file-name suffix.
    suffix = f'{min_year}' if min_year == max_year else f'{min_year}_to_{max_year}'
    with open(f'../real_data/Poisson_with_players_model_data_{suffix}.json') as f:
        model_data = json.load(f)
    results = pd.read_csv(f'../results/results_Poisson_with_players_{suffix}.csv')

    # Indices in the variable names are treated as 1-based positions in model_data['players'].
    players_map = pd.Series({idx + 1: cod for idx, cod in enumerate(model_data['players'])})

    # Keep only rows whose variable carries a skills[<index>]; working on an
    # explicit copy avoids pandas' SettingWithCopyWarning on the assignments below.
    player_index = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False)
    results = results[player_index.notna()].copy()
    results['player_index'] = player_index.dropna().astype(int)
    results['player_cod'] = results['player_index'].map(players_map)
    results['played_time'] = results['player_cod'].map(played_time_per_player)

    eligible = results[results['played_time'] >= min_played_time]
    top_cods = eligible.sort_values(by='mean', ascending=False).head(top)['player_cod'].tolist()
    top_players = {cod: {'name': [], 'clubs': []} for cod in top_cods}

    for year in range(min_year, max_year + 1):
        with open(f'../../../Data/results/processed/Serie_A_{year}_games.json') as f:
            real_data = json.load(f)
        for game in real_data.values():
            for cod, player in product(top_players, game['Players']):
                # NOTE(review): this is a substring match, so a code contained in
                # another label's digits would also match; confirm codes cannot collide.
                if cod not in player[0]:
                    continue
                name = _clean_player_label(player[0], cod)
                if name:
                    top_players[cod]['name'].append(name)
                top_players[cod]['clubs'].append(player[1])

    # dict.fromkeys de-duplicates while keeping first-seen order, so the output
    # is reproducible across runs (set ordering of strings is not).
    return {
        cod: {
            'name': list(dict.fromkeys(info['name'])),
            'clubs': list(dict.fromkeys(info['clubs'])),
        }
        for cod, info in top_players.items()
    }


In [3]:
# Single-season ranking for 2019, using the function's default `top` and `min_played_time`.
get_top_players(min_year=2019)

{'295792': {'name': ['Eduardo Sasha Eduardo Colcenti Antunes'],
  'clubs': ['Santos / SP']},
 '308495': {'name': ['Daniel Daniel Sampaio Simoes'],
  'clubs': ['Fluminense / RJ']},
 '521990': {'name': ['De Arrascaeta Giorgian Daniel de A...',
   'De Arrascaeta Giorgian Daniel de A ...'],
  'clubs': ['Flamengo / RJ']},
 '294382': {'name': ['Zeca Jose Carlos Cracco Neto'],
  'clubs': ['Internacional / RS']},
 '175411': {'name': ['Paulo Victor Paulo Victor Mileo V ...',
   'Paulo Victor Paulo Victor Mileo V...'],
  'clubs': ['Grêmio / RS']},
 '182423': {'name': ['Manoel Manoel Messias Silva ...',
   'Manoel Manoel Messias Silva...'],
  'clubs': ['Corinthians / SP']},
 '170272': {'name': ['Victor FerrazVictor Ferraz Macedo',
   'Victor Ferraz Victor Ferraz Macedo'],
  'clubs': ['Santos / SP']},
 '150201': {'name': ['Diego Tard ... Diego Tardelli Martins'],
  'clubs': ['Grêmio / RS']},
 '291738': {'name': ['Dudu Eduardo Pereira Rodrigues'],
  'clubs': ['Palmeiras / SP']},
 '308316': {'name':

In [4]:
# Season range (inclusive) shared by the multi-season cells that follow.
min_year, max_year = 2014, 2023

In [5]:
# Multi-season ranking. The played-time threshold is 2 * 38 * 90 = 6840
# (presumably two 38-round seasons of 90 minutes each; TODO confirm).
get_top_players(
    min_year=min_year,
    max_year=max_year,
    top=10,
    min_played_time=2 * 38 * 90,
)

{'155508': {'name': ['Wilson Wilson Rodrigues de  ...',
   'Junior Wilson Rodrigues de ...',
   'Wilson Wilson Rodrigues de ...',
   'Junior Wilson Rodrigues de  ...'],
  'clubs': ['Vitória / BA', 'Atlético Mineiro / MG', 'Coritiba / PR']},
 '166276': {'name': ['Cássio Cassio Ramos', 'Cassio Cassio Ramos'],
  'clubs': ['Corinthians / SP']},
 '308316': {'name': ['Tadeu Tadeu Antonio Ferreira'], 'clubs': ['Goiás / GO']},
 '567555': {'name': ['Victor Cuesta Victor Leandro Cuesta',
   'Victor Cue ... Victor Leandro Cuesta'],
  'clubs': ['Internacional / RS', 'Botafogo / RJ']},
 '293428': {'name': ['Aderbar Aderbar Melo dos San...',
   'Santos Aderbar Melo dos San...',
   'Aderbar Aderbar Melo dos San ...',
   'Santos Aderbar Melo dos San ...'],
  'clubs': ['Athletico Paranaense / PR', 'Flamengo / RJ']},
 '156659': {'name': ['Victor Victor Leandro Bagy'],
  'clubs': ['Atlético Mineiro / MG']},
 '159684': {'name': ['fernando miguel kalfmann T(g) 159684',
   'Fernando M ... Fernando Miguel Ka

In [6]:
# Top clubs by the 'mean' of their skill parameter in the BT_1 results file.
with open(f'../real_data/BT_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)
results = pd.read_csv(f'../results/results_BT_1_{min_year}_to_{max_year}.csv')

# Indices in the variable names are treated as 1-based positions in model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

# Keep only rows that actually carry a skill[<index>]; rows without an index
# would otherwise break the integer cast. The explicit copy avoids pandas'
# SettingWithCopyWarning when the new columns are added.
club_index = results['variable'].str.extract(r'skill\[(\d+)\]', expand=False)
results = results[club_index.notna()].copy()
results['club_index'] = club_index.dropna().astype(int)
results['club_cod'] = results['club_index'].map(clubs)
results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Palmeiras / SP,0.630971
1,Flamengo / RJ,0.504028
2,Corinthians / SP,0.414555
3,Internacional / RS,0.378669
4,Atlético Mineiro / MG,0.370443


In [7]:
# Top clubs by the 'mean' of their skill parameter in the BT_2 results file.
with open(f'../real_data/BT_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)
results = pd.read_csv(f'../results/results_BT_2_{min_year}_to_{max_year}.csv')

# Indices in the variable names are treated as 1-based positions in model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

# Keep only rows that actually carry a skill[<index>]; rows without an index
# would otherwise break the integer cast. The explicit copy avoids pandas'
# SettingWithCopyWarning when the new columns are added.
club_index = results['variable'].str.extract(r'skill\[(\d+)\]', expand=False)
results = results[club_index.notna()].copy()
results['club_index'] = club_index.dropna().astype(int)
results['club_cod'] = results['club_index'].map(clubs)
results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Palmeiras / SP,0.612616
1,Flamengo / RJ,0.597788
2,Corinthians / SP,0.50288
3,Atlético Mineiro / MG,0.466377
4,Internacional / RS,0.464202


In [8]:
# Top clubs by the 'mean' of their skills parameter in the Poisson_1 results file.
with open(f'../real_data/Poisson_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)
results = pd.read_csv(f'../results/results_Poisson_1_{min_year}_to_{max_year}.csv')

# Indices in the variable names are treated as 1-based positions in model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

# Select rows with the same pattern used to read the index, so the row filter
# and the integer cast can never disagree. The explicit copy avoids pandas'
# SettingWithCopyWarning when the new columns are added.
club_index = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False)
results = results[club_index.notna()].copy()
results['club_index'] = club_index.dropna().astype(int)
results['club_cod'] = results['club_index'].map(clubs)
results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Palmeiras / SP,1.062381
1,Flamengo / RJ,0.999999
2,Atlético Mineiro / MG,0.987861
3,Grêmio / RS,0.969496
4,Corinthians / SP,0.947948


In [9]:
# Top clubs by the 'mean' of their skills parameter in the Poisson_2 results file.
with open(f'../real_data/Poisson_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)
results = pd.read_csv(f'../results/results_Poisson_2_{min_year}_to_{max_year}.csv')

# Indices in the variable names are treated as 1-based positions in model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

# Select rows with the same pattern used to read the index, so the row filter
# and the integer cast can never disagree. The explicit copy avoids pandas'
# SettingWithCopyWarning when the new columns are added.
club_index = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False)
results = results[club_index.notna()].copy()
results['club_index'] = club_index.dropna().astype(int)
results['club_cod'] = results['club_index'].map(clubs)
results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Palmeiras / SP,1.073632
1,Flamengo / RJ,1.000001
2,Atlético Mineiro / MG,0.992395
3,Grêmio / RS,0.969061
4,Corinthians / SP,0.951162


In [10]:
# Combine the per-club parameters of the KN_1 results file into one 'overall' score
# (attack - defense + home-attack delta - home-defense delta) and show the top clubs.
with open(f'../real_data/KN_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)
results = pd.read_csv(f'../results/results_KN_1_{min_year}_to_{max_year}.csv')

# Drop the non-indexed rows; the explicit copy avoids pandas'
# SettingWithCopyWarning when the new columns are added below.
results = results[~results['variable'].isin(['lp__', 'mu'])].copy()

# Indices 1..n_clubs and n_clubs+1..2*n_clubs both map to the same club list
# (presumably two parameter blocks per club; TODO confirm against the model).
n_clubs = len(model_data['clubs'])
clubs = dict()
for inx, cod in enumerate(model_data['clubs']):
    clubs[inx + 1] = cod
    clubs[inx + n_clubs + 1] = cod
results['club_index'] = results['variable'].str.extract(r'\[(\d+)\]', expand=False).astype(int)
results['club_cod'] = results['club_index'].map(clubs)

# Split by variable-name prefix: 'a', 'd', 'ha' and 'hd'.
attack_results = results[results['variable'].str.startswith('a')][['club_cod', 'mean']]
defense_results = results[results['variable'].str.startswith('d')][['club_cod', 'mean']]
delta_attack_results = results[results['variable'].str.startswith('ha')][['club_cod', 'mean']]
delta_defense_results = results[results['variable'].str.startswith('hd')][['club_cod', 'mean']]

# Name each 'mean' column explicitly before merging instead of relying on merge suffixes.
overall_results = (
    attack_results.rename(columns={'mean': 'mean_attack'})
    .merge(defense_results.rename(columns={'mean': 'mean_defense'}), on='club_cod')
    .merge(delta_attack_results.rename(columns={'mean': 'mean_delta_atk'}), on='club_cod')
    .merge(delta_defense_results.rename(columns={'mean': 'mean_delta_def'}), on='club_cod')
)

overall_results['overall'] = (
    overall_results['mean_attack']
    - overall_results['mean_defense']
    + overall_results['mean_delta_atk']
    - overall_results['mean_delta_def']
)

# A club can appear on several rows after the merges, so average per club before ranking.
(
    overall_results
    .groupby('club_cod')
    .mean()
    .reset_index()
    .sort_values(by='overall', ascending=False, ignore_index=True)
    .head()
)


Unnamed: 0,club_cod,mean_attack,mean_defense,mean_delta_atk,mean_delta_def,overall
0,Palmeiras / SP,0.251187,-0.116886,0.135958,-0.050648,0.55468
1,Flamengo / RJ,0.270776,-0.090437,0.139917,-0.036356,0.537485
2,Atlético Mineiro / MG,0.231658,-0.061304,0.122369,-0.01912,0.43445
3,Grêmio / RS,0.161427,-0.108334,0.079892,-0.072286,0.421939
4,Corinthians / SP,0.116598,-0.161968,0.052347,-0.07061,0.401523


In [11]:
# Combine the attack and defense parameters of the KN_2 results file into one
# 'overall' score (attack - defense) and show the top clubs.
with open(f'../real_data/KN_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)
results = pd.read_csv(f'../results/results_KN_2_{min_year}_to_{max_year}.csv')

# Drop the non-indexed rows; the explicit copy avoids pandas'
# SettingWithCopyWarning when the new columns are added below.
results = results[~results['variable'].isin(['lp__', 'mu'])].copy()

# Indices in the variable names are treated as 1-based positions in model_data['clubs'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['clubs'])}
results['club_index'] = results['variable'].str.extract(r'\[(\d+)\]', expand=False).astype(int)
results['club_cod'] = results['club_index'].map(clubs)

# NOTE(review): substring matching on 'a' / 'd' assumes no variable name contains
# both letters; confirm against the names in the results file.
attack_results = results[results['variable'].str.contains('a')][['club_cod', 'mean']]
defense_results = results[results['variable'].str.contains('d')][['club_cod', 'mean']]

overall_results = attack_results.rename(columns={'mean': 'mean_attack'}).merge(
    defense_results.rename(columns={'mean': 'mean_defense'}),
    on='club_cod',
)
overall_results['overall'] = overall_results['mean_attack'] - overall_results['mean_defense']
overall_results.sort_values(by='overall', ascending=False, ignore_index=True).head()

Unnamed: 0,club_cod,mean_attack,mean_defense,overall
0,Palmeiras / SP,0.350943,-0.187646,0.538589
1,Flamengo / RJ,0.368651,-0.13787,0.506521
2,Atlético Mineiro / MG,0.315653,-0.095472,0.411125
3,Grêmio / RS,0.214566,-0.190561,0.405126
4,Corinthians / SP,0.136064,-0.245469,0.381533
