In [1]:
import json
import pandas as pd

from itertools import product

In [2]:
def get_top_players(min_year, max_year=None, top=10, min_played_time=450):
    """Return the best-rated players of the "Poisson with players" fit for a span of seasons.

    Steps:
      1. Sum, per player code, the ``Time`` of every squad segment the player
         appears in (Home or Away) in ``Serie_A_{year}_squads.json``.
      2. Load the model data / results files for the span and keep the
         ``skills[k]`` rows, mapping the 1-based ``k`` to ``model_data['players']``.
      3. Keep players whose accumulated time is at least ``min_played_time`` and
         take the ``top`` rows with the highest ``mean``.
      4. Collect the display names and clubs of those players from
         ``Serie_A_{year}_games.json``.

    Parameters
    ----------
    min_year : int
        First season (inclusive).
    max_year : int, optional
        Last season (inclusive). Defaults to ``min_year`` (single season), in
        which case the single-year model files are read.
    top : int
        Number of players to return.
    min_played_time : int or float
        Minimum accumulated ``Time`` required for a player to be ranked
        (same unit as the ``Time`` field of the squads files — presumably minutes).

    Returns
    -------
    dict
        ``{player_cod: {'name': [...], 'clubs': [...]}}`` ordered from best to
        worst mean skill. Names and clubs are de-duplicated and listed in
        first-seen order, so the result is reproducible between runs.

    Raises
    ------
    FileNotFoundError
        If any of the expected input files is missing.
    """
    if max_year is None:
        max_year = min_year
    years = range(min_year, max_year + 1)

    # 1) Accumulated playing time per player code over all requested seasons.
    played_time_per_player = dict()
    for year in years:
        with open(f'../../../Data/results/processed/Serie_A_{year}_squads.json') as f:
            squads = json.load(f)
        for game in squads.values():
            for sub_game_key, sub_game in game.items():
                if sub_game_key == 'Summary':
                    continue
                for side in ('Home', 'Away'):
                    for player in sub_game[side]['Squad']:
                        played_time_per_player[player] = (
                            played_time_per_player.get(player, 0) + sub_game['Time']
                        )

    played_time_per_player = pd.Series(played_time_per_player)

    # 2) Single-season files carry "{year}", multi-season ones "{min}_to_{max}".
    if min_year == max_year:
        span = f'{min_year}'
    else:
        span = f'{min_year}_to_{max_year}'
    with open(f'../real_data/Poisson_with_players_model_data_{span}.json') as f:
        model_data = json.load(f)
    results = pd.read_csv(f'../results/results_Poisson_with_players_{span}.csv')

    # The mapping assumes 1-based indices in the variable names.
    players_map = pd.Series({idx + 1: cod for idx, cod in enumerate(model_data['players'])})

    # Keep only rows that actually look like "skills[<k>]". Filtering on the
    # extracted index (instead of a plain substring test) means any other
    # "skills..." row without an index is dropped rather than crashing the
    # integer cast. `.copy()` makes the filtered frame independent, so the
    # column assignments below are not chained assignments on a slice.
    extracted = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False)
    is_skill_row = extracted.notna()
    results = results[is_skill_row].copy()
    results['player_index'] = extracted[is_skill_row].astype(int)
    results['player_cod'] = results['player_index'].map(players_map)
    results['played_time'] = results['player_cod'].map(played_time_per_player)

    # 3) Rank the players with enough playing time.
    results = results[results['played_time'] >= min_played_time]
    top_players_df = results.sort_values(by='mean', ascending=False).head(top)
    top_players = {
        cod: {'name': list(), 'clubs': list()}
        for cod in top_players_df['player_cod'].tolist()
    }

    # 4) Recover human-readable names and clubs from the games files.
    for year in years:
        with open(f'../../../Data/results/processed/Serie_A_{year}_games.json') as f:
            real_data = json.load(f)
        for game in real_data.values():
            players = game['Players']
            for cod, player in product(top_players, players):
                # NOTE(review): substring match — a code that is a substring of
                # another entry's text would also match; confirm codes are unambiguous.
                if cod not in player[0]:
                    continue

                # Strip the role marker + code, then any leading digits.
                name = player[0]
                for marker in ('RP', 'TP', 'R(g)P', 'T(g)P'):
                    name = name.replace(f'{marker}{cod}', '')
                # Guard on emptiness: the entry may contain nothing but digits
                # and the code, which used to raise IndexError on name[0].
                while name and name[0].isdigit():
                    name = name[1:]
                name = name.strip()

                if name:  # an empty string carries no information
                    top_players[cod]['name'].append(name)
                top_players[cod]['clubs'].append(player[1])

    # De-duplicate while keeping first-seen order (deterministic, unlike set()).
    return {
        cod: {
            'name': list(dict.fromkeys(info['name'])),
            'clubs': list(dict.fromkeys(info['clubs'])),
        }
        for cod, info in top_players.items()
    }


In [3]:
# Top players of the single 2019 season, using the function defaults
# (top=10, min_played_time=450).
get_top_players(2019)

{'294382': {'name': ['Zeca Jose Carlos Cracco Neto'],
  'clubs': ['Internacional / RS']},
 '313426': {'name': ['Gustavo Sc ... Gustavo Henrique Fur ...'],
  'clubs': ['Palmeiras / SP']},
 '293420': {'name': ['Willian Arão Willian Souza Arao d ...',
   'Willian Arão Willian Souza Arao d...'],
  'clubs': ['Flamengo / RJ']},
 '313780': {'name': ['Welison José Welison da Silva'],
  'clubs': ['Atlético Mineiro / MG']},
 '504064': {'name': ['Marquinhos Jose Marcos Costa Martins'],
  'clubs': ['Atlético Mineiro / MG']},
 '184600': {'name': ['Wellington Wellington Aparecido ...',
   'Wellington Wellington Aparecido...'],
  'clubs': ['Athletico Paranaense / PR']},
 '143465': {'name': ['Alecsandro Alecsandro Barbosa F ...'],
  'clubs': ['CSA / AL']},
 '610318': {'name': ['Tomas Andrade Tomas Gustavo Andrade'],
  'clubs': ['Athletico Paranaense / PR']},
 '318309': {'name': ['Gabriel Co ... Gabriel Costa Franca'],
  'clubs': ['Botafogo / RJ']},
 '565356': {'name': ['Trauco Miguel Angel Trauco  ...

In [4]:
# First and last season of the multi-season window read by the cells below
# (they interpolate these values into the "{min_year}_to_{max_year}" file names).
min_year = 2019
max_year = 2023

In [5]:
# Top 15 players over the whole window. The playing-time floor is 90*38*2 = 6840
# (presumably minutes, i.e. two full 38-round seasons of 90-minute games — TODO confirm units).
get_top_players(min_year, max_year, top=15, min_played_time=90*38*2)

{'567555': {'name': ['Victor Cue ... Victor Leandro Cuesta',
   'Victor Cuesta Victor Leandro Cuesta'],
  'clubs': ['Internacional / RS', 'Botafogo / RJ']},
 '346636': {'name': ['Rene Rene Rodrigues Martins'],
  'clubs': ['Flamengo / RJ', 'Internacional / RS']},
 '521990': {'name': ['de Arrasca ... Giorgian Daniel de A ...',
   'De Arrascaeta Giorgian Daniel de A ...',
   'De Arrascaeta Giorgian Daniel de A...'],
  'clubs': ['Flamengo / RJ']},
 '332318': {'name': ['Reinaldo Reinaldo Manoel da Silva'],
  'clubs': ['Grêmio / RS', 'São Paulo / SP']},
 '293428': {'name': ['Santos Aderbar Melo dos San...',
   'Santos Aderbar Melo dos San ...'],
  'clubs': ['Flamengo / RJ', 'Athletico Paranaense / PR']},
 '308316': {'name': ['Tadeu Tadeu Antonio Ferreira'], 'clubs': ['Goiás / GO']},
 '159684': {'name': ['fernando miguel kalfmann T(g) 159684',
   'Fernando M ... Fernando Miguel Kaufmann'],
  'clubs': ['Vasco da Gama / RJ', 'Fortaleza / CE', 'Atlético / GO']},
 '330886': {'name': ['Ze Rafael J

In [6]:
# Club ranking by mean skill from the BT_1 results file.
with open(f'../real_data/BT_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)

# The mapping assumes 1-based indices: skill[k] -> k-th entry of model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

results = pd.read_csv(f'../results/results_BT_1_{min_year}_to_{max_year}.csv')
# Keep exactly the rows shaped like "skill[<k>]". Filtering on the extracted
# index keeps filter and parser in agreement, and `.copy()` avoids assigning
# new columns to a slice of the original frame.
club_index = results['variable'].str.extract(r'skill\[(\d+)\]', expand=False)
is_skill_row = club_index.notna()
results = results[is_skill_row].copy()
results['club_index'] = club_index[is_skill_row].astype(int)
results['club_cod'] = results['club_index'].map(clubs)

results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Palmeiras / SP,0.705238
1,Flamengo / RJ,0.650046
2,Atlético Mineiro / MG,0.493314
3,Corinthians / SP,0.491645
4,Internacional / RS,0.476837


In [7]:
# Club ranking by mean skill from the BT_2 results file.
with open(f'../real_data/BT_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)

# The mapping assumes 1-based indices: skill[k] -> k-th entry of model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

results = pd.read_csv(f'../results/results_BT_2_{min_year}_to_{max_year}.csv')
# Keep exactly the rows shaped like "skill[<k>]". Filtering on the extracted
# index keeps filter and parser in agreement, and `.copy()` avoids assigning
# new columns to a slice of the original frame.
club_index = results['variable'].str.extract(r'skill\[(\d+)\]', expand=False)
is_skill_row = club_index.notna()
results = results[is_skill_row].copy()
results['club_index'] = club_index[is_skill_row].astype(int)
results['club_cod'] = results['club_index'].map(clubs)

results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Flamengo / RJ,0.859827
1,Palmeiras / SP,0.790723
2,Atlético Mineiro / MG,0.505037
3,Grêmio / RS,0.388062
4,Internacional / RS,0.337127


In [8]:
# Club skills from the Poisson_1 results file, shown with every summary column.
with open(f'../real_data/Poisson_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)

# The mapping assumes 1-based indices: skills[k] -> k-th entry of model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

results = pd.read_csv(f'../results/results_Poisson_1_{min_year}_to_{max_year}.csv')
# The previous filter looked for "skill" while the parser required "skills[<k>]";
# any row matching the former but not the latter became NaN and broke the
# integer cast. Filtering on the extracted index removes that mismatch, and
# `.copy()` avoids assigning new columns to a slice of the original frame.
club_index = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False)
is_skill_row = club_index.notna()
results = results[is_skill_row].copy()
results['club_index'] = club_index[is_skill_row].astype(int)
results['club_cod'] = results['club_index'].map(clubs)

results.sort_values(by='mean', ascending=False, ignore_index=True)

Unnamed: 0,variable,mean,median,sd,mad,q5,q95,q2.75,q97.5,club_index,club_cod
0,skills[11],1.345897,1.340625,0.09673,0.098252,1.195901,1.508975,1.172576,1.545137,11,Flamengo / RJ
1,skills[7],1.320579,1.317205,0.096389,0.094716,1.16976,1.48152,1.149019,1.515632,7,Palmeiras / SP
2,skills[3],1.168935,1.16428,0.085728,0.084026,1.032855,1.314018,1.015159,1.348011,3,Atlético Mineiro / MG
3,skills[16],1.120853,1.11711,0.081732,0.080513,0.99292,1.264692,0.975257,1.296485,16,Internacional / RS
4,skills[9],1.052729,1.05204,0.075244,0.076154,0.934782,1.17888,0.915749,1.208595,9,São Paulo / SP
5,skills[21],1.047707,1.044105,0.080241,0.08009,0.923267,1.181202,0.903283,1.215887,21,Red Bull Bragantino / SP
6,skills[13],1.046478,1.044335,0.076505,0.074745,0.924633,1.175593,0.905092,1.201361,13,Fluminense / RJ
7,skills[19],1.030513,1.028815,0.07553,0.07582,0.913504,1.158582,0.892519,1.18911,19,Athletico Paranaense / PR
8,skills[18],1.015383,1.01243,0.072972,0.072337,0.901288,1.137579,0.882963,1.165391,18,Corinthians / SP
9,skills[1],0.999999,0.999998,9.7e-05,9.2e-05,0.99984,1.00016,0.999815,1.00019,1,Grêmio / RS


In [9]:
# Top-10 club ranking by mean skill from the Poisson_2 results file.
with open(f'../real_data/Poisson_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)

# The mapping assumes 1-based indices: skills[k] -> k-th entry of model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

results = pd.read_csv(f'../results/results_Poisson_2_{min_year}_to_{max_year}.csv')
# The previous filter looked for "skill" while the parser required "skills[<k>]";
# any row matching the former but not the latter became NaN and broke the
# integer cast. Filtering on the extracted index removes that mismatch, and
# `.copy()` avoids assigning new columns to a slice of the original frame.
club_index = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False)
is_skill_row = club_index.notna()
results = results[is_skill_row].copy()
results['club_index'] = club_index[is_skill_row].astype(int)
results['club_cod'] = results['club_index'].map(clubs)

results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head(10)

Unnamed: 0,club_cod,mean
0,Flamengo / RJ,1.394586
1,Palmeiras / SP,1.360014
2,Atlético Mineiro / MG,1.194682
3,Internacional / RS,1.144756
4,São Paulo / SP,1.073044
5,Red Bull Bragantino / SP,1.066298
6,Fluminense / RJ,1.057638
7,Athletico Paranaense / PR,1.045957
8,Corinthians / SP,1.032847
9,Grêmio / RS,0.999999


In [10]:
# Overall club ranking from the KN_1 results file, combining the four
# per-club components: a (attack), d (defense), ha and hd (the "delta" terms).
with open(f'../real_data/KN_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)

results = pd.read_csv(f'../results/results_KN_1_{min_year}_to_{max_year}.csv')
# `.copy()` so the new columns are added to an independent frame rather than
# to a slice of the one returned by read_csv.
results = results[~results['variable'].isin(['lp__', 'mu'])].copy()

# Indices k and k + n_clubs both resolve to the k-th club (1-based), so a
# parameter vector of length 2 * n_clubs maps back onto the same clubs.
n_clubs = len(model_data['clubs'])
clubs = dict()
for inx, cod in enumerate(model_data['clubs']):
    clubs[inx + 1] = cod
    clubs[inx + n_clubs + 1] = cod
results['club_index'] = results['variable'].str.extract(r'\[(\d+)\]').astype(int)
results['club_cod'] = results['club_index'].map(clubs)

# 'ha'/'hd' start with 'h', so the 'a'/'d' prefixes do not pick them up.
attack_results = results[results['variable'].str.startswith('a')][['club_cod', 'mean']]
defense_results = results[results['variable'].str.startswith('d')][['club_cod', 'mean']]
delta_attack_results = results[results['variable'].str.startswith('ha')][['club_cod', 'mean']]
delta_defense_results = results[results['variable'].str.startswith('hd')][['club_cod', 'mean']]

# Name each component explicitly before merging instead of relying on merge
# suffixes; the resulting columns are the same:
# club_cod, mean_attack, mean_defense, mean_delta_atk, mean_delta_def.
overall_results = (
    attack_results.rename(columns={'mean': 'mean_attack'})
    .merge(defense_results.rename(columns={'mean': 'mean_defense'}), on='club_cod')
    .merge(delta_attack_results.rename(columns={'mean': 'mean_delta_atk'}), on='club_cod')
    .merge(delta_defense_results.rename(columns={'mean': 'mean_delta_def'}), on='club_cod')
)

overall_results['overall'] = (
    overall_results['mean_attack']
    - overall_results['mean_defense']
    + overall_results['mean_delta_atk']
    - overall_results['mean_delta_def']
)

# A club with several rows per component yields several merged rows;
# averaging per club collapses them to one line each.
(
    overall_results
    .groupby('club_cod')
    .mean()
    .reset_index()
    .sort_values(by='overall', ascending=False, ignore_index=True)
    .head()
)


Unnamed: 0,club_cod,mean_attack,mean_defense,mean_delta_atk,mean_delta_def,overall
0,Palmeiras / SP,0.259089,-0.172874,0.117177,-0.083499,0.632639
1,Flamengo / RJ,0.368535,-0.050599,0.153718,-0.040468,0.613321
2,Atlético Mineiro / MG,0.195741,-0.103115,0.08599,-0.042641,0.427487
3,Internacional / RS,0.104985,-0.119163,0.080185,-0.043854,0.348187
4,Grêmio / RS,0.212753,0.02562,0.107411,0.000105,0.29444


In [11]:
# Overall club ranking (attack minus defense) from the KN_2 results file.
with open(f'../real_data/KN_model_data_{min_year}_to_{max_year}.json') as f:
    model_data = json.load(f)

# The mapping assumes 1-based indices: x[k] -> k-th entry of model_data['clubs'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['clubs'])}

results = pd.read_csv(f'../results/results_KN_2_{min_year}_to_{max_year}.csv')
# `.copy()` so the new columns are added to an independent frame rather than
# to a slice of the one returned by read_csv.
results = results[~results['variable'].isin(['lp__', 'mu'])].copy()
results['club_index'] = results['variable'].str.extract(r'\[(\d+)\]').astype(int)
results['club_cod'] = results['club_index'].map(clubs)

# NOTE(review): substring selectors — these are only unambiguous while no
# variable name contains both letters or an unrelated 'a'/'d'; the KN_1 cell
# uses str.startswith for the same purpose. Confirm against the KN_2 variables.
attack_results = results[results['variable'].str.contains('a')][['club_cod', 'mean']]
defense_results = results[results['variable'].str.contains('d')][['club_cod', 'mean']]

# Explicit renames give the same columns the merge suffixes used to produce:
# club_cod, mean_attack, mean_defense.
overall_results = attack_results.rename(columns={'mean': 'mean_attack'}).merge(
    defense_results.rename(columns={'mean': 'mean_defense'}),
    on='club_cod',
)
overall_results['overall'] = overall_results['mean_attack'] - overall_results['mean_defense']
overall_results.sort_values(by='overall', ascending=False, ignore_index=True).head()

Unnamed: 0,club_cod,mean_attack,mean_defense,overall
0,Palmeiras / SP,0.350004,-0.274996,0.625001
1,Flamengo / RJ,0.4807,-0.102199,0.582899
2,Atlético Mineiro / MG,0.257943,-0.154443,0.412385
3,Internacional / RS,0.175777,-0.179461,0.355237
4,Grêmio / RS,0.299643,0.018199,0.281444
