In [1]:
import json
import pandas as pd

from itertools import product

In [2]:
def get_top_players(min_year, max_year=None, top=10, min_played_time=450):
    """Return the best-rated players of the "Poisson with players" model fit.

    For every season in ``[min_year, max_year]`` the processed squads and
    games JSON files are read; the model data JSON and results CSV are read
    once (single-season file names when ``min_year == max_year``,
    ``{min_year}_to_{max_year}`` file names otherwise). All paths are relative
    to the current working directory.

    Parameters
    ----------
    min_year : int
        First season to include.
    max_year : int, optional
        Last season to include (inclusive). Defaults to ``min_year``.
    top : int
        Number of players to return.
    min_played_time : int or float
        Minimum accumulated ``'Time'`` (summed over every sub-game in which
        the player is listed in a squad) required to be eligible. The unit is
        whatever the squads files use -- presumably minutes, TODO confirm.

    Returns
    -------
    dict
        ``{player_cod: {'name': [...], 'clubs': [...]}}`` with players ordered
        by descending ``mean`` of their ``skills[i]`` row. Names and clubs are
        de-duplicated and listed in first-seen order, so the output is
        reproducible across kernel restarts.

    Raises
    ------
    FileNotFoundError
        If any of the expected input files is missing.
    """
    if max_year is None:
        max_year = min_year
    seasons = range(min_year, max_year + 1)

    # Accumulate, per player code, the 'Time' of every sub-game the player
    # appears in (either side). 'Summary' entries are not sub-games.
    played_time_per_player = {}
    for year in seasons:
        with open(f'../../../Data/results/processed/Serie_A_{year}_squads.json') as f:
            squads = json.load(f)
        for game in squads.values():
            for label, sub_game in game.items():
                if label == 'Summary':
                    continue
                for side in ('Home', 'Away'):
                    for player in sub_game[side]['Squad']:
                        played_time_per_player[player] = (
                            played_time_per_player.get(player, 0) + sub_game['Time']
                        )
    played_time_per_player = pd.Series(played_time_per_player)

    period = f'{min_year}' if min_year == max_year else f'{min_year}_to_{max_year}'
    with open(f'../real_data/Poisson_with_players_model_data_{period}.json') as f:
        model_data = json.load(f)
    results = pd.read_csv(f'../results/results_Poisson_with_players_{period}.csv')

    # The results file refers to players by 1-based position in model_data['players'].
    players_map = pd.Series({idx + 1: cod for idx, cod in enumerate(model_data['players'])})

    # Keep only rows that really are indexed skill parameters. Filtering on the
    # extraction result (rather than a looser substring test) means a stray
    # 'skills...' row without an index cannot break the integer conversion, and
    # .copy() makes the column assignments below write to an independent frame.
    skill_index = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False)
    is_skill = skill_index.notna()
    skills = results.loc[is_skill].copy()
    skills['player_index'] = skill_index[is_skill].astype(int)
    skills['player_cod'] = skills['player_index'].map(players_map)
    skills['played_time'] = skills['player_cod'].map(played_time_per_player)

    eligible = skills[skills['played_time'] >= min_played_time]
    top_cods = eligible.sort_values(by='mean', ascending=False).head(top)['player_cod'].tolist()
    top_players = {cod: {'name': [], 'clubs': []} for cod in top_cods}

    for year in seasons:
        with open(f'../../../Data/results/processed/Serie_A_{year}_games.json') as f:
            real_data = json.load(f)
        for game in real_data.values():
            for cod, player in product(top_players, game['Players']):
                # NOTE(review): substring match -- a code that is a substring of
                # another player's code would also match; confirm codes have a
                # fixed length.
                if cod not in player[0]:
                    continue

                name = player[0]
                for tag in ('RP', 'TP', 'R(g)P', 'T(g)P'):
                    name = name.replace(f'{tag}{cod}', '')

                # Drop the leading digits left in front of the name. The
                # emptiness guard avoids an IndexError when nothing but digits
                # remains after removing the code tag.
                while name and name[0].isdigit():
                    name = name[1:]

                top_players[cod]['name'].append(name.strip())
                top_players[cod]['clubs'].append(player[1])

    # dict.fromkeys de-duplicates while keeping first-seen order (a set would
    # give a different order from one interpreter session to the next).
    return {
        cod: {
            'name': list(dict.fromkeys(info['name'])),
            'clubs': list(dict.fromkeys(info['clubs'])),
        }
        for cod, info in top_players.items()
    }


In [3]:
# Top players for the single 2019 season, using the function's default
# `top` and `min_played_time`.
get_top_players(min_year=2019)

{'320593': {'name': ['Jorge Jorge Marco de Olive...',
   'Jorge Jorge Marco de Olive ...'],
  'clubs': ['Santos / SP']},
 '540839': {'name': ['Cazares Juan Ramon Cazares S ...'],
  'clubs': ['Atlético Mineiro / MG']},
 '298485': {'name': ['Michel Michel Ferreira dos ...',
   'Michel Michel Ferreira dos  ...'],
  'clubs': ['Grêmio / RS']},
 '409960': {'name': ['Luan Luan Guilherme de Je ...',
   'Luan Luan Guilherme de Je...'],
  'clubs': ['Grêmio / RS']},
 '185012': {'name': ['Joao Paulo Joao Paulo Mior'],
  'clubs': ['Botafogo / RJ']},
 '521990': {'name': ['De Arrascaeta Giorgian Daniel de A...',
   'De Arrascaeta Giorgian Daniel de A ...'],
  'clubs': ['Flamengo / RJ']},
 '177050': {'name': ['Fagner Fagner Conserva Lemos'],
  'clubs': ['Corinthians / SP']},
 '180622': {'name': ['Vladimir Vladimir Orlando Car ...',
   'Vladimir Vladimir Orlando Car...'],
  'clubs': ['Avaí / SC']},
 '159238': {'name': ['Diego Alves Diego Alves Carreira'],
  'clubs': ['Flamengo / RJ']},
 '291738': {'nam

In [4]:
# Top 10 players over 2019-2023 with a much stricter playing-time threshold.
# The threshold is the product 2 * 38 * 90 = 6840 -- presumably two full
# 38-round seasons of 90 minutes each; confirm the intended meaning.
get_top_players(min_year=2019, max_year=2023, top=10, min_played_time=2 * 38 * 90)

{'308316': {'name': ['Tadeu Tadeu Antonio Ferreira'], 'clubs': ['Goiás / GO']},
 '339876': {'name': ['Diogo Diogo Barbosa Mendanha',
   'Diogo Barbosa Diogo Barbosa Mendanha'],
  'clubs': ['Grêmio / RS', 'Fluminense / RJ', 'Palmeiras / SP']},
 '362877': {'name': ['Joao PauloJoao Paulo Silva Martins',
   'Joao Paulo Joao Paulo Silva Martins'],
  'clubs': ['Santos / SP']},
 '166276': {'name': ['Cássio Cassio Ramos'], 'clubs': ['Corinthians / SP']},
 '405713': {'name': ['Marlon Fre ... Marlon Rodrigues Freitas'],
  'clubs': ['Atlético / GO', 'Botafogo / RJ']},
 '188398': {'name': ['Everson Everson Felipe Marqu...',
   'EversonEverson Felipe Marqu ...',
   'Everson Everson Felipe Marqu ...'],
  'clubs': ['Atlético Mineiro / MG', 'Santos / SP']},
 '129292': {'name': ['Fabio Fabio Deivson Lopes ...',
   'Fabio Fabio Deivson Lopes  ...'],
  'clubs': ['Fluminense / RJ', 'Cruzeiro / MG']},
 '293428': {'name': ['Santos Aderbar Melo dos San...',
   'Santos Aderbar Melo dos San ...'],
  'clubs': [

In [5]:
# Five clubs with the highest mean `skill` in the BT_1 results for 2019-2023.
with open('../real_data/BT_model_data_2019_to_2023.json') as f:
    model_data = json.load(f)

# The results file refers to clubs by 1-based position in model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

results = pd.read_csv('../results/results_BT_1_2019_to_2023.csv')
# Keep only indexed skill rows: the filter uses the same pattern as the
# extraction below, so every kept row has an index to convert. .copy() makes
# the new columns land on an independent frame instead of a slice of the
# CSV frame (avoids SettingWithCopyWarning).
results = results[results['variable'].str.contains(r'skill\[\d+\]')].copy()
results['club_index'] = results['variable'].str.extract(r'skill\[(\d+)\]', expand=False).astype(int)
results['club_cod'] = results['club_index'].map(clubs)
results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Palmeiras / SP,0.705238
1,Flamengo / RJ,0.650046
2,Atlético Mineiro / MG,0.493314
3,Corinthians / SP,0.491645
4,Internacional / RS,0.476837


In [6]:
# Five clubs with the highest mean `skill` in the BT_2 results for 2019-2023.
with open('../real_data/BT_model_data_2019_to_2023.json') as f:
    model_data = json.load(f)

# The results file refers to clubs by 1-based position in model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

results = pd.read_csv('../results/results_BT_2_2019_to_2023.csv')
# Keep only indexed skill rows: the filter uses the same pattern as the
# extraction below, so every kept row has an index to convert. .copy() makes
# the new columns land on an independent frame instead of a slice of the
# CSV frame (avoids SettingWithCopyWarning).
results = results[results['variable'].str.contains(r'skill\[\d+\]')].copy()
results['club_index'] = results['variable'].str.extract(r'skill\[(\d+)\]', expand=False).astype(int)
results['club_cod'] = results['club_index'].map(clubs)
results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Flamengo / RJ,0.859827
1,Palmeiras / SP,0.790723
2,Atlético Mineiro / MG,0.505037
3,Grêmio / RS,0.388062
4,Internacional / RS,0.337127


In [7]:
# Five clubs with the highest mean `skills` in the Poisson_1 results for 2019-2023.
with open('../real_data/Poisson_model_data_2019_to_2023.json') as f:
    model_data = json.load(f)

# The results file refers to clubs by 1-based position in model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

results = pd.read_csv('../results/results_Poisson_1_2019_to_2023.csv')
# The filter and the extraction now use the same `skills[<n>]` pattern, so a
# row that merely contains "skill" can no longer reach the integer conversion
# without an index. .copy() makes the new columns land on an independent frame
# instead of a slice of the CSV frame (avoids SettingWithCopyWarning).
results = results[results['variable'].str.contains(r'skills\[\d+\]')].copy()
results['club_index'] = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False).astype(int)
results['club_cod'] = results['club_index'].map(clubs)
results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Flamengo / RJ,1.340302
1,Palmeiras / SP,1.312979
2,Atlético Mineiro / MG,1.163687
3,Internacional / RS,1.119664
4,São Paulo / SP,1.049573


In [8]:
# Five clubs with the highest mean `skills` in the Poisson_2 results for 2019-2023.
with open('../real_data/Poisson_model_data_2019_to_2023.json') as f:
    model_data = json.load(f)

# The results file refers to clubs by 1-based position in model_data['teams'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['teams'])}

results = pd.read_csv('../results/results_Poisson_2_2019_to_2023.csv')
# The filter and the extraction now use the same `skills[<n>]` pattern, so a
# row that merely contains "skill" can no longer reach the integer conversion
# without an index. .copy() makes the new columns land on an independent frame
# instead of a slice of the CSV frame (avoids SettingWithCopyWarning).
results = results[results['variable'].str.contains(r'skills\[\d+\]')].copy()
results['club_index'] = results['variable'].str.extract(r'skills\[(\d+)\]', expand=False).astype(int)
results['club_cod'] = results['club_index'].map(clubs)
results.sort_values(by='mean', ascending=False, ignore_index=True)[['club_cod', 'mean']].head()

Unnamed: 0,club_cod,mean
0,Flamengo / RJ,1.398124
1,Palmeiras / SP,1.364231
2,Atlético Mineiro / MG,1.197418
3,Internacional / RS,1.14719
4,São Paulo / SP,1.073839


In [9]:
# Overall club ranking from the KN_1 results for 2019-2023:
# overall = attack - defense + delta_attack - delta_defense.
with open('../real_data/KN_model_data_2019_to_2023.json') as f:
    model_data = json.load(f)

# Each club is reachable through two 1-based indices: its position i and
# i + n_clubs. (Presumably some parameter vectors have length 2 * n_clubs --
# TODO confirm against the model definition.)
n_clubs = len(model_data['clubs'])
clubs = dict()
for inx, cod in enumerate(model_data['clubs']):
    clubs[inx + 1] = cod
    clubs[inx + n_clubs + 1] = cod

results = pd.read_csv('../results/results_KN_1_2019_to_2023.csv')
# .copy() so the new columns are written to an independent frame rather than
# a slice of the CSV frame (avoids SettingWithCopyWarning).
results = results[~results['variable'].isin(['lp__', 'mu'])].copy()
results['club_index'] = results['variable'].str.extract(r'\[(\d+)\]', expand=False).astype(int)
results['club_cod'] = results['club_index'].map(clubs)

# Split the parameters by the prefix of their variable name.
attack_results = results[results['variable'].str.startswith('a')][['club_cod', 'mean']]
defense_results = results[results['variable'].str.startswith('d')][['club_cod', 'mean']]
delta_attack_results = results[results['variable'].str.startswith('ha')][['club_cod', 'mean']]
delta_defense_results = results[results['variable'].str.startswith('hd')][['club_cod', 'mean']]

# Name each `mean` column explicitly before merging instead of relying on
# merge suffixes; the resulting columns and their order are unchanged.
overall_results = (
    attack_results.rename(columns={'mean': 'mean_attack'})
    .merge(defense_results.rename(columns={'mean': 'mean_defense'}), on='club_cod')
    .merge(delta_attack_results.rename(columns={'mean': 'mean_delta_atk'}), on='club_cod')
    .merge(delta_defense_results.rename(columns={'mean': 'mean_delta_def'}), on='club_cod')
)

overall_results['overall'] = (
    overall_results['mean_attack']
    - overall_results['mean_defense']
    + overall_results['mean_delta_atk']
    - overall_results['mean_delta_def']
)

# A club with more than one row in any of the four tables yields several
# merged rows; averaging per club collapses them to one.
(
    overall_results
    .groupby('club_cod')
    .mean()
    .reset_index()
    .sort_values(by='overall', ascending=False, ignore_index=True)
    .head()
)


Unnamed: 0,club_cod,mean_attack,mean_defense,mean_delta_atk,mean_delta_def,overall
0,Palmeiras / SP,0.259089,-0.172874,0.117177,-0.083499,0.632639
1,Flamengo / RJ,0.368535,-0.050599,0.153718,-0.040468,0.613321
2,Atlético Mineiro / MG,0.195741,-0.103115,0.08599,-0.042641,0.427487
3,Internacional / RS,0.104985,-0.119163,0.080185,-0.043854,0.348187
4,Grêmio / RS,0.212753,0.02562,0.107411,0.000105,0.29444


In [10]:
# Overall club ranking from the KN_2 results for 2019-2023: overall = attack - defense.
with open('../real_data/KN_model_data_2019_to_2023.json') as f:
    model_data = json.load(f)

# The results file refers to clubs by 1-based position in model_data['clubs'].
clubs = {inx + 1: cod for inx, cod in enumerate(model_data['clubs'])}

results = pd.read_csv('../results/results_KN_2_2019_to_2023.csv')
# .copy() so the new columns are written to an independent frame rather than
# a slice of the CSV frame (avoids SettingWithCopyWarning).
results = results[~results['variable'].isin(['lp__', 'mu'])].copy()
results['club_index'] = results['variable'].str.extract(r'\[(\d+)\]', expand=False).astype(int)
results['club_cod'] = results['club_index'].map(clubs)

# NOTE(review): these are substring tests, so any variable name containing
# the letter 'a' (or 'd') is selected. This is fine if the only remaining
# variables are the attack and defense vectors -- confirm, or anchor the match.
attack_results = results[results['variable'].str.contains('a')][['club_cod', 'mean']]
defense_results = results[results['variable'].str.contains('d')][['club_cod', 'mean']]

overall_results = attack_results.merge(defense_results, on='club_cod', suffixes=('_attack', '_defense'))
overall_results['overall'] = overall_results['mean_attack'] - overall_results['mean_defense']
overall_results.sort_values(by='overall', ascending=False, ignore_index=True).head()

Unnamed: 0,club_cod,mean_attack,mean_defense,overall
0,Palmeiras / SP,0.350004,-0.274996,0.625001
1,Flamengo / RJ,0.4807,-0.102199,0.582899
2,Atlético Mineiro / MG,0.257943,-0.154443,0.412385
3,Internacional / RS,0.175777,-0.179461,0.355237
4,Grêmio / RS,0.299643,0.018199,0.281444
