In [78]:
import numpy as np
import pandas as pd
# from numpy.random import poisson
from scipy.stats import truncnorm, poisson
from scipy.optimize import minimize

# Usando um parâmetro por jogador

In [26]:
def generate_games():
    games = [[chr(i + 65) for i in range(20)] for j in range(20)]
    for i in range(20):
        games[i].pop(i)
        
    return games

def generate_players(temps = 15):
    players = list(np.random.normal(70, 10, 220))
    players = [round(player, 1) if player < 100 else 100 for player in players]
    clubs = {}
    for year in range(temps):
        line_up = {}
        if type(players) != list:
            np.random.shuffle(list(players.flat))

        players = np.reshape(players, (20, 11))
        for club in range(20):
            line_up[chr(65 + club)] = players[club]

        clubs[year] = line_up
        
    return clubs

def find_median(array):
    arraycopy = array.copy()
    arraycopy.sort()
    if len(arraycopy) % 2 == 0:
        return (arraycopy[len(arraycopy) // 2 - 1] + arraycopy[len(arraycopy) // 2])/2
    else:
        return arraycopy[len(arraycopy) // 2]
    
def find_forces(clubs, year = 0):
    atk_forces = []
    def_forces = []
    for club in clubs[year]:
        atk_forces.append(np.sum(clubs[year][club]))
        def_forces.append(np.log(np.prod(clubs[year][club])))
            
    atk_med = find_median(atk_forces)
    def_med = find_median(def_forces)
    
    for i in range(len(atk_forces)):
        atk_forces[i] = atk_forces[i] / atk_med * 2
        def_forces[i] /= def_med
            
    return atk_forces, def_forces

def model(club1, club2, sims = 10000):
    '''
    receives 2 clubs (forces) and returns the most likely result, probability of each result and probability for the game
    '''
    results = np.zeros((6, 6), dtype = int)
    probs = [0, 0, 0]
    for i in range(sims):
        goals1 = poisson.rvs(club1[0] - club2[1])
        goals2 = poisson.rvs(club2[0] - club1[1])
        if goals1 > 5:
            goals1 = 5
        if goals2 > 5:
            goals2 = 5
        
        results[goals1, goals2] += 1
        
        if goals1 > goals2:
            probs[0] += 1
        elif goals1 == goals2:
            probs[1] += 1
        else:
            probs[2] += 1
    
    for i in range(3):
        probs[i] /= sims
        
    result = [0, 0] # Home - away
    for i in range(len(results)):
        for j in range(len(results[i])):
            if results[i, j] > results[result[0], result[1]]:
                result = [i, j]
            elif results[i, j] == results[result[0], result[1]]:
                # rare cases
                if np.random.random() > 0.5:
                    result = [i, j]
    
    return results, result, probs

In [27]:
def championship(clubs, sims = 10000, year = 0):
    atk_forces, def_forces = find_forces(clubs, year = year)
    games = generate_games()
    results = []
    table = np.zeros((20, 8), dtype = int)
    # lines = clubs
    # columns = {points, games, wins, draws, defeats, goals for, goals against, goal difference}
    
    for i in range(len(games)):
        home = i
        for away in games[i]:
            away = ord(away) - 65
            club1 = (atk_forces[home], def_forces[home])
            club2 = (atk_forces[away], def_forces[away])
            _, result, _ = model(club1, club2, sims = sims)
            results.append([chr(home + 65), result[0], result[1], chr(away + 65)])
            
            # games
            table[home, 1] += 1
            table[away, 1] += 1
            
            # goals
            table[home, 5] += result[0]
            table[away, 5] += result[1]
            table[home, 6] += result[1]
            table[away, 6] += result[0]
            table[home, 7] += result[0] - result[1]
            table[away, 7] += result[1] - result[0]
            
            # results
            if result[0] > result[1]:
                table[home, 2] += 1
                table[home, 0] += 3
                table[away, 4] += 1
            elif result[0] == result[1]:
                table[home, 3] += 1
                table[away, 3] += 1
                table[home, 0] += 1
                table[away, 0] += 1
            else:
                table[home, 4] += 1
                table[away, 2] += 1
                table[away, 0] += 3
    
    columns_names = ['points', 'games', 'wins', 'draws', 'defeats', 'goals for', 'goals against', 'goal difference']
    
    table = pd.DataFrame(table,
                        index = [chr(i + 65) for i in range(20)],
                        columns = columns_names)
    table.sort_values(['points',  'wins', 'goal difference', 'goals for'],
                      axis = 0,
                      ascending = False,
                      inplace = True)
    
    return table, results

In [28]:
clubs = generate_players(temps = 1)
atk_forces, def_forces = find_forces(clubs)

# exemplo
club1 = (atk_forces[ord('A') - 65], def_forces[ord('A') - 65])
club2 = (atk_forces[ord('B') - 65], def_forces[ord('B') - 65])
results, result, probs = model(club1, club2, sims = 10000)

print('Forças do clube 1:', club1)
print('Forças do clube 2:', club2)
print()
print('Quantidade de vezes que o placar foi i x j (linha 5 e coluna 5 representam 5 ou mais gols):')
print(results)
print()
print('O resultado mais provável é {} x {}.'.format(result[0], result[1]))
print('Além disso, a probabilidade do mandante é {}, enquanto a do visitante é de {} e a de empate é {}.'.format(probs[0], probs[2], probs[1]))

Forças do clube 1: (1.9990863407948833, 1.0003636704179553)
Forças do clube 2: (1.986556157410429, 0.9978131874605647)

Quantidade de vezes que o placar foi i x j (linha 5 e coluna 5 representam 5 ou mais gols):
[[1366 1346  625  250   44   15]
 [1344 1354  707  197   59   20]
 [ 713  691  347  114   40    3]
 [ 213  214  102   40   12    2]
 [  56   54   29    7    2    0]
 [  13   12    7    2    0    0]]

O resultado mais provável é 0 x 0.
Além disso, a probabilidade do mandante é 0.3457, enquanto a do visitante é de 0.3434 e a de empate é 0.3109.


In [29]:
df, results = championship(clubs, sims = 1000)
df

Unnamed: 0,points,games,wins,draws,defeats,goals for,goals against,goal difference
C,80,38,22,14,2,32,12,20
T,79,38,21,16,1,33,13,20
H,67,38,15,22,1,31,17,14
F,66,38,17,15,6,25,14,11
I,66,38,17,15,6,23,12,11
N,63,38,15,18,5,26,16,10
S,63,38,14,21,3,22,11,11
G,60,38,14,18,6,22,14,8
Q,51,38,12,15,11,19,18,1
A,49,38,12,13,13,17,18,-1


In [30]:
results

[['A', 0, 1, 'B'],
 ['A', 0, 1, 'C'],
 ['A', 1, 0, 'D'],
 ['A', 0, 1, 'E'],
 ['A', 1, 0, 'F'],
 ['A', 1, 0, 'G'],
 ['A', 0, 1, 'H'],
 ['A', 0, 1, 'I'],
 ['A', 1, 0, 'J'],
 ['A', 1, 1, 'K'],
 ['A', 1, 0, 'L'],
 ['A', 1, 0, 'M'],
 ['A', 0, 1, 'N'],
 ['A', 0, 0, 'O'],
 ['A', 0, 0, 'P'],
 ['A', 0, 0, 'Q'],
 ['A', 0, 0, 'R'],
 ['A', 0, 0, 'S'],
 ['A', 1, 1, 'T'],
 ['B', 1, 1, 'A'],
 ['B', 0, 1, 'C'],
 ['B', 0, 0, 'D'],
 ['B', 0, 1, 'E'],
 ['B', 0, 0, 'F'],
 ['B', 0, 1, 'G'],
 ['B', 1, 1, 'H'],
 ['B', 0, 1, 'I'],
 ['B', 0, 0, 'J'],
 ['B', 0, 1, 'K'],
 ['B', 0, 0, 'L'],
 ['B', 0, 0, 'M'],
 ['B', 1, 0, 'N'],
 ['B', 0, 0, 'O'],
 ['B', 0, 0, 'P'],
 ['B', 0, 0, 'Q'],
 ['B', 1, 0, 'R'],
 ['B', 0, 0, 'S'],
 ['B', 1, 1, 'T'],
 ['C', 1, 0, 'A'],
 ['C', 1, 1, 'B'],
 ['C', 1, 0, 'D'],
 ['C', 1, 0, 'E'],
 ['C', 0, 0, 'F'],
 ['C', 1, 1, 'G'],
 ['C', 1, 1, 'H'],
 ['C', 1, 1, 'I'],
 ['C', 0, 0, 'J'],
 ['C', 1, 1, 'K'],
 ['C', 1, 0, 'L'],
 ['C', 1, 0, 'M'],
 ['C', 1, 1, 'N'],
 ['C', 1, 0, 'O'],
 ['C', 1, 0,

Perceba que fazendo

$força~de~ataque~do~time = \sum força~dos~jogadores \text{ e } força~de~defesa~do~time = \sum \log{força~dos~jogadores},$

um time com maior força de ataque terá maior força de defesa, o que não é compatível com a realidade.

# Usando dois parâmetros por jogador

Cada jogador tem dois parâmetros: ataque e defesa

In [39]:
def generate_players(clubs = 20, players_per_club = 23):
    '''
    creates players
    '''
    n_players = clubs * players_per_club
    players_atk = truncnorm.rvs(0, 10, loc = 2, scale = 0.5, size = n_players)
    players_def = truncnorm.rvs(0, 10, loc = 1, scale = 0.5, size = n_players)
    players = {}
    for i in range(n_players):
        players['P{:04d}'.format(i)] = [round(players_atk[i], 8) if players_atk[i] < 100 else 100,
                                        round(players_def[i], 8) if players_def[i] < 100 else 100]
        
    return players

def change_clubs(clubs, changes):
    '''
    change clubs
    '''
    clubs_f = {}
    changing = []
    change = 0
    for i in range(n_clubs):
        club_players = clubs[chr(i + 65)].copy()
        np.random.shuffle(club_players)
        for j in range(changes[i]):
            changing.append(club_players[j])

    np.random.shuffle(changing)
    for i in range(n_clubs):
        club_players = clubs[chr(i + 65)].copy()
        ind = []
        for player in club_players:
            if player in changing:
                ind.append(club_players.index(player))

        for j in range(len(ind)):
            club_players[ind[j]] = changing[change]
            change += 1

        clubs_f[chr(i + 65)] = club_players

    return clubs_f

def generate_clubs(players, years = 15, n_clubs = 20, players_per_club = 23):
    '''
    receives players and allocates them to clubs
    '''
    clubs = {}
    all_players = list(players)
    for year in range(years):
        line_up = {chr(65 + club) : {} for club in range(n_clubs)}
        clubs[year] = line_up
        for club in range(n_clubs):
            if year == 0:
                clubs[year][chr(65 + club)] = all_players[players_per_club * club:players_per_club * (club + 1)]

            else:
                changes = poisson.rvs(players_per_club/4, size = n_clubs)
                for i in range(n_clubs):
                    if changes[i] > players_per_club:
                        changes[i] = players_per_club

                    clubs[year] = change_clubs(clubs[year - 1], changes)
    return clubs

In [40]:
years = 15
n_clubs = 20
players_per_club = 23
players = generate_players()
clubs = generate_clubs(players)

In [41]:
def find_forces(line_up, players):
    atk_force = 0
    def_force = 0
    for player in line_up:
        atk_force += 1.5*players[player][0]
        def_force += players[player][1]
    
    return atk_force/11, def_force/11

def model(club1, club2, players, sims = 10000):
    '''
    receives 2 clubs (line_up) and returns the most likely result, probability of each result and probability for the game
    '''
    atk1, def1 = find_forces(club1, players)
    atk2, def2 = find_forces(club2, players)
    results = np.zeros((6, 6), dtype = int)
    probs = [0, 0, 0]
    for i in range(sims):
        goals1 = poisson.rvs(atk1 - def2)
        goals2 = poisson.rvs(atk2 - def1)
        if goals1 > 5:
            goals1 = 5
        if goals2 > 5:
            goals2 = 5
        
        results[goals1, goals2] += 1
        
        if goals1 > goals2:
            probs[0] += 1
        elif goals1 == goals2:
            probs[1] += 1
        else:
            probs[2] += 1
    
    for i in range(3):
        probs[i] /= sims
        
    result = [0, 0] # Home - away
    for i in range(len(results)):
        for j in range(len(results[i])):
            if results[i, j] > results[result[0], result[1]]:
                result = [i, j]
            elif results[i, j] == results[result[0], result[1]]:
                # rare cases
                if np.random.random() > 0.5:
                    result = [i, j]
    
    return results, result, probs

In [42]:
def championship(clubs, players, sims = 10000, year = 0):
    games = generate_games()
    results = []
    line_up = []
    table = np.zeros((20, 8), dtype = int)
    # lines = clubs
    # columns = {points, games, wins, draws, defeats, goals for, goals against, goal difference}
    
    for i in range(len(games)):
        home = i
        for away in games[i]:
            away = ord(away) - 65
            
            club1 = clubs[year][chr(home + 65)].copy()
            np.random.shuffle(club1)
            home_line_up = club1[:11]
            
            club2 = clubs[year][chr(away + 65)].copy()
            np.random.shuffle(club2)
            away_line_up = club2[:11]
            
            _, result, _ = model(home_line_up, away_line_up, players, sims = sims)
            results.append([chr(home + 65), result[0], result[1], chr(away + 65)])
            line_up.append([home_line_up, away_line_up])
            
            # games
            table[home, 1] += 1
            table[away, 1] += 1
            
            # goals
            table[home, 5] += result[0]
            table[away, 5] += result[1]
            table[home, 6] += result[1]
            table[away, 6] += result[0]
            table[home, 7] += result[0] - result[1]
            table[away, 7] += result[1] - result[0]
            
            # results
            if result[0] > result[1]:
                table[home, 2] += 1
                table[home, 0] += 3
                table[away, 4] += 1
            elif result[0] == result[1]:
                table[home, 3] += 1
                table[away, 3] += 1
                table[home, 0] += 1
                table[away, 0] += 1
            else:
                table[home, 4] += 1
                table[away, 2] += 1
                table[away, 0] += 3
    
    columns_names = ['points', 'games', 'wins', 'draws', 'defeats', 'goals for', 'goals against', 'goal difference']
    
    table = pd.DataFrame(table,
                        index = [chr(i + 65) for i in range(20)],
                        columns = columns_names)
    table.sort_values(['points',  'wins', 'goal difference', 'goals for'],
                      axis = 0,
                      ascending = False,
                      inplace = True)
    
    return table, results, line_up

In [43]:
table, results, line_up = championship(clubs, players, sims = 1000, year = 0)

In [44]:
table

Unnamed: 0,points,games,wins,draws,defeats,goals for,goals against,goal difference
K,67,38,17,16,5,67,55,12
J,63,38,15,18,5,66,56,10
Q,61,38,16,13,9,60,53,7
C,60,38,14,18,6,67,58,9
M,60,38,13,21,4,71,61,10
S,56,38,12,20,6,72,65,7
B,53,38,11,20,7,70,65,5
I,50,38,9,23,6,68,65,3
L,45,38,8,21,9,62,63,-1
N,45,38,8,21,9,65,68,-3


In [45]:
results

[['A', 1, 2, 'B'],
 ['A', 1, 2, 'C'],
 ['A', 1, 1, 'D'],
 ['A', 2, 1, 'E'],
 ['A', 1, 1, 'F'],
 ['A', 2, 2, 'G'],
 ['A', 2, 1, 'H'],
 ['A', 1, 2, 'I'],
 ['A', 1, 1, 'J'],
 ['A', 1, 2, 'K'],
 ['A', 2, 2, 'L'],
 ['A', 2, 2, 'M'],
 ['A', 2, 2, 'N'],
 ['A', 2, 2, 'O'],
 ['A', 2, 1, 'P'],
 ['A', 1, 2, 'Q'],
 ['A', 1, 1, 'R'],
 ['A', 2, 2, 'S'],
 ['A', 1, 2, 'T'],
 ['B', 2, 1, 'A'],
 ['B', 2, 2, 'C'],
 ['B', 2, 2, 'D'],
 ['B', 2, 1, 'E'],
 ['B', 2, 2, 'F'],
 ['B', 2, 2, 'G'],
 ['B', 2, 2, 'H'],
 ['B', 2, 2, 'I'],
 ['B', 2, 1, 'J'],
 ['B', 2, 1, 'K'],
 ['B', 2, 1, 'L'],
 ['B', 2, 2, 'M'],
 ['B', 2, 2, 'N'],
 ['B', 2, 2, 'O'],
 ['B', 3, 1, 'P'],
 ['B', 1, 2, 'Q'],
 ['B', 2, 2, 'R'],
 ['B', 2, 2, 'S'],
 ['B', 2, 1, 'T'],
 ['C', 2, 2, 'A'],
 ['C', 2, 2, 'B'],
 ['C', 2, 1, 'D'],
 ['C', 2, 1, 'E'],
 ['C', 2, 1, 'F'],
 ['C', 1, 2, 'G'],
 ['C', 2, 2, 'H'],
 ['C', 2, 2, 'I'],
 ['C', 1, 1, 'J'],
 ['C', 2, 2, 'K'],
 ['C', 2, 2, 'L'],
 ['C', 2, 2, 'M'],
 ['C', 2, 2, 'N'],
 ['C', 2, 2, 'O'],
 ['C', 1, 1,

In [46]:
line_up

[[['P0017',
   'P0007',
   'P0011',
   'P0014',
   'P0022',
   'P0001',
   'P0002',
   'P0008',
   'P0010',
   'P0020',
   'P0012'],
  ['P0035',
   'P0033',
   'P0026',
   'P0023',
   'P0042',
   'P0043',
   'P0030',
   'P0040',
   'P0029',
   'P0038',
   'P0025']],
 [['P0001',
   'P0003',
   'P0020',
   'P0009',
   'P0017',
   'P0008',
   'P0014',
   'P0018',
   'P0013',
   'P0022',
   'P0005'],
  ['P0066',
   'P0063',
   'P0047',
   'P0062',
   'P0057',
   'P0055',
   'P0049',
   'P0056',
   'P0050',
   'P0051',
   'P0046']],
 [['P0005',
   'P0009',
   'P0021',
   'P0003',
   'P0022',
   'P0004',
   'P0017',
   'P0018',
   'P0016',
   'P0014',
   'P0006'],
  ['P0090',
   'P0072',
   'P0088',
   'P0079',
   'P0076',
   'P0077',
   'P0074',
   'P0084',
   'P0075',
   'P0071',
   'P0085']],
 [['P0002',
   'P0020',
   'P0019',
   'P0009',
   'P0013',
   'P0017',
   'P0003',
   'P0007',
   'P0018',
   'P0011',
   'P0000'],
  ['P0096',
   'P0101',
   'P0110',
   'P0107',
   'P0098',
   'P0

# Engenharia reversa

In [159]:
players_er = {}
for game in line_up:
    for player in game[0]:
        if player not in players_er:
            players_er[player] = [truncnorm.rvs(0, 10, loc = 3, scale = 1), truncnorm.rvs(0, 10, loc = 1, scale = 1)]
    for player in game[1]:
        if player not in players_er:
            players_er[player] = [truncnorm.rvs(0, 10, loc = 1, scale = 1), truncnorm.rvs(0, 10, loc = 1, scale = 1)]
            
players_er = [players_er[player] for player in players_er]
players_er

[[3.854193793066773, 2.506010810412105],
 [3.240546517025823, 1.9057505252020341],
 [5.245759547274336, 1.0873304099790935],
 [3.5889971608528506, 1.6268390603191554],
 [3.268664495591445, 1.6561143668404297],
 [3.3548625099553027, 1.3732269257075105],
 [3.346741604676704, 2.429218048208243],
 [3.755440338901876, 1.407226204035879],
 [3.530518080036982, 1.5210437204561953],
 [4.2024589672552946, 2.7658382203775185],
 [3.0616082933978945, 2.350091449401981],
 [1.0480428074564718, 1.4171414031619411],
 [1.0937401981257604, 1.0261093058077722],
 [2.798705783375846, 3.007253355900596],
 [1.0704934567917346, 1.6967605602636504],
 [2.2229601317773273, 1.8505348412693814],
 [1.7034069872110327, 3.5767270364254493],
 [1.2155890199454822, 2.77371197084778],
 [2.6032846084574635, 1.4556675094464486],
 [1.4098592819605962, 2.104092046603977],
 [1.7878210299830344, 2.12323451558385],
 [1.026808881086425, 2.5689761890973726],
 [3.3930548506637304, 1.7868201757764681],
 [4.610091474301226, 1.6478757

In [160]:
def find_forces(line_up, players):
    atk_force = 0
    def_force = 0
    for player in line_up:
        atk_force += 1.5*players[player][0]
        def_force += players[player][1]
    
    return atk_force/11, def_force/11

def find_players(players_er):
    players = {}
    for player in players_er:
        if player not in players:
            players[player] = players_er[players_er.index(player)]
    
    return players

def likelihood(players_er, results, line_up):
    '''
    players : parameter list
    results : list
    line_up : list
    '''
    
    players_er = find_players(players_er)
    loglikelihood = 0
    for game in line_up:               
        ind = line_up.index(game)
        atk1, def1 = find_forces(game[0], players_er)
        atk2, def2 = find_forces(game[1], players_er)
        
        loglikelihood += poisson.logpmf(results[ind][1], atk1 - def2)
        loglikelihood += poisson.logpmf(results[ind][2], atk2 - def1)
    
    return loglikelihood

In [161]:
likelihood(players_er, results, line_up)

TypeError: unhashable type: 'list'

In [130]:
minimize(likelihood, players_er, args = (results, line_up))

IndexError: invalid index to scalar variable.

In [145]:
def test(x, y):
    return x[0]**2 + 4*x[0]
    
minimize(test, [1.5, 1], args = (10)).x

array([-2.,  1.])

In [80]:
minimize(likelihood, players, args = (results, line_up))

## fazer players virar uma lista e mudar isso para dicionário dentro da função likelihood

TypeError: float() argument must be a string or a number, not 'dict'

[[2.3835369784607274, 2.4110773646586527],
 [1.6621280040680886, 1.2047788130619042],
 [1.7368165742924595, 1.3134394341961528],
 [1.9306062139994844, 2.335946865218066],
 [1.9675017778378838, 1.9643600806396126],
 [2.4872143711279326, 1.1942496415778687],
 [1.8277939564660888, 1.282697685689624],
 [2.3368594011594834, 1.780932817113245],
 [2.0762723316280316, 1.3210206081663038],
 [2.58244893275309, 1.3967564052199721],
 [1.3802440428875626, 2.332892209204986],
 [3.4700935432252176, 1.328692639007129],
 [1.4479979851415672, 1.2853498272631574],
 [1.6763415777065171, 1.0486629780800663],
 [2.1207895714346714, 1.4122337602584658],
 [1.1700738432209714, 1.6424632885886696],
 [2.003758815273521, 1.3660051953680112],
 [1.0149885032184682, 2.304576459084924],
 [3.288421044139697, 2.585835319060134],
 [1.7682278431398593, 1.2954632770758705],
 [1.687819554425328, 1.9744313621865315],
 [2.645307219587515, 1.8179713374015753],
 [1.787750742751591, 1.028921640932125],
 [1.2922048588768784, 2.10