In [1]:
import numpy as np
import pandas as pd
from scipy.stats import poisson

In [2]:
def generate_games():
    games = [[chr(i + 65) for i in range(20)] for j in range(20)]
    for i in range(20):
        games[i].pop(i)
        
    return games

def generate_players(temps = 15):
    players = list(np.random.normal(70, 10, 220))
    players = [round(player, 1) if player < 100 else 100 for player in players]
    clubs = {}
    for year in range(temps):
        line_up = {}
        if type(players) != list:
            np.random.shuffle(list(players.flat))

        players = np.reshape(players, (20, 11))
        for club in range(20):
            line_up[chr(65 + club)] = players[club]

        clubs[year] = line_up
        
    return clubs

def find_median(array):
    arraycopy = array.copy()
    arraycopy.sort()
    if len(arraycopy) % 2 == 0:
        return (arraycopy[len(arraycopy) // 2 - 1] + arraycopy[len(arraycopy) // 2])/2
    else:
        return arraycopy[len(arraycopy) // 2]
    
def find_forces(clubs, year = 0):
    atk_forces = []
    def_forces = []
    for club in clubs[year]:
        atk_forces.append(np.sum(clubs[year][club]))
        def_forces.append(np.log(np.prod(clubs[year][club])))
            
    atk_med = find_median(atk_forces)
    def_med = find_median(def_forces)
    
    for i in range(len(atk_forces)):
        atk_forces[i] = atk_forces[i] / atk_med * 2
        def_forces[i] /= def_med
            
    return atk_forces, def_forces

def model(club1, club2, sims = 10000):
    '''
    receives 2 clubs (forces) and returns the most likely result, probability of each result and probability for the game
    '''
    results = np.zeros((6, 6), dtype = int)
    probs = [0, 0, 0]
    for i in range(sims):
        goals1 = poisson.rvs(club1[0] - club2[1])
        goals2 = poisson.rvs(club2[0] - club1[1])
        if goals1 > 5:
            goals1 = 5
        if goals2 > 5:
            goals2 = 5
        
        results[goals1, goals2] += 1
        
        if goals1 > goals2:
            probs[0] += 1
        elif goals1 == goals2:
            probs[1] += 1
        else:
            probs[2] += 1
    
    for i in range(3):
        probs[i] /= sims
        
    result = [0, 0] # Home - away
    for i in range(len(results)):
        for j in range(len(results[i])):
            if results[i, j] > results[result[0], result[1]]:
                result = [i, j]
            elif results[i, j] == results[result[0], result[1]]:
                # rare cases
                if np.random.random() > 0.5:
                    result = [i, j]
    
    return results, result, probs

In [3]:
def championship(clubs, sims = 10000, year = 0):
    atk_forces, def_forces = find_forces(clubs, year = year)
    games = generate_games()
    results = []
    table = np.zeros((20, 8), dtype = int)
    # lines = clubs
    # columns = {points, games, wins, draws, defeats, goals for, goals against, goal difference}
    
    for i in range(len(games)):
        home = i
        for away in games[i]:
            away = ord(away) - 65
            club1 = (atk_forces[home], def_forces[home])
            club2 = (atk_forces[away], def_forces[away])
            _, result, _ = model(club1, club2, sims = sims)
            results.append([chr(home + 65), result[0], result[1], chr(away + 65)])
            
            # games
            table[home, 1] += 1
            table[away, 1] += 1
            
            # goals
            table[home, 5] += result[0]
            table[away, 5] += result[1]
            table[home, 6] += result[1]
            table[away, 6] += result[0]
            table[home, 7] += result[0] - result[1]
            table[away, 7] += result[1] - result[0]
            
            # results
            if result[0] > result[1]:
                table[home, 2] += 1
                table[home, 0] += 3
                table[away, 4] += 1
            elif result[0] == result[1]:
                table[home, 3] += 1
                table[away, 3] += 1
                table[home, 0] += 1
                table[away, 0] += 1
            else:
                table[home, 4] += 1
                table[away, 2] += 1
                table[away, 0] += 3
    
    columns_names = ['points', 'games', 'wins', 'draws', 'defeats', 'goals for', 'goals against', 'goal difference']
    
    table = pd.DataFrame(table,
                        index = [chr(i + 65) for i in range(20)],
                        columns = columns_names)
    table.sort_values(['goals for', 'goal difference', 'wins', 'points'],
                      axis = 0,
                      ascending = False,
                      inplace = True)
    
    return table, results

In [4]:
clubs = generate_players(temps = 1)
atk_forces, def_forces = find_forces(clubs)

# exemplo
club1 = (atk_forces[ord('A') - 65], def_forces[ord('A') - 65])
club2 = (atk_forces[ord('B') - 65], def_forces[ord('B') - 65])
results, result, probs = model(club1, club2, sims = 10000)

print('Forças do clube 1:', club1)
print('Forças do clube 2:', club2)
print()
print('Quantidade de vezes que o placar foi i x j (linha 5 e coluna 5 representam 5 ou mais gols):')
print(results)
print()
print('O resultado mais provável é {} x {}.'.format(result[0], result[1]))
print('Além disso, a probabilidade do mandante é {}, enquanto a do visitante é de {} e a de empate é {}.'.format(probs[0], probs[2], probs[1]))

Forças do clube 1: (2.1191655801825298, 1.0138194870899473)
Forças do clube 2: (1.9791395045632334, 0.997557008746462)

Quantidade de vezes que o placar foi i x j (linha 5 e coluna 5 representam 5 ou mais gols):
[[1177 1194  587  204   46    8]
 [1332 1380  606  200   47   18]
 [ 766  824  385  115   27    7]
 [ 299  275  157   51    7    2]
 [  86   91   37    9    3    1]
 [  25   24    5    4    1    0]]

O resultado mais provável é 1 x 1.
Além disso, a probabilidade do mandante é 0.3935, enquanto a do visitante é de 0.3069 e a de empate é 0.2996.


In [5]:
df, results = championship(clubs, sims = 10000)
df

Unnamed: 0,points,games,wins,draws,defeats,goals for,goals against,goal difference
R,88,38,25,13,0,38,13,25
A,80,38,21,17,0,38,17,21
G,80,38,21,17,0,38,17,21
Q,78,38,20,18,0,38,18,20
M,76,38,19,19,0,38,19,19
F,72,38,17,21,0,38,21,17
N,75,38,19,18,1,35,17,18
H,67,38,17,16,5,29,17,12
E,57,38,12,21,5,25,18,7
P,57,38,15,12,11,19,15,4


In [6]:
results

[['A', 1, 0, 'B'],
 ['A', 1, 0, 'C'],
 ['A', 1, 0, 'D'],
 ['A', 1, 1, 'E'],
 ['A', 1, 1, 'F'],
 ['A', 1, 1, 'G'],
 ['A', 1, 1, 'H'],
 ['A', 1, 0, 'I'],
 ['A', 1, 0, 'J'],
 ['A', 1, 0, 'K'],
 ['A', 1, 0, 'L'],
 ['A', 1, 1, 'M'],
 ['A', 1, 1, 'N'],
 ['A', 1, 0, 'O'],
 ['A', 1, 0, 'P'],
 ['A', 1, 1, 'Q'],
 ['A', 1, 1, 'R'],
 ['A', 1, 0, 'S'],
 ['A', 1, 0, 'T'],
 ['B', 1, 1, 'A'],
 ['B', 1, 0, 'C'],
 ['B', 0, 0, 'D'],
 ['B', 0, 1, 'E'],
 ['B', 1, 1, 'F'],
 ['B', 0, 1, 'G'],
 ['B', 0, 1, 'H'],
 ['B', 0, 0, 'I'],
 ['B', 0, 0, 'J'],
 ['B', 1, 0, 'K'],
 ['B', 1, 0, 'L'],
 ['B', 1, 1, 'M'],
 ['B', 0, 1, 'N'],
 ['B', 0, 1, 'O'],
 ['B', 0, 1, 'P'],
 ['B', 0, 1, 'Q'],
 ['B', 0, 1, 'R'],
 ['B', 0, 0, 'S'],
 ['B', 0, 0, 'T'],
 ['C', 0, 1, 'A'],
 ['C', 0, 0, 'B'],
 ['C', 0, 0, 'D'],
 ['C', 0, 1, 'E'],
 ['C', 0, 1, 'F'],
 ['C', 0, 1, 'G'],
 ['C', 0, 1, 'H'],
 ['C', 0, 1, 'I'],
 ['C', 0, 0, 'J'],
 ['C', 0, 0, 'K'],
 ['C', 0, 0, 'L'],
 ['C', 0, 1, 'M'],
 ['C', 0, 1, 'N'],
 ['C', 0, 1, 'O'],
 ['C', 0, 1,