In [6]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import truncnorm
import time
import csv


def get_s_cond_t_params(player_1_sigma, player_2_sigma, player_1_mean, player_2_mean, t_i):
    beta = 3  # från uppgiften

    # Covariance calculations
    covariance_t_given_s = beta
    covariance_ss = np.array([[player_1_sigma, 0], [0, player_2_sigma]])
    A = np.array([1, -1]).reshape([1, 2])
    inv_covariance_ss = np.linalg.inv(covariance_ss)
    ACA = np.matmul(A.T, A) * (1 / covariance_t_given_s)
    covariance_s_cond_t = np.linalg.inv(inv_covariance_ss + ACA)

    # Mean calculations
    player_means = np.array([player_1_mean, player_2_mean]).reshape([2, 1])
    a = covariance_s_cond_t
    b = np.matmul(inv_covariance_ss, player_means)
    c = A.T * (1 / covariance_t_given_s) * t_i
    mean_s_cond_t = np.matmul(a, b + c)

    return mean_s_cond_t, covariance_s_cond_t


def P_s_cond_t(t_i, player_1_mean, player_1_sigma, player_2_mean, player_2_sigma):
    # player_1_mean = 25; player_2_mean = 25; player_1_sigma = 8.3**2; player_2_sigma = 8.3**2

    mean_s_cond_t, cov_s_cond_t = get_s_cond_t_params(player_1_mean=player_1_mean, player_1_sigma=player_1_sigma,
                                                      player_2_mean=player_2_mean, player_2_sigma=player_2_sigma,
                                                      t_i=t_i)

    return np.random.multivariate_normal(mean=mean_s_cond_t.reshape(2), cov=cov_s_cond_t,
                                         check_valid='warn', tol=1e-8)


def P_t_cond_s(s_i, t_game):
    s_diff = s_i[0] - s_i[1]
    beta = 3
    t_sigma = beta

    if t_game > 0:  # case for when y=1
        a, b = (0 - s_diff) / t_sigma, np.inf
        t = truncnorm.rvs(a, b) * t_sigma + s_diff
        return t
    elif t_game < 0:  # case for when y=-1
        a, b = -np.inf, (0 - s_diff) / t_sigma
        t = truncnorm.rvs(a, b) * t_sigma + s_diff
        return t
    else:
        print("ERROR, TIES PRESENTLY NOT ALLOWED")


        
def gibbs_sampler(L, player_1_stats, player_2_stats, t_game):
    player_1_mean, player_1_sigma = player_1_stats
    player_2_mean, player_2_sigma = player_2_stats
    
    s_i = [player_1_mean, player_2_mean]

    t_obs = np.zeros(L)
    s_obs = np.zeros([L, 2])

    for i in range(L):
        t_i_plus_1 = P_t_cond_s(s_i, t_game=t_game)
        s_i_plus_1 = P_s_cond_t(t_i_plus_1, player_1_mean, player_1_sigma, player_2_mean, player_2_sigma)

        t_obs[i] = t_i_plus_1
        s_obs[i, :] = s_i_plus_1

        s_i = s_i_plus_1
        # plt.scatter(s_obs[:, 0], s_obs[:, 1])
        # plt.pause(0.1)
    # plt.show()

    # plt.plot(s_obs[:, 0]); plt.plot(s_obs[:, 1]); plt.show()
    return s_obs, t_obs



def player_stats_estimate_from_obs(s_obs):
    player_1_stats_estimate = [np.mean(s_obs[:, 0]), np.var(s_obs[:, 0])] # [mean, variance] of samples
    player_2_stats_estimate = [np.mean(s_obs[:, 1]), np.var(s_obs[:, 1])] # [mean, variance] of samples
    
    return player_1_stats_estimate, player_2_stats_estimate


    

def make_stats_dictionary(filename, stats_dictionary, printable):
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        mean = 100; variance = (100 / 3) ** 2  # TrueSkill prior parameters before any games
        gamelist = []
        for row in reader:
            # Create dictionary (=map) with keyword 'team' and value [mean, variance]
            stats_dictionary[row['team1']] = [mean, variance]
            # Add teams and result to list
            gamelist.append([row['team1'], row['team2'], int(row['result'])])

            if printable == 1:  # Print the whole list
                print(f"{row['team1']} vs {row['team2']}: {row['result']}")  # Access by column header
                      
    #stats_dictionary = random.shuffle(stats_dictionary, random)

    return stats_dictionary, gamelist



# Ranking by mean (should perhaps be improved to mean - 3 * sigma)
def ranking(stats_dictionary):
    sorted_teams = sorted(stats_dictionary.items(), key=lambda x: x[1], reverse=True)
    print("\nList of teams ranked by mean skill in descending order:\n")
    for i in sorted_teams:
        print(i[0], i[1])

        
def predict_winner(team1_string, team2_string, stats_dictionary):
    diff = stats_dictionary[team1_string][0] - stats_dictionary[team2_string][0] 
    #print('statdic team1 : ', stats_dictionary[team1_string][0])
    #print('statdic team2 : ', stats_dictionary[team2_string][0])
    print('\t diff: ', diff)
    
    if diff > 0:
        return 1
    elif diff < 0:
        return -1
    else:
        return 1
            


In [7]:
# Make dictionary of team stats (mean and variance) & list of all games with result
stats_dictionary, result_list = make_stats_dictionary('syntheticSerieA.csv', stats_dictionary={}, printable=0)
stats_dictionary

{'Chievo': [100, 1111.1111111111113],
 'Lazio': [100, 1111.1111111111113],
 'Torino': [100, 1111.1111111111113],
 'Sassuolo': [100, 1111.1111111111113],
 'Parma': [100, 1111.1111111111113],
 'Empoli': [100, 1111.1111111111113],
 'Bologna': [100, 1111.1111111111113],
 'Atalanta': [100, 1111.1111111111113],
 'Juventus': [100, 1111.1111111111113],
 'Napoli': [100, 1111.1111111111113],
 'Spal': [100, 1111.1111111111113],
 'Udinese': [100, 1111.1111111111113],
 'Inter': [100, 1111.1111111111113],
 'Genoa': [100, 1111.1111111111113],
 'Frosinone': [100, 1111.1111111111113],
 'Fiorentina': [100, 1111.1111111111113],
 'Cagliari': [100, 1111.1111111111113],
 'Roma': [100, 1111.1111111111113],
 'Milan': [100, 1111.1111111111113],
 'Sampdoria': [100, 1111.1111111111113]}

In [8]:
result_list

[['Chievo', 'Lazio', 1],
 ['Chievo', 'Torino', 1],
 ['Chievo', 'Sassuolo', 1],
 ['Chievo', 'Parma', 1],
 ['Chievo', 'Empoli', 1],
 ['Chievo', 'Bologna', -1],
 ['Chievo', 'Atalanta', -1],
 ['Chievo', 'Juventus', 1],
 ['Chievo', 'Napoli', 1],
 ['Chievo', 'Spal', 1],
 ['Chievo', 'Udinese', 1],
 ['Chievo', 'Inter', 1],
 ['Chievo', 'Genoa', 1],
 ['Chievo', 'Frosinone', 1],
 ['Chievo', 'Fiorentina', 1],
 ['Chievo', 'Cagliari', -1],
 ['Chievo', 'Roma', 1],
 ['Chievo', 'Milan', 1],
 ['Chievo', 'Sampdoria', 1],
 ['Lazio', 'Torino', 1],
 ['Lazio', 'Sassuolo', 1],
 ['Lazio', 'Parma', 1],
 ['Lazio', 'Empoli', -1],
 ['Lazio', 'Bologna', -1],
 ['Lazio', 'Atalanta', -1],
 ['Lazio', 'Juventus', -1],
 ['Lazio', 'Napoli', 1],
 ['Lazio', 'Spal', 1],
 ['Lazio', 'Udinese', 1],
 ['Lazio', 'Inter', -1],
 ['Lazio', 'Genoa', -1],
 ['Lazio', 'Frosinone', -1],
 ['Lazio', 'Fiorentina', -1],
 ['Lazio', 'Cagliari', -1],
 ['Lazio', 'Roma', 1],
 ['Lazio', 'Milan', 1],
 ['Lazio', 'Sampdoria', 1],
 ['Torino', 'Sassuolo

In [9]:

# Make dictionary of team stats (mean and variance) & list of all games with result
stats_dictionary, result_list = make_stats_dictionary('syntheticSerieA.csv', stats_dictionary={}, printable=0)

correct_predictions = 0
nr_of_draws = 0

''' 
UNCOMMENT THIS SECTION TO GET A RANDOMIXED ORDERING OF THE MATCHES. 
# 
If you want to randomize

'''

x = np.random.choice(range(len(result_list)), size=len(result_list), replace=False)
randomized_list = []
for i in x:
    randomized_list.append(result_list[i])
result_list = randomized_list

for i in range(len(result_list)):
    # result_list holds [team1, team2, result = score1 - score2]
    print(f"\nResult game {i}:   {result_list[i]}")
    team1 = result_list[i][0]
    team2 = result_list[i][1]
    
    print('team1: ', team1)
    print('team2: ', team2)
    result = result_list[i][2]
    
    # stats_dictionary with keyword 'teamname' and value [mean, variance]
    print(f"Stats before game: {stats_dictionary[team1]}, {stats_dictionary[team2]}")

    if result == 0:  # ignore tied games for now
        print("Game ignored due to tie")
        print(f"Stats after game:  {stats_dictionary[team1]}, {stats_dictionary[team2]}")
        nr_of_draws += 1
        print("Draws: ", nr_of_draws)
    else:
    
        # prediktera mat resultat och spara prediktion
        pred_result = predict_winner(team1, team2, stats_dictionary)
        
        if np.sign(result) == np.sign(pred_result):
                correct_predictions += 1
                print("Correct pred: ", correct_predictions)
        
        print('true result: ', result)
        print('pred result: ', pred_result)
                
        s_obs, t_obs =  gibbs_sampler(L=5000,
                                      player_1_stats=stats_dictionary[team1], 
                                      player_2_stats=stats_dictionary[team2],
                                      t_game=result)
        
        burn_in_value = 1000
        s_obs = s_obs[burn_in_value:]
        player_1_stats_posterior, player_2_stats_posterior = player_stats_estimate_from_obs(s_obs)
        
        
        # Update team stats so posterior makes new prior
        stats_dictionary[team1] = player_1_stats_posterior
        stats_dictionary[team2] = player_2_stats_posterior


        print(f"Stats AFTER game: {stats_dictionary[team1]}, {stats_dictionary[team2]}")

print()
print("correct_predictions ", correct_predictions)
print("reslist: ", len(result_list))
print("draws: ", nr_of_draws)
print('performance: ', correct_predictions / (len(result_list)-nr_of_draws))



Result game 0:   ['Inter', 'Cagliari', -1]
team1:  Inter
team2:  Cagliari
Stats before game: [100, 1111.1111111111113], [100, 1111.1111111111113]
	 diff:  0
true result:  -1
pred result:  1
Stats AFTER game: [70.7006757274811, 1085.715555142342], [128.22880628846198, 1084.8310195095082]

Result game 1:   ['Roma', 'Juventus', -1]
team1:  Roma
team2:  Juventus
Stats before game: [100, 1111.1111111111113], [100, 1111.1111111111113]
	 diff:  0
true result:  -1
pred result:  1
Stats AFTER game: [43.3790885066012, 902.1955403250577], [155.4058108911944, 913.3828923834038]

Result game 2:   ['Milan', 'Cagliari', -1]
team1:  Milan
team2:  Cagliari
Stats before game: [100, 1111.1111111111113], [128.22880628846198, 1084.8310195095082]
	 diff:  -28.228806288461982
Correct pred:  1
true result:  -1
pred result:  -1
Stats AFTER game: [58.44776148324277, 1430.4746697515245], [168.03721926047032, 1407.1751170586433]

Result game 3:   ['Fiorentina', 'Torino', 1]
team1:  Fiorentina
team2:  Torino
Stat

In [10]:
ranking(stats_dictionary
       )
# andra körning. 


List of teams ranked by mean skill in descending order:

Atalanta [358.35215056285364, 2637.123931095306]
Bologna [341.4780404715636, 1366.7535953728]
Cagliari [256.5504994604685, 3824.0521729002185]
Empoli [195.71795037640305, 1214.9978102373534]
Chievo [163.0816779632476, 2309.6339693771124]
Genoa [154.39672935806422, 395.75165581672815]
Fiorentina [153.8818569757917, 542.026301156737]
Inter [133.20447676292522, 412.0370662326218]
Juventus [130.12625739040485, 706.5068690855744]
Frosinone [106.30013381354591, 624.0510987138362]
Lazio [98.8213757247917, 159.50233332867208]
Napoli [88.71989197487105, 941.171887447966]
Milan [75.29652254330121, 191.80452563217807]
Roma [67.62240986671044, 554.4106182996662]
Parma [29.43925911592521, 1825.4137918473673]
Sampdoria [-6.483511259814612, 903.1317286679127]
Sassuolo [-61.96853113810226, 1318.8060452351638]
Spal [-65.2432350787727, 1611.8441484241173]
Torino [-130.26781341817488, 1706.4732009423597]
Udinese [-186.7378617942173, 1690.180697045