In [1]:
import numpy as np
import pandas as pd

In [2]:
from collections import defaultdict

In [3]:
import pickle

In [4]:
from sklearn.base import BaseEstimator, TransformerMixin

In [5]:
class FeatureSelectionTransformer(BaseEstimator, TransformerMixin):
    """Column filter that keeps only columns whose name contains ``type_feat``.

    Parameters
    ----------
    type_feat : str, default 'weighted_mean_10'
        Substring a column name must contain for the column to be kept.
    """

    def __init__(self, type_feat = 'weighted_mean_10'):
        self.type_feat = type_feat

    def fit(self, X, y = None):
        """Nothing is learned from the data; the transformer itself is returned."""
        return self

    def transform(self, X):
        """Return the columns of ``X`` whose name contains ``type_feat``, ordered by name."""
        matching_cols = sorted(col for col in X.columns if self.type_feat in col)
        return X[matching_cols]

In [6]:
# SECURITY: pickle.load can execute arbitrary code contained in the file;
# only load pickles that come from a trusted source.
# NOTE(review): presumably the pickled object references
# FeatureSelectionTransformer, which would be why the class is defined
# before this cell -- confirm.
# The context manager closes the file handle once the model is loaded.
with open('model_goals.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [7]:
# SECURITY: pickle.load can execute arbitrary code contained in the file;
# only load pickles that come from a trusted source.
# The context manager closes the file handle once the model is loaded.
with open('model_goals_2.pkl', 'rb') as model_2_file:
    model_2 = pickle.load(model_2_file)

In [8]:
# Group-stage draw as a table: position j of every inner list belongs to the
# j-th group (the columns are labelled 'Group A' ... 'Group F' when the
# DataFrame is built), so each group consists of one team from every row.
data_schedule = [['Italy', 'Belgium', 'Netherlands', 'England', 'Sweden', 'France'],
                 ['Wales', 'Denmark', 'Austria', 'Croatia', 'Spain', 'Germany'],
                 ['Switzerland', 'Finland', 'Ukraine', 'Czech Republic', 'Slovakia', 'Portugal'],
                 ['Turkey', 'Russia', 'North Macedonia', 'Scotland', 'Poland', 'Hungary']
                ]

In [9]:
# One column per group, labelled 'Group A' through 'Group F'.
df_schedule = pd.DataFrame(data_schedule, columns = [f'Group {letter}' for letter in 'ABCDEF'])

In [10]:
# Display the schedule table (one column per group).
df_schedule

Unnamed: 0,Group A,Group B,Group C,Group D,Group E,Group F
0,Italy,Belgium,Netherlands,England,Sweden,France
1,Wales,Denmark,Austria,Croatia,Spain,Germany
2,Switzerland,Finland,Ukraine,Czech Republic,Slovakia,Portugal
3,Turkey,Russia,North Macedonia,Scotland,Poland,Hungary


In [11]:
import itertools

In [12]:
# Per-team feature table; the functions below join it on its 'team' column.
# No `encoding` argument is passed: it is not a read_excel parameter, and
# current pandas versions raise a TypeError for the unknown keyword.
df_feat_teams = pd.read_excel('features_last_game.xlsx')

In [13]:
def prepare_games_in_group(list_of_teams, df_feat_teams):
    """Build the round-robin fixtures of one group with both teams' features attached.

    Parameters
    ----------
    list_of_teams : iterable
        Teams of the group; every unordered pair of teams becomes one fixture.
    df_feat_teams : pandas.DataFrame
        Per-team features; must contain a ``team`` column, which is the join key.

    Returns
    -------
    pandas.DataFrame
        One row per fixture with ``team_A`` and ``team_B`` followed by the
        feature columns suffixed ``_A`` and then those suffixed ``_B``.
        Fixtures involving a team that is missing from ``df_feat_teams`` are
        dropped by the inner joins.
    """
    fixtures = pd.DataFrame(
        data = list(itertools.combinations(list_of_teams, 2)),
        columns = ['team_A', 'team_B'],
    )

    # Join the feature table once per side of the fixture (A first, then B).
    for side in ('A', 'B'):
        feats_side = df_feat_teams.copy()
        feats_side.columns = [col + '_' + side for col in feats_side.columns]
        fixtures = pd.merge(fixtures, feats_side, how = 'inner', on = 'team_' + side)

    return fixtures

In [14]:
def change_A_B_in_cols(name):
    """Swap a trailing ``_A`` / ``_B`` side marker in a column name.

    Only the suffix is changed; an ``_A`` or ``_B`` occurring elsewhere in the
    name is left alone, and names without such a suffix are returned unchanged.

    NOTE: a later cell of this notebook defines a function with the same name;
    when the notebook is run top to bottom that later definition is the one in
    effect.

    Parameters
    ----------
    name : str
        Column name, e.g. ``'goals_sim_A'``.

    Returns
    -------
    str
        The name with the suffix swapped, e.g. ``'goals_sim_B'``.
    """
    if name.endswith('_A'):
        return name[:-2] + '_B'
    if name.endswith('_B'):
        return name[:-2] + '_A'
    return name

In [15]:
from random import choices

In [16]:
from collections import Counter

In [17]:
def simulate_game(p_goals_A, p_goals_B):
    """Draw one random score line for a match.

    The goal count of each side is sampled independently from 0..9 with
    ``random.choices``, using the given sequences as weights.

    Parameters
    ----------
    p_goals_A, p_goals_B : sequence of 10 numbers
        Weights for 0, 1, ..., 9 goals of the respective side.

    Returns
    -------
    tuple
        ``(goals_A, goals_B)``; the two values may be equal (a draw).
    """
    goal_counts = list(range(10))
    sampled_a, = choices(goal_counts, weights = p_goals_A)
    sampled_b, = choices(goal_counts, weights = p_goals_B)
    return sampled_a, sampled_b

In [18]:
def simulate_game_without_draw(p_goals_A, p_goals_B):
    """Draw random score lines until the two sides score differently.

    Both goal counts are sampled from 0..9 with ``random.choices``; a drawn
    result is discarded and both sides are sampled again.  The loop does not
    terminate if the two weight vectors can only ever yield the same value.

    Parameters
    ----------
    p_goals_A, p_goals_B : sequence of 10 numbers
        Weights for 0, 1, ..., 9 goals of the respective side.

    Returns
    -------
    tuple
        ``(goals_A, goals_B)`` with ``goals_A != goals_B``.
    """
    goal_counts = list(range(10))
    while True:
        sampled_a = choices(goal_counts, p_goals_A)[0]
        sampled_b = choices(goal_counts, p_goals_B)[0]
        if sampled_a != sampled_b:
            return sampled_a, sampled_b

In [19]:
def compute_points(goals_1, goals_2):
    """Return the points earned by the side that scored ``goals_1``.

    3 for scoring more than the opponent, 1 for an equal score, 0 otherwise.
    """
    if goals_1 == goals_2:
        return 1
    return 3 if goals_1 > goals_2 else 0

In [20]:
def change_A_B_in_cols(name):
    """Swap a trailing ``_A`` / ``_B`` side marker in a column name.

    Names that end in neither ``_A`` nor ``_B`` are returned unchanged instead
    of having their last two characters replaced.

    Parameters
    ----------
    name : str
        Column name, e.g. ``'points_sim_B'``.

    Returns
    -------
    str
        The name with the suffix swapped, e.g. ``'points_sim_A'``.
    """
    if name.endswith('_A'):
        return name[:-2] + '_B'
    if name.endswith('_B'):
        return name[:-2] + '_A'
    return name

In [21]:
def get_goal_prob_for_pairings(df_feat, df_games, model):
    """Predict goal distributions for both sides of every game.

    Parameters
    ----------
    df_feat : pandas.DataFrame
        Per-team features; must contain a ``team`` column used as join key.
    df_games : pandas.DataFrame
        Exactly two columns, ``team_A`` and ``team_B``, one row per game.
    model : object
        Anything with a ``predict_proba(frame)`` method.

    Returns
    -------
    tuple
        ``(prob_distr_goals_A, prob_distr_goals_B)``: the ``predict_proba``
        output for the games as given and for the same games with the two
        teams' roles swapped.  Row ``i`` of both outputs refers to row ``i``
        of ``df_games``.  Games involving a team that is missing from
        ``df_feat`` are dropped by the inner joins.
    """
    order_col = '__game_order__'

    def _suffixed(suffix):
        # Copy of the feature table with `suffix` appended to every column name.
        feats = df_feat.copy()
        feats.columns = [col + suffix for col in feats.columns]
        return feats

    feats_by_side = {'A': _suffixed('_A'), 'B': _suffixed('_B')}

    def _attach_features(games, sides):
        # Callers pair the predictions with df_games by position, so the row
        # order must survive the joins.  An inner merge on keys that repeat
        # non-contiguously (as team_B does in a round robin) did not keep the
        # left row order in pandas releases before 2.2, hence the explicit
        # position column that is sorted on and removed again afterwards.
        games = games.copy()
        games[order_col] = range(len(games))
        for side in sides:
            games = pd.merge(games, feats_by_side[side], how = 'inner', on = 'team_' + side)
        games = games.sort_values(order_col, kind = 'mergesort')
        return games.drop(columns = order_col).reset_index(drop = True)

    # Games as listed: A-side features first, then B-side features.
    df_feat_goals_A = _attach_features(df_games, ('A', 'B'))

    # Same games with the team columns relabelled, so the second-listed team
    # receives the `_A` features and the first-listed team the `_B` features.
    df_games_swapped = df_games.copy()
    df_games_swapped.columns = ['team_B', 'team_A']
    df_feat_goals_B = _attach_features(df_games_swapped, ('B', 'A'))

    prob_distr_goals_A = model.predict_proba(df_feat_goals_A)
    prob_distr_goals_B = model.predict_proba(df_feat_goals_B)

    return prob_distr_goals_A, prob_distr_goals_B

In [22]:
class Group():
    """One round-robin group of the tournament.

    On construction every unordered pair of teams becomes a game and the goal
    distributions for both sides of each game are obtained once from
    ``get_goal_prob_for_pairings``; ``simulate_group`` then samples results
    from those stored distributions.

    Parameters
    ----------
    list_of_teams : sequence
        Teams of the group (at least three are needed by ``simulate_group``).
    df_feat : pandas.DataFrame
        Per-team feature table passed on to ``get_goal_prob_for_pairings``.
    model : object
        Model passed on to ``get_goal_prob_for_pairings``.
    """

    def __init__(self, list_of_teams, df_feat, model):
        self.list_of_teams = list_of_teams
        self.list_of_pairings = list(itertools.combinations(self.list_of_teams, 2))

        self.df_pairings = pd.DataFrame(data = self.get_pairings(), columns = ['team_A', 'team_B'])

        self.prob_distr_goals_A, self.prob_distr_goals_B = get_goal_prob_for_pairings(df_feat, self.df_pairings, model)

    def get_pairings(self):
        """Return the list of ``(team_A, team_B)`` tuples played in this group."""
        return self.list_of_pairings

    def simulate_group(self):
        """Simulate every game of the group once and rank the teams.

        Teams are ranked by points, then goal difference, then goals scored
        (all descending).

        Returns
        -------
        list
            ``[first, second, third, points, goal_difference, goals, victories]``
            where the last four entries are the totals of the third-placed team.
        """
        df_results = self.df_pairings.copy()

        # One sampled score line per game, in the order of df_pairings.
        goals_sim = [simulate_game(p_goals_A, p_goals_B)
                     for p_goals_A, p_goals_B in zip(self.prob_distr_goals_A, self.prob_distr_goals_B)]

        df_results['goals_sim_A'] = [goals[0] for goals in goals_sim]
        df_results['goals_sim_B'] = [goals[1] for goals in goals_sim]

        df_results['points_sim_A'] = [compute_points(goals[0], goals[1]) for goals in goals_sim]
        df_results['points_sim_B'] = [compute_points(goals[1], goals[0]) for goals in goals_sim]

        # Stack each game a second time with the A/B labels swapped, so every
        # team's games all appear under `team_A` and can be summed per team.
        df_results_mirrored = df_results.copy()
        df_results_mirrored.columns = [change_A_B_in_cols(col) for col in df_results_mirrored.columns]

        df_results = pd.concat([df_results, df_results_mirrored], axis = 0, sort = False).reset_index(drop = True)
        df_results['victories_sim_A'] = (df_results['points_sim_A'] == 3).astype(int)

        # Several columns must be selected with a list: selecting them with a
        # bare tuple is rejected by pandas 2.x.
        cols_totals = ['points_sim_A', 'goals_sim_A', 'goals_sim_B', 'victories_sim_A']
        df_results = df_results.groupby('team_A')[cols_totals].sum().reset_index()
        df_results['difference_sim_A'] = df_results['goals_sim_A'] - df_results['goals_sim_B']

        df_results = df_results.sort_values(by = ['points_sim_A', 'difference_sim_A', 'goals_sim_A'], ascending = False)
        df_results = df_results.reset_index(drop = True)

        cols_third_team = ['team_A', 'points_sim_A', 'difference_sim_A', 'goals_sim_A', 'victories_sim_A']

        return list(df_results['team_A'].values[:2]) + list(df_results[cols_third_team].iloc[2].values)

In [23]:
class EURO2020():
    """Simulation of the whole tournament: six groups followed by the knockout rounds.

    Parameters
    ----------
    df_schedule : pandas.DataFrame
        One column per group (iterated in column order, treated as groups
        A-F), each holding the teams of that group.
    df_feat : pandas.DataFrame
        Per-team feature table handed to every ``Group``.
    model : object
        Model handed to every ``Group``.
    """

    # Opponents for the four best third-placed teams.  The key is the sorted
    # concatenation of the groups the four teams come from; the value lists the
    # group whose third-placed team meets the winner of group B, C, E and F
    # (in that order).
    _OPPONENTS_BEST_THIRDS = {'ABCD' : ['A', 'D', 'B', 'C'],
                              'ABCE' : ['A', 'E', 'B', 'C'],
                              'ABCF' : ['A', 'F', 'B', 'C'],
                              'ABDE' : ['D', 'E', 'A', 'B'],
                              'ABDF' : ['D', 'F', 'A', 'B'],
                              'ABEF' : ['E', 'F', 'B', 'A'],
                              'ACDE' : ['E', 'D', 'C', 'A'],
                              'ACDF' : ['F', 'D', 'C', 'A'],
                              'ACEF' : ['E', 'F', 'C', 'A'],
                              'ADEF' : ['E', 'F', 'D', 'A'],
                              'BCDE' : ['E', 'D', 'B', 'C'],
                              'BCDF' : ['F', 'D', 'C', 'B'],
                              'BCEF' : ['F', 'E', 'C', 'B'],
                              'BDEF' : ['F', 'E', 'D', 'B'],
                              'CDEF' : ['F', 'E', 'D', 'C']
                             }

    def __init__(self, df_schedule, df_feat, model):
        self.groups = [Group(df_schedule[col].values, df_feat, model) for col in df_schedule]

    def matrix_best_thirds(self, df_best_4_thirds):
        """Look up which third-placed teams the winners of groups B, C, E, F meet.

        Parameters
        ----------
        df_best_4_thirds : pandas.DataFrame
            Must have a ``group`` column with the four group letters of the
            qualified third-placed teams (any order).

        Returns
        -------
        list
            Four group letters: the groups whose third-placed team plays the
            winner of group B, C, E and F respectively.
        """
        best_4_thirds = ''.join(sorted(df_best_4_thirds['group']))
        # Return a copy so callers cannot modify the shared table.
        return list(self._OPPONENTS_BEST_THIRDS[best_4_thirds])

    def compute_round_of_last_16(self, results_groups):
        """Build the eight round-of-16 pairings from the six group results.

        Parameters
        ----------
        results_groups : sequence of 6 lists
            Output of ``Group.simulate_group`` for groups A-F, i.e.
            ``[first, second, third, points, goal_difference, goals, victories]``
            with the last four values describing the third-placed team.

        Returns
        -------
        list
            Eight ``[team_A, team_B]`` pairs, ordered so that consecutive
            pairs of winners meet in the next round.
        """
        data_thirds = [result_group[2:] for result_group in results_groups]

        df_ranking_thirds = pd.DataFrame(data = data_thirds, columns = ['team', 'points', 'goal_difference', 'goals', 'victories'])
        df_ranking_thirds['group'] = ['A', 'B', 'C', 'D', 'E', 'F']

        df_ranking_thirds = df_ranking_thirds.sort_values(by = ['points', 'goal_difference', 'goals', 'victories'], ascending = False).reset_index(drop = True)

        # The four best third-placed teams qualify.
        df_best_4_thirds = df_ranking_thirds[['team', 'group']].iloc[:4].sort_values(by = ['group']).reset_index(drop = True)

        dict_group_to_team = dict(zip(df_best_4_thirds['group'], df_best_4_thirds['team']))

        third_vs_1B, third_vs_1C, third_vs_1E, third_vs_1F = [
            dict_group_to_team[group] for group in self.matrix_best_thirds(df_best_4_thirds)
        ]

        # results_groups index: 0 = A, 1 = B, ..., 5 = F; [0] = winner, [1] = runner-up.
        round_of_last_16 = [[results_groups[1][0], third_vs_1B],
                            [results_groups[0][0], results_groups[2][1]],
                            [results_groups[5][0], third_vs_1F],
                            [results_groups[3][1], results_groups[4][1]],
                            [results_groups[4][0], third_vs_1E],
                            [results_groups[3][0], results_groups[5][1]],
                            [results_groups[2][0], third_vs_1C],
                            [results_groups[0][1], results_groups[1][1]]
                           ]

        return round_of_last_16

    def simulate_knockout_phase(self, df_knockout_phase, df_feat_teams, model):
        """Simulate one knockout round and return the winners.

        ``df_knockout_phase`` must have exactly the columns ``team_A`` and
        ``team_B``; the columns ``goals_sim_A``, ``goals_sim_B`` and ``winner``
        are added to it in place.  Games are re-sampled until they are not
        drawn (see ``simulate_game_without_draw``).

        Returns
        -------
        numpy.ndarray
            The winner of each game, in row order.
        """
        prob_distr_goals_A, prob_distr_goals_B = get_goal_prob_for_pairings(df_feat_teams, df_knockout_phase, model)

        goals_sim = [simulate_game_without_draw(p_goals_A, p_goals_B)
                     for p_goals_A, p_goals_B in zip(prob_distr_goals_A, prob_distr_goals_B)]

        df_knockout_phase['goals_sim_A'] = [goals[0] for goals in goals_sim]
        df_knockout_phase['goals_sim_B'] = [goals[1] for goals in goals_sim]

        df_knockout_phase['winner'] = df_knockout_phase.apply(lambda row : row['team_A'] if row['goals_sim_A'] > row['goals_sim_B'] else row['team_B'], axis = 1)

        return df_knockout_phase['winner'].values

    def _simulate_next_round(self, winners_previous_round, df_feat_teams, model):
        """Pair consecutive winners of the previous round and simulate their games."""
        games = [winners_previous_round[2*i: 2*(i+1)] for i in range(len(winners_previous_round)//2)]
        df_games = pd.DataFrame(data = games, columns = ['team_A', 'team_B'])
        return self.simulate_knockout_phase(df_games, df_feat_teams, model)

    def simulate_tournament(self, df_feat_teams, model):
        """Simulate the group stage and all knockout rounds once.

        Returns
        -------
        tuple
            ``(winners_groups, semi_finalists, champion)``: the six group
            winners, the four quarter-final winners, and a one-element list
            with the winner of the final.
        """
        results_groups = [g.simulate_group() for g in self.groups]

        winners_groups = [result_group[0] for result_group in results_groups]

        # Simulation round of last sixteen
        games_ko_last_16 = self.compute_round_of_last_16(results_groups)
        df_ko_last_16 = pd.DataFrame(data = games_ko_last_16, columns = ['team_A', 'team_B'])
        winners_ko_last_16 = self.simulate_knockout_phase(df_ko_last_16, df_feat_teams, model)

        # Quarter-finals, semi-finals and final.
        winners_ko_last_8 = self._simulate_next_round(winners_ko_last_16, df_feat_teams, model)
        winners_ko_last_4 = self._simulate_next_round(winners_ko_last_8, df_feat_teams, model)
        winners_ko_last_2 = self._simulate_next_round(winners_ko_last_4, df_feat_teams, model)

        return winners_groups, list(winners_ko_last_8), list(winners_ko_last_2)

    def monte_carlo(self, df_feat_teams, model, n):
        """Run ``n`` tournament simulations and print how often each team
        won its group, reached the semi-finals, and won the tournament."""
        dicts_winners_groups = [defaultdict(int) for _ in range(6)]
        dict_semi_finals = defaultdict(int)
        dict_champion = defaultdict(int)

        for _ in range(n):
            winners_groups, semi_finals, champion = self.simulate_tournament(df_feat_teams, model)

            for group_idx, winner in enumerate(winners_groups):
                dicts_winners_groups[group_idx][winner] += 1

            for team in semi_finals:
                dict_semi_finals[team] += 1

            dict_champion[champion[0]] += 1

        print('Winners Groups')
        for dict_winners_group in dicts_winners_groups:
            print(dict_winners_group)

        print('Semi Finals: ')
        print(dict_semi_finals)

        print('Champion: ')
        print(dict_champion)

In [24]:
# Tournament built from the schedule table, the per-team features and the second loaded model.
euro2020 = EURO2020(df_schedule = df_schedule, df_feat = df_feat_teams, model = model_2)

In [25]:
# euro2020.monte_carlo(df_feat_teams, model_2, 500)

In [50]:
def get_result_max_points(p_goals_A, p_goals_B):
    """Print the expected-points value of the best draw / win-A / win-B prediction.

    The joint score matrix is the outer product of the two goal distributions
    (entry ``[i][j]`` is ``p_goals_A[i] * p_goals_B[j]``).  From it the function
    prints:

    * draw:  ``2 * P(draw) + 2 * max P(exact drawn score)``
    * win A: ``2 * P(A wins) + max over margins of (P(margin) + max P(exact score))``
    * win B: the same for team B,

    followed by the score line that attains the maximum in each category.
    NOTE(review): the weights look like a tipping-game scoring rule
    (tendency / goal difference / exact result) -- confirm.

    Parameters
    ----------
    p_goals_A, p_goals_B : array-like of numbers
        Probabilities of 0, 1, 2, ... goals for each side.  Any length is
        accepted (the notebook uses length 10).

    Returns
    -------
    numpy.ndarray
        The joint score matrix, shape ``(len(p_goals_A), len(p_goals_B))``.
    """
    p_goals_A = np.reshape(p_goals_A, (-1, 1))
    p_goals_B = np.reshape(p_goals_B, (1, -1))

    mat_prob = np.matmul(p_goals_A, p_goals_B)
    print(mat_prob)

    # Main diagonal: both sides score the same number of goals.
    diag_draws = np.diagonal(mat_prob, 0)
    max_expected_points_draw = 2*sum(diag_draws) + 2*max(diag_draws)
    print(max_expected_points_draw)

    # Diagonal -i: A scores i more than B; diagonal +i: B scores i more than A.
    n_rows, n_cols = mat_prob.shape
    diags_win_A = [np.diagonal(mat_prob, -i) for i in range(1, n_rows)]
    diags_win_B = [np.diagonal(mat_prob, i) for i in range(1, n_cols)]

    prob_win_A = sum([sum(diag) for diag in diags_win_A])
    prob_win_B = sum([sum(diag) for diag in diags_win_B])

    max_prob_A = 0
    best_win_A = (1, 0)
    for margin, diag in enumerate(diags_win_A, start = 1):
        # Probability of this margin plus that of its most likely exact score.
        candidate = sum(diag) + max(diag)
        if candidate > max_prob_A:
            max_prob_A = candidate
            best_win_A = (margin + np.argmax(diag), np.argmax(diag))

    max_prob_B = 0
    best_win_B = (0, 1)
    for margin, diag in enumerate(diags_win_B, start = 1):
        candidate = sum(diag) + max(diag)
        if candidate > max_prob_B:
            max_prob_B = candidate
            best_win_B = (np.argmax(diag), margin + np.argmax(diag))

    max_expected_points_win_A = 2*prob_win_A + max_prob_A
    max_expected_points_win_B = 2*prob_win_B + max_prob_B

    print('Draw: ', max_expected_points_draw)
    print('Win A: ', max_expected_points_win_A)
    print('Win B: ', max_expected_points_win_B)

    print(best_win_A)
    print(best_win_B)
    print((np.argmax(diag_draws), np.argmax(diag_draws)))

    return mat_prob

In [34]:
# Scratch list used in the next two cells to compare list.index(max(...)) with np.argmax.
a = [1, 2, 3]

In [35]:
# Position of the largest element using plain list methods.
print(a.index(max(a)))

2


In [36]:
# Same lookup with NumPy: np.argmax gives the position of the largest element.
a_np = np.array(a)
np.argmax(a_np)

2

In [51]:
# Toy distributions over 0..9 goals in which each side scores either 0 or 1.
p_A = [0.25, 0.75] + [0] * 8
p_B = [0.5, 0.5] + [0] * 8

get_result_max_points(p_A, p_B)

[[0.125 0.125 0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.375 0.375 0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.   ]]
1.75
Draw:  1.75
Win A:  1.5
Win B:  0.5
(1, 0)
(0, 1)
(1, 1)


array([[0.125, 0.125, 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.375, 0.375, 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ]])