# Classement des Dragibus

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Données à classer

In [2]:
# One record per head-to-head comparison between colour A and colour B.
# score: 1 -> A preferred over B, -1 -> B preferred over A, 0 -> draw.
data_df = pd.DataFrame(
    data=[
        ('Vert', 'Noir', -1),
        ('Rouge', 'Vert', 0),
        ('Noir', 'Vert', 1),
        ('Jaune', 'Rouge', 0),
        ('Rose', 'Noir', 1),
        ('Jaune', 'Vert', -1),
        ('Bleu', 'Vert', 1),
        ('Bleu', 'Jaune', 1),
        ('Rouge', 'Noir', 1),
        ('Jaune', 'Rouge', 1),
        ('Rose', 'Rouge', -1),
        ('Noir', 'Bleu', 1),
        ('Rouge', 'Noir', 1),
    ],
    columns=['A', 'B', 'score'],
)

## Classement, méthode matricielle

In [3]:
def get_colors(df, col_name_1, col_name_2):
    """Return the sorted, de-duplicated values found in two columns of ``df``.

    Args:
        df: Table holding both columns.
        col_name_1: Name of the first column to scan.
        col_name_2: Name of the second column to scan.

    Returns:
        A plain Python list of the distinct values, in sorted order.
    """
    first_values = df[col_name_1].values
    second_values = df[col_name_2].values
    # union1d pools both arrays and returns their sorted unique values.
    return np.union1d(first_values, second_values).tolist()

# Every colour that appears in column A or B, de-duplicated and sorted;
# a colour's position in this list is its row/column index in the matrices below.
colors = get_colors(data_df, 'A', 'B')
colors

['Bleu', 'Jaune', 'Noir', 'Rose', 'Rouge', 'Vert']

In [4]:
def create_matrix(n):
    """Build an ``n`` x ``n`` nested list whose cells are independent empty lists.

    Args:
        n: Number of rows and columns.

    Returns:
        A list of ``n`` rows, each a list of ``n`` distinct empty lists.
    """
    # A fresh list is created per cell so that appending to one cell never
    # affects another.
    return [[[] for _ in range(n)] for _ in range(n)]

def iter_votes(df, colors):
    """Yield each vote of ``df`` as ``(index_a, index_b, score)``.

    The first two columns of ``df`` hold the compared colours and the third
    column holds the score; columns are read by position, not by name.

    Args:
        df: Table of votes with at least three columns.
        colors: Sequence of colour names; a colour is translated to its
            position in this sequence.

    Yields:
        Tuples ``(position of first colour, position of second colour, score)``.

    Raises:
        ValueError: If a colour of ``df`` is not present in ``colors``.
    """
    # Rows are read as plain tuples: indexing a string-labelled Series with an
    # integer key (``row[0]`` on an ``iterrows`` row) is deprecated in pandas.
    for row in df.itertuples(index=False, name=None):
        yield colors.index(row[0]), colors.index(row[1]), row[2]

def populate_matrix(df, colors):
    """Collect every vote of ``df`` into a pairwise matrix of score lists.

    Cell ``[i][j]`` lists the scores of colour ``i`` against colour ``j``;
    each vote is also recorded with the opposite sign in cell ``[j][i]``.

    Args:
        df: Table of votes, as consumed by ``iter_votes``.
        colors: Sequence of colour names defining the row/column order.

    Returns:
        A ``len(colors)`` x ``len(colors)`` nested list of score lists.
    """
    votes = create_matrix(len(colors))
    for first, second, score in iter_votes(df, colors):
        # Seen from the first colour ...
        votes[first][second].append(score)
        # ... and mirrored for the second colour.
        votes[second][first].append(-1 * score)
    return votes

# Pairwise score lists for the whole dataset, rows/columns ordered like `colors`.
matrix = populate_matrix(data_df, colors)
matrix

  yield colors.index(row[0]), colors.index(row[1]), row[2]


[[[], [1], [-1], [], [], [1]],
 [[-1], [], [], [], [0, 1], [-1]],
 [[1], [], [], [-1], [-1, -1], [1, 1]],
 [[], [], [1], [], [-1], []],
 [[], [0, -1], [1, 1], [1], [], [0]],
 [[-1], [1], [-1, -1], [], [0], []]]

In [5]:
def get_number_wins(matrix):
    """Count wins, losses and draws for every row of a pairwise vote matrix.

    Each cell of ``matrix`` is a list of scores seen from the row's point of
    view: ``1`` is a win, ``-1`` a loss and ``0`` a draw. Any other value is
    ignored.

    Args:
        matrix: Nested list as produced by ``populate_matrix``.

    Returns:
        A tuple ``(wins, losts, draws)`` of three lists holding one integer
        count per row of ``matrix``, in row order.
    """
    wins, losts, draws = [], [], []
    for row in matrix:
        # Flatten the row: the opponent does not matter for the tallies.
        scores = [score for cell in row for score in cell]
        wins.append(sum(1 for score in scores if score == 1))
        losts.append(sum(1 for score in scores if score == -1))
        draws.append(sum(1 for score in scores if score == 0))
    return wins, losts, draws

wins, losts, draws = get_number_wins(matrix)

# Summary table: one row per colour (index 'Color') with its win/loss/draw tallies.
synt = pd.DataFrame(
    {'Win': wins, 'Lost': losts, 'Draw': draws},
    index=pd.Index(colors, name='Color'),
)

synt

Unnamed: 0_level_0,Win,Lost,Draw
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bleu,2,1,0
Jaune,1,2,1
Noir,3,3,0
Rose,1,1,0
Rouge,3,1,2
Vert,1,3,1


### Classement naïf
On fait simplement la moyenne des succès : (victoires − défaites) / (victoires + défaites), les matchs nuls étant ignorés.

In [6]:
# Net success rate over decisive games: (wins - losses) / (wins + losses).
# Draws do not enter the formula.
synt['Points'] = synt['Win'].sub(synt['Lost']).div(synt['Win'].add(synt['Lost']))

# Best colour first.
synt.sort_values('Points', ascending=False, inplace=True)

synt

Unnamed: 0_level_0,Win,Lost,Draw,Points
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rouge,3,1,2,0.5
Bleu,2,1,0,0.333333
Noir,3,3,0,0.0
Rose,1,1,0,0.0
Jaune,1,2,1,-0.333333
Vert,1,3,1,-0.5


Attention, ce classement ne tient pas compte de qui a battu qui !

## Calcul des probabilités de gagner

Inspiration de la méthode de Borda

In [7]:
def compute_proba(matrix):
    """Estimate, for every ordered pair, how often the row beats the column.

    Scores ``-1`` / ``0`` / ``1`` are mapped to ``0`` / ``0.5`` / ``1`` and
    averaged per cell. Pairs without any vote get ``0``.

    Args:
        matrix: Square nested list of score lists (see ``populate_matrix``).

    Returns:
        A ``len(matrix)`` x ``len(matrix)`` float array of averaged outcomes.
    """
    size = len(matrix)
    proba = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            votes = matrix[i][j]
            if len(votes) == 0:
                # Never compared: keep the initial 0.
                continue
            proba[i, j] = np.mean(np.array(votes) * .5 + .5)
    return proba

proba = compute_proba(matrix)

# Label both axes with the colour names; the row labels form the 'Color' index.
proba_df = pd.DataFrame(
    proba,
    index=pd.Index(colors, name='Color'),
    columns=colors,
)

# Sum of a colour's win probabilities against every opponent.
proba_df['Total'] = proba_df.loc[:, colors].sum(axis=1)

# Attach the win/loss/draw summary and rank by total probability.
proba_df = proba_df.join(synt).sort_values(by='Total', ascending=False)

proba_df

Unnamed: 0_level_0,Bleu,Jaune,Noir,Rose,Rouge,Vert,Total,Win,Lost,Draw,Points
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Rouge,0.0,0.25,1.0,1.0,0.0,0.5,2.75,3,1,2,0.5
Bleu,0.0,1.0,0.0,0.0,0.0,1.0,2.0,2,1,0,0.333333
Noir,1.0,0.0,0.0,0.0,0.0,1.0,2.0,3,3,0,0.0
Vert,0.0,1.0,0.0,0.0,0.5,0.0,1.5,1,3,1,-0.5
Rose,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1,1,0,0.0
Jaune,0.0,0.0,0.0,0.0,0.75,0.0,0.75,1,2,1,-0.333333


Le faible nombre d'expériences biaise le score total (cf. Rose, qui devrait être dernier).

==> Il faut pondérer la probabilité de gagner avec la probabilité de parier sur la couleur.

In [8]:
# Total number of recorded comparisons.
n_exp = data_df.shape[0]

# Weight each colour's total probability by its share of decisive games.
# NOTE(review): draws are left out of the weight (only Win + Lost) — confirm
# this is intended.
proba_df['Score'] = proba_df['Total'] * (proba_df['Win'] + proba_df['Lost']) / n_exp
proba_df.sort_values('Score', ascending=False, inplace=True)
proba_df

Unnamed: 0_level_0,Bleu,Jaune,Noir,Rose,Rouge,Vert,Total,Win,Lost,Draw,Points,Score
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Noir,1.0,0.0,0.0,0.0,0.0,1.0,2.0,3,3,0,0.0,0.923077
Rouge,0.0,0.25,1.0,1.0,0.0,0.5,2.75,3,1,2,0.5,0.846154
Bleu,0.0,1.0,0.0,0.0,0.0,1.0,2.0,2,1,0,0.333333,0.461538
Vert,0.0,1.0,0.0,0.0,0.5,0.0,1.5,1,3,1,-0.5,0.461538
Jaune,0.0,0.0,0.0,0.0,0.75,0.0,0.75,1,2,1,-0.333333,0.173077
Rose,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1,1,0,0.0,0.153846


In [9]:
# Sanity check: every pair of colours that met contributes p_ij + p_ji = 1 to the
# grand total, so this sum equals the number of distinct pairs that were compared.
proba_df['Total'].sum()

np.float64(10.0)

## Méthode Elo

Le classement Elo est un système d’évaluation comparatif du niveau de jeu des joueurs d’échecs, de go ou d’autres jeux en un contre un.

Ce système est également utilisé pour le classement des équipes de football (depuis juillet 2018, mais de manière non officielle), ainsi que par de nombreux jeux en ligne. Tout joueur qui participe à ce type de compétition se voit attribuer un classement provisoire, classement qui évoluera en fonction de ses performances, et qui reflète sa probabilité de gagner.

In [10]:
class Elo:
    """Minimal Elo rating tracker for head-to-head results.

    Ratings live in ``ratingDict`` (player name -> rating). ``k`` is the
    K-factor that scales every rating update.
    """

    def __init__(self, ratingDict=None, k:int=20):
        """Create a tracker.

        Args:
            ratingDict: Optional mapping of player name to rating. It is
                stored by reference (not copied), so later updates are
                visible through the mapping passed in. When omitted, a new
                empty dict is created for this instance.
            k: K-factor applied to every update.
        """
        # A literal ``{}`` default would be evaluated once and then shared by
        # every instance built without an explicit mapping.
        self.ratingDict = {} if ratingDict is None else ratingDict
        self.k = k

    def addPlayer(self, name, rating=1500):
        """Register ``name`` with ``rating``, overwriting any existing entry."""
        self.ratingDict[name] = rating

    def gameOver(self, playerA, playerB, result):
        """Update both ratings after one game.

        Args:
            playerA: Name of the first player.
            playerB: Name of the second player.
            result: Outcome from ``playerA``'s point of view: ``1`` for a
                win, ``0`` for a draw, ``-1`` for a loss.

        Raises:
            KeyError: If either player has no rating yet.
        """
        rating_A = self.ratingDict[playerA]
        rating_B = self.ratingDict[playerB]

        # Expected scores; they always sum to 1.
        E_A = self.expectResult(rating_A, rating_B)
        E_B = 1 - E_A

        # Map result (-1 / 0 / 1) to an actual score (0 / 0.5 / 1) for each
        # side and move each rating by k * (actual - expected).
        self.ratingDict[playerA] += self.k * ((1 + result) * .5 - E_A)
        self.ratingDict[playerB] += self.k * ((1 - result) * .5 - E_B)

    def expectResult(self, r1, r2):
        """Return the expected score (between 0 and 1) of rating ``r1`` against ``r2``."""
        exponent = (r2 - r1) / 400.
        return 1. / (pow(10., exponent) + 1.)

def validation():
    """Check ``Elo`` against the worked example of the French Wikipedia article.

    Each case starts from fresh ratings (1800 vs 2005), plays one game and
    asserts the rounded ratings of both players.
    """
    # https://fr.wikipedia.org/wiki/Classement_Elo#Application_pratique

    # (playerA, playerB, result, expected 'Player 1', expected 'Player 2')
    cases = (
        # Player 1 wins
        ('Player 1', 'Player 2', 1, 1815, 1990),
        # Player 1 wins, reported from Player 2's side
        ('Player 2', 'Player 1', -1, 1815, 1990),
        # Draw
        ('Player 1', 'Player 2', 0, 1805, 2000),
    )

    for player_a, player_b, result, expected_1, expected_2 in cases:
        elo = Elo({
            'Player 1': 1800,
            'Player 2': 2005
        })

        elo.gameOver(player_a, player_b, result)

        assert round(elo.ratingDict['Player 1']) == expected_1 and round(elo.ratingDict['Player 2']) == expected_2
    
# Run the self-check immediately: a failing assertion stops the notebook here.
validation()

In [11]:
# Initialization
elo = Elo(k=100)

# Every colour starts from the default rating.
for player in colors:
    elo.addPlayer(player)

# Train the model: replay every vote in dataset order. Rows are read as plain
# tuples because integer keys on a string-labelled Series (``row[0]`` on an
# ``iterrows`` row) are deprecated in pandas.
for player_a, player_b, result in data_df[['A', 'B', 'score']].itertuples(index=False, name=None):
    elo.gameOver(player_a, player_b, result)

  elo.gameOver(row[0], row[1], row[2])


In [12]:
# Attach each colour's final Elo rating (column 'Rank') to the summary table
# and list the highest-rated colour first.
df_rank = synt.join(
    pd.Series(elo.ratingDict, name='Rank')
).sort_values('Rank', ascending=False)
df_rank

Unnamed: 0_level_0,Win,Lost,Draw,Points,Rank
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Rouge,3,1,2,0.5,1589.708803
Bleu,2,1,0,0.333333,1516.507039
Rose,1,1,0,0.0,1500.438033
Noir,3,3,0,0.0,1486.475682
Jaune,1,2,1,-0.333333,1472.842841
Vert,1,3,1,-0.5,1434.027602


In [13]:
# Sanity check: each Elo update is zero-sum (what one player gains, the other
# loses), so the mean rating stays at the common starting value.
df_rank['Rank'].mean()

np.float64(1500.0)

## Augmentation de la taille de l'échantillon