# Experimentation

In [1]:
from rstt import Ranking
from rstt.stypes import SPlayer
import pandas as pd
from scipy import stats

def evaluate(data: pd.DataFrame, reference: Ranking, trained: Ranking, population: list[SPlayer], percentiles: list[int]=[5, 10, 50]):
    row = {}
    
    # rank correlation
    row['KendallTau'] = stats.kendalltau(reference[population], trained[population]).statistic

    # rank difference
    rank_diffs = [reference[p]- trained[p] for p in population]
    row['Overrate'] = abs(min(rank_diffs))
    row['Underrate'] = max(rank_diffs)

    # accuracy
    for percentile in percentiles:
        threshold = max(int(len(population) // (100/percentile)), 1)
        row[f'{percentile}pr accuracy'] = len(set(reference[:threshold]).intersection(set(trained[:threshold]))) / threshold

    data = pd.concat([data, pd.DataFrame([row])], axis=0, ignore_index=True)
    return data

In [2]:
from rstt import BasicPlayer, BasicElo, LogSolver, BTRanking, RoundRobin

from simulation import settings, experiments, metrics

# parameters
samples = 100
NBPlayers = 60

# data 
COLUMNS = ['KendallTau', 'Underrate', 'Overrate', '5pr accuracy', '10pr accuracy', '50pr accuracy']
RESULTS: dict[str, pd.DataFrame] = {}

exp_name = 'FullRoundRobin'
RESULTS[exp_name] = pd.DataFrame(columns=COLUMNS, dtype='float')
for i in range(samples):
    # test ranking
    elo = BasicElo(f'Elo-{exp_name}')
    
    # simulation model
    model = settings.baseModel(region=1, nb=NBPlayers)
    
    # data set production
    data = experiments.regional_round_robins(f'{exp_name}-Round-Robin',
                                             seeding=model.groundtruth,
                                             population=[model.ecosystem.teams()],
                                             solver=model.solver)
    # elo update
    elo.update(**data)

    # analysis
    RESULTS[exp_name] = evaluate(RESULTS[exp_name], model.groundtruth, elo, model.groundtruth.players())

# show summary
RESULTS[exp_name].describe()

Unnamed: 0,KendallTau,Underrate,Overrate,5pr accuracy,10pr accuracy,50pr accuracy
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.903808,8.2,8.4,0.816667,0.88,0.951
std,0.014707,2.0695,2.283007,0.197487,0.111313,0.031589
min,0.859887,4.0,5.0,0.333333,0.5,0.866667
25%,0.894633,7.0,7.0,0.666667,0.833333,0.933333
50%,0.903955,8.0,8.0,0.833333,0.833333,0.95
75%,0.912994,9.0,9.0,1.0,1.0,0.966667
max,0.934463,14.0,16.0,1.0,1.0,1.0


In [3]:
from project import scene

exp_name = 'Regions'

# store metrics
for region in scene.Region:
    RESULTS[f'{exp_name}-{region}'] = pd.DataFrame(columns=COLUMNS, dtype='float')
RESULTS[exp_name]  = pd.DataFrame(columns=COLUMNS, dtype='float')

for i in range(samples):
    elo2 = BasicElo(f'Elo-{exp_name}')
    
    model = settings.baseModel(len(scene.Region), 10)
    data = experiments.regional_round_robins(f'{exp_name}-Round-Robin',
                                             seeding=model.groundtruth,
                                             population=[model.ecosystem.teams(region) for region in model.ecosystem.regions()],
                                             solver=model.solver)
    elo2.update(**data)
    
    # analysis
    for region in model.ecosystem.regions():
        RESULTS[f'{exp_name}-{region}'] = evaluate(RESULTS[f'{exp_name}-{region}'], model.groundtruth, elo2, model.ecosystem.teams(region))
    RESULTS[exp_name]  = evaluate(RESULTS[exp_name], model.groundtruth, elo2, model.ecosystem.teams())

# show summary
RESULTS[exp_name].describe()

Unnamed: 0,KendallTau,Underrate,Overrate,5pr accuracy,10pr accuracy,50pr accuracy
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.708554,20.7,20.56,0.39,0.591667,0.865333
std,0.044987,3.935079,4.416041,0.259737,0.148647,0.044676
min,0.567232,13.0,13.0,0.0,0.333333,0.766667
25%,0.676836,18.0,17.0,0.333333,0.5,0.833333
50%,0.711299,20.0,20.0,0.333333,0.666667,0.866667
75%,0.741808,23.0,23.0,0.666667,0.666667,0.9
max,0.811299,33.0,34.0,1.0,1.0,0.966667


In [4]:
exp_name = 'SkilledRegions'

for region in scene.Region:
    RESULTS[f'{exp_name}-{region}'] = pd.DataFrame(columns=COLUMNS, dtype='float')
RESULTS[exp_name]  = pd.DataFrame(columns=COLUMNS, dtype='float')

for i in range(samples):
    # test ranking
    elo3 = BasicElo(f'Elo-{exp_name}')
    
    # simulation model
    model = settings.regionalSkills(region=len(scene.Region),
                                    nb=10,
                                    mus=[val for val in range(750, 2500, 250)],
                                    sigmas=[300  for _ in scene.Region])
    
    # data set production
    data = experiments.regional_round_robins(f'{exp_name}-Round-Robin',
                                             seeding=model.groundtruth,
                                             population=[model.ecosystem.teams(region) for region in model.ecosystem.regions()],
                                             solver=model.solver)
    # elo update
    elo3.update(**data)

    # analysis
    for region in scene.Region:
        RESULTS[f'{exp_name}-{region}'] = evaluate(RESULTS[f'{exp_name}-{region}'], model.groundtruth, elo3, model.ecosystem.teams(region))
    RESULTS[exp_name]  = evaluate(RESULTS[exp_name], model.groundtruth, elo3, model.groundtruth.players())

# show summary
RESULTS[exp_name].describe()

Unnamed: 0,KendallTau,Underrate,Overrate,5pr accuracy,10pr accuracy,50pr accuracy
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.324757,37.9,38.68,0.263333,0.345,0.636667
std,0.041202,3.870374,4.385191,0.208113,0.161111,0.048432
min,0.20565,29.0,29.0,0.0,0.0,0.5
25%,0.296045,34.75,36.0,0.0,0.166667,0.6
50%,0.322034,38.0,39.0,0.333333,0.333333,0.633333
75%,0.357062,41.0,42.0,0.333333,0.5,0.666667
max,0.39887,45.0,51.0,0.666667,0.666667,0.766667


In [5]:
RESULTS[f'{exp_name}-{scene.Region.LEC}'].describe()

Unnamed: 0,KendallTau,Underrate,Overrate,5pr accuracy,10pr accuracy,50pr accuracy
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.714667,20.2,9.24,0.21,0.21,0.308
std,0.113211,5.47169,4.811854,0.40936,0.40936,0.159342
min,0.422222,7.0,0.0,0.0,0.0,0.0
25%,0.644444,16.0,6.0,0.0,0.0,0.2
50%,0.733333,20.0,9.0,0.0,0.0,0.4
75%,0.822222,24.0,13.0,0.0,0.0,0.4
max,0.955556,32.0,22.0,1.0,1.0,0.6
