# Experimentation

In [1]:
from rstt import Ranking
from rstt.stypes import SPlayer
import pandas as pd
from scipy import stats


RESULTS: dict[str, pd.DataFrame] = {}

def evaluate(data: pd.DataFrame, reference: Ranking, trained: Ranking, population: list[SPlayer], percentiles: list[int]=[5, 10, 50]):
    results = {}
    
    # analysis
    results['KendallTau'] = stats.kendalltau(reference[population], trained[population]).statistic
    results['Spearmann'] = stats.spearmanr(reference[population], trained[population]).statistic

    rank_diffs = [reference[p]- trained[p] for p in population]
    results['Overrate'] = abs(min(rank_diffs))
    results['Underrate'] = max(rank_diffs)

    for percentile in percentiles:
        threshold = int(NBPlayers // (100/percentile))
        results[f'{percentile}pr accuracy'] = len(set(reference[:threshold]).intersection(set(trained[:threshold]))) / threshold

    #print(results['Overrate'], results['Underrate'])
    data = pd.concat([data, pd.DataFrame([results])], axis=0, ignore_index=True)
    return data

In [2]:
from rstt import Player, BasicElo, LogSolver, BTRanking, RoundRobin

import pandas as pd
from scipy import stats

# parameters
samples = 100
NBPlayers = 40

correlations = ['KendallTau', 'Spearmann']
diffs = ['Overrate', 'Underrate']
accuracies = ['5pr accuracy', '10pr accuracy', '50pr accuracy']
COLUMNS = correlations + diffs + accuracies
RESULTS['FullRoundRobin'] = pd.DataFrame(columns=COLUMNS)

for i in range(samples):
    # create new player base
    population = Player.create(nb=NBPlayers)
    gtbc = BTRanking('Base Case GroundTruth', population)
    baseElo = BasicElo('Elo on RoundRobin')

    # everyone faces each others once
    cup = RoundRobin('BaseCaseRR', gtbc, LogSolver())
    cup.registration(population)
    cup.run()

    # update elo
    baseElo.update(games=cup.games())

    # analysis
    RESULTS['FullRoundRobin'] = evaluate(RESULTS['FullRoundRobin'], gtbc, baseElo, population)

  data = pd.concat([data, pd.DataFrame([results])], axis=0, ignore_index=True)


In [3]:
RESULTS['FullRoundRobin']['Overrate']  = pd.to_numeric(RESULTS['FullRoundRobin']['Overrate'])
RESULTS['FullRoundRobin']['Underrate']  = pd.to_numeric(RESULTS['FullRoundRobin']['Underrate'])
RESULTS['FullRoundRobin'] .describe()

Unnamed: 0,KendallTau,Spearmann,Overrate,Underrate,5pr accuracy,10pr accuracy,50pr accuracy
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.887692,0.976702,6.2,5.81,0.79,0.8525,0.938
std,0.022297,0.008197,1.814643,1.593516,0.258003,0.142555,0.041512
min,0.815385,0.950094,3.0,3.0,0.0,0.5,0.85
25%,0.871795,0.97228,5.0,5.0,0.5,0.75,0.9
50%,0.887179,0.977767,6.0,6.0,1.0,0.75,0.95
75%,0.902564,0.982786,7.0,7.0,1.0,1.0,0.95
max,0.938462,0.991932,11.0,11.0,1.0,1.0,1.0


In [4]:
import random

NBRegions = 4
for i in range(NBRegions):
    RESULTS[f'Region_#{i}'] = pd.DataFrame(columns=COLUMNS)
RESULTS['AllRegions']  = pd.DataFrame(columns=COLUMNS)

for i in range(samples):
    # Regional Ecosystem
    regions = [Player.create(nb=NBPlayers//NBRegions, level_dist=random.gauss, level_params={'mu': 1000+j*250, 'sigma': 50}) for j in range(NBRegions)]
    gt = BTRanking('GroundTruth')
    for region in regions:
        gt.add(region)
    
    # Regional DataSet
    cups = []
    for i, region in enumerate(regions):
        cup = cup1 = RoundRobin('Region {i}', gt, LogSolver(lc=600))
        cup1.registration(region)
        cup1.run()
        cups.append(cup)
    
    # One ELo Ranking
    elo = BasicElo('Elo for two regions')
    elo.update(events=cups)

    # analysis
    for i, region in enumerate(regions):
        RESULTS[f'Region_#{i}'] = evaluate(RESULTS[f'Region_#{i}'], gt, elo, region)
    RESULTS['AllRegions']  = evaluate(RESULTS['AllRegions'], gt, elo, gt.players())

  data = pd.concat([data, pd.DataFrame([results])], axis=0, ignore_index=True)


In [5]:
RESULTS['AllRegions']['Overrate']  = pd.to_numeric(RESULTS['AllRegions']['Overrate'])
RESULTS['AllRegions']['Underrate']  = pd.to_numeric(RESULTS['AllRegions']['Underrate'])
RESULTS['AllRegions'] .describe()

Unnamed: 0,KendallTau,Spearmann,Overrate,Underrate,5pr accuracy,10pr accuracy,50pr accuracy
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.679744,0.855208,14.3,14.25,0.465,0.605,0.832
std,0.052879,0.042669,3.39786,3.214157,0.268319,0.198415,0.059255
min,0.512821,0.693809,7.0,9.0,0.0,0.25,0.7
25%,0.652564,0.833208,12.0,12.0,0.5,0.5,0.8
50%,0.676923,0.857223,14.0,14.0,0.5,0.5,0.85
75%,0.712821,0.885507,16.0,16.0,0.5,0.75,0.85
max,0.802564,0.933208,23.0,22.0,1.0,1.0,0.95


In [6]:
for i in range(NBRegions):
    RESULTS[f'Region_#{i}']['Overrate'] = pd.to_numeric(RESULTS[f'Region_#{i}']['Overrate'])
    RESULTS[f'Region_#{i}']['Underrate'] = pd.to_numeric(RESULTS[f'Region_#{i}']['Underrate'])
    print(RESULTS[f'Region_#{i}'].describe())

       KendallTau   Spearmann    Overrate   Underrate  5pr accuracy  \
count  100.000000  100.000000  100.000000  100.000000    100.000000   
mean     0.757333    0.882182    8.580000    8.380000      0.465000   
std      0.110759    0.079289    4.682182    4.237352      0.268319   
min      0.511111    0.660606    0.000000    0.000000      0.000000   
25%      0.688889    0.842424    4.750000    5.750000      0.500000   
50%      0.777778    0.903030    8.000000    8.000000      0.500000   
75%      0.833333    0.942424   12.000000   11.000000      0.500000   
max      0.955556    0.987879   23.000000   21.000000      1.000000   

       10pr accuracy  50pr accuracy  
count     100.000000     100.000000  
mean        0.605000       0.832000  
std         0.198415       0.059255  
min         0.250000       0.700000  
25%         0.500000       0.800000  
50%         0.500000       0.850000  
75%         0.750000       0.850000  
max         1.000000       0.950000  
       KendallTau 

In [7]:
from statistics import mean

for i in range(samples):
    # Regional Ecosystem
    regions = [Player.create(nb=NBPlayers//NBRegions, level_dist=random.gauss, level_params={'mu': 1000+i*250, 'sigma': 0}) for i in range(NBRegions)]
    m = []
    for region in regions:
        m.append(mean([p.level() for p in region]))
    print(m)

[1507.7983482349177, 1352.5840679446692, 1569.03125225412, 1295.0394129897563]
[1480.6848690414984, 1654.611581291731, 1374.1404074837367, 1396.884048472437]
[1347.20628505364, 1699.3443157075694, 1439.0906806593703, 1451.1395178806824]
[1557.4338753254049, 1459.3677341644288, 1534.2908725070747, 1550.29851062591]
[1330.096335732151, 1315.7612552025828, 1683.426350099603, 1436.1761060257516]
[1241.434052856776, 1425.4202494720505, 1526.2321567399363, 1398.2225309949226]
[1493.7939494966206, 1229.350433220176, 1428.057029015685, 1423.9366966481791]
[1757.8185292977948, 1198.754002711649, 1472.5926307936932, 1317.9860661591902]
[1735.6702796463128, 1442.7463913930599, 1304.1281362470595, 1257.7887222529898]
[1467.8395547640575, 1178.7609520092176, 1791.6845336962729, 1342.721352284519]
[1494.7221242505907, 1571.6761708226984, 1558.2263085644506, 1264.2965656991273]
[1690.5941703424953, 1374.591565618842, 1601.6431031646628, 1578.1259305849362]
[1401.5591388960127, 1699.1156682179787, 131

In [8]:
region

[Player - name: Evia Ellis, level: 1813.7206624751075,
 Player - name: Pauline Lovan, level: 1931.5970017913546,
 Player - name: Gino Hoskins, level: 2133.588014875507,
 Player - name: Robert Light, level: 2303.4810011221234,
 Player - name: Catherine Sasaki, level: 794.9923680740927,
 Player - name: Amanda Sales, level: 872.3804880272163,
 Player - name: Christie George, level: 1615.072399730812,
 Player - name: Ronny Lee, level: 2651.195594663828,
 Player - name: Jerry Kelley, level: 1995.6189812484308,
 Player - name: Linda Guthmiller, level: 840.068002852785]