In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
from utils import Dataset
from utils.outlierdetectionmodel import BoxCoxParametricAveragePercent
from utils.dftransformer import get_table_players

In [3]:
# File name handed to the project-local Dataset helper (imported from utils).
_4DM4_DB_FILE = '4dm4.db'
_4dm4_dataset = Dataset(_4DM4_DB_FILE)

In [4]:
# Columns requested from 'scores': player, round, beatmap type/tag and the
# score_logit value that the rest of the notebook works with.
score_columns = ['player_name', 'round', 'beatmap_type', 'beatmap_tag', 'score_logit']

# NOTE(review): presumably keeps only rows whose beatmap_type is one of these
# values -- confirm against Dataset.select's `where` semantics.
beatmap_type_filter = {
    'beatmap_type': ['LN', 'RC', 'HB'],
}

_4dm4_data = _4dm4_dataset.select(
    'scores',
    columns=score_columns,
    where=beatmap_type_filter,
)

In [5]:
# Peek at the first rows to sanity-check the selected columns.
_4dm4_data.head()

Unnamed: 0,player_name,round,beatmap_type,beatmap_tag,score_logit
0,drag_,Q,RC,1,3.922366
1,JPBenedictGame,Q,RC,1,4.792686
2,iVALORUS,Q,RC,1,2.952871
3,BL5,Q,RC,1,5.119073
4,-Tebo-,Q,RC,1,5.22173


In [6]:
# Reshape the selected score rows into a per-player table via the project helper.
# NOTE(review): judging by the rendered output, rows are players and columns are
# <round>_<beatmap_type>_<beatmap_tag> keys (e.g. Q_RC_1) holding score_logit,
# with NaN where a player has no score -- confirm in utils.dftransformer.
player_table = get_table_players(_4dm4_data)

In [7]:
# Show the player table; the rendered output elides the middle rows/columns.
player_table

Unnamed: 0,Q_RC_1,Q_RC_2,Q_LN_1,Q_HB_1,RO32_RC_1,RO32_RC_4,RO32_RC_5,RO32_LN_3,RO32_HB_1,RO16_RC_1,...,GF_RC_7,GF_LN_2,GF_LN_3,GF_LN_4,GF_HB_1,GF_HB_2,GF_HB_3,SF_RC_2,SF_HB_3,GF_RC_3
drag_,3.922366,2.913464,3.310021,,,,,,,,...,,,,,,,,,,
JPBenedictGame,4.792686,3.630805,3.143908,3.262931,,,,,,,...,,,,,,,,,,
iVALORUS,2.952871,2.364221,1.976354,2.624503,,,,,,,...,,,,,,,,,,
BL5,5.119073,2.868556,3.635748,3.548144,3.984893,3.215893,4.803535,4.269779,,,...,,,,,,,,,,
-Tebo-,5.221730,3.583254,3.915976,3.042808,5.207228,4.899665,5.370992,4.391723,5.008284,5.030982,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
oliverq,,,,,,,,,,,...,,,,,,,,,,
TheFunk,,,,,,4.256847,,,,,...,,,,,,,,,,
Herarudo,,,,,,,5.109978,,,,...,,,,,,,,,,
[Antonio],,,,,,,,,3.711016,,...,,,,,,,,,,


In [8]:
# Fit the project's outlier-detection model on the raw player-by-map matrix.
# The table is passed as a plain NumPy array with its missing entries left in
# place (no imputation happens in this notebook).
model = BoxCoxParametricAveragePercent()
model.fit(player_table.to_numpy())

In [16]:
# Results frame keyed by the same index as player_table; the model's
# predictions are attached positionally, one value per player_table row.
df = pd.DataFrame(index=player_table.index).assign(
    outlier_values=model.predict(player_table.to_numpy()),
)

In [18]:
# Sixteen rows with the largest raw outlier value, highest first.
df.sort_values('outlier_values', ascending=False).iloc[:16]

Unnamed: 0,outlier_values
NotXbxx,0.938812
Paturages,0.808961
Micleak,0.802119
NifloX_,0.795386
Ricizus,0.779673
shokoha,0.754505
JayLye,0.739499
XN_The_XD,0.720398
Gray Hardrock,0.718346
Pluty,0.713545


In [19]:
# Number of non-missing cells in each player_table row, i.e. how many maps have
# a recorded value for that row. Converted to a NumPy array so the assignment
# is positional, matching df's row order (df shares player_table's index).
df['n_maps_played'] = player_table.count(axis=1).to_numpy()

In [20]:
# Show the results frame with the new n_maps_played column alongside outlier_values.
df

Unnamed: 0,outlier_values,n_maps_played
drag_,0.210072,3
JPBenedictGame,0.369865,4
iVALORUS,0.043815,4
BL5,0.336032,8
-Tebo-,0.542988,17
...,...,...
oliverq,0.248482,7
TheFunk,0.365996,2
Herarudo,0.303336,2
[Antonio],0.076878,2


In [36]:
# Weight each outlier value by the natural log of the number of maps played, so
# rows backed by more maps rank higher for the same raw value.
# Note: log(1) == 0, so a row with exactly one map gets an adjusted value of 0.
df['adjusted_ol_values'] = df['outlier_values'].mul(np.log(df['n_maps_played']))

In [37]:
# Fifteen rows with the largest log-weighted outlier value, highest first.
df.sort_values('adjusted_ol_values', ascending=False).iloc[:15]

Unnamed: 0,outlier_values,n_maps_played,adjusted_ol_values
shokoha,0.754505,50,2.95164
Micleak,0.802119,31,2.754468
Gray Hardrock,0.718346,45,2.734502
Lazereed,0.688086,41,2.555258
Ricizus,0.779673,26,2.540251
[LS]Byte,0.651652,44,2.465975
henryalexbr,0.638299,45,2.429787
Keskidi,0.64274,43,2.417474
JayLye,0.739499,26,2.409358
MisteurFR,0.691559,31,2.374805
