In [1]:
import sqlite3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Paths of the three tournament SQLite databases, relative to the working directory.
_4dm4_data_fp, soft5_data_fp, soft6_data_fp = (
    'dataset/4dm4.db',
    'dataset/soft5.db',
    'dataset/soft6.db',
)

# One open connection per database, created in the same order as the paths above.
_4dm4_data, soft5_data, soft6_data = map(
    sqlite3.connect,
    (_4dm4_data_fp, soft5_data_fp, soft6_data_fp),
)

In [3]:
# 4dm4: every column of `scores`, leaving out rows whose beatmap_type is 'SV'.
# The literal is single-quoted on purpose: in SQL, double quotes denote an
# identifier, and SQLite only treats "SV" as a string through a legacy fallback
# that applies when no column of that name exists (and that can be compiled out).
_4dm4_sql = """SELECT * FROM scores WHERE beatmap_type != 'SV'"""

# SOFT 5/6: one row per score, with the player's name joined in from `players`
# and the round / beatmap type joined in from `mappools`. The LEFT JOIN keeps
# scores even when their beatmap_id has no row in `mappools`.
soft_sql = """SELECT players.name as player_name, scores.beatmap_id, mappools.round, mappools.beatmap_type, scores.score
FROM scores LEFT JOIN mappools ON (scores.beatmap_id = mappools.beatmap_id)
INNER JOIN players ON (scores.player_id = players.id)"""

# Materialise each query as a DataFrame; both SOFT databases share the same query.
_4dm4_dataset = pd.read_sql(_4dm4_sql, _4dm4_data)
soft5_dataset = pd.read_sql(soft_sql, soft5_data)
soft6_dataset = pd.read_sql(soft_sql, soft6_data)

In [4]:
# Preview the 4dm4 records returned by the query (rich display of the frame).
_4dm4_dataset

Unnamed: 0,player_name,round,beatmap_type,beatmap_tag,score,score_logit
0,drag_,Q,RC,1,980590,3.922366
1,JPBenedictGame,Q,RC,1,991778,4.792686
2,iVALORUS,Q,RC,1,950399,2.952871
3,BL5,Q,RC,1,994054,5.119073
4,-Tebo-,Q,RC,1,994631,5.221730
...,...,...,...,...,...,...
2760,MisteurFR,GF,TB,1,932066,2.618867
2761,Gray Hardrock,GF,TB,1,908730,2.298226
2762,--Pavin--,GF,TB,1,919303,2.432914
2763,shokoha,GF,TB,1,910134,2.315272


In [5]:
# Preview the SOFT 6 records returned by the join query (rich display of the frame).
soft6_dataset

Unnamed: 0,player_name,beatmap_id,round,beatmap_type,score
0,[KN]Lordieee,2121101,PO,HB,980915
1,[KN]Lordieee,2388838,PO,HB,983744
2,[KN]Lordieee,3202329,PO,RC,987171
3,[KN]Lordieee,3392120,PO,RC,957210
4,[KN]Lordieee,3501159,PO,LN,981079
...,...,...,...,...,...
1465,Lothus,3588596,F,HB,985477
1466,Lothus,3589150,F,RC,970469
1467,Lothus,3589487,F,HB,956582
1468,Lothus,3590133,F,RC,984770


In [6]:
def logit(x, max_score=1e6):
    """Log-odds transform of a score on a ``[0, max_score]`` scale.

    Computes ``log(x / (max_score - x))``.

    Parameters
    ----------
    x : scalar, numpy.ndarray or pandas.Series
        Score(s) to transform.
    max_score : float, default 1e6
        Upper end of the score scale; the default reproduces the original
        hard-coded ``1e6``.

    Returns
    -------
    NumPy float for scalar input, otherwise an object of the same kind as ``x``.
    The boundaries map to infinities: ``x == 0`` gives ``-inf`` and
    ``x == max_score`` gives ``+inf``.

    Notes
    -----
    The division is routed through NumPy so that a plain Python number equal to
    ``max_score`` yields ``+inf`` (NumPy emits a RuntimeWarning) instead of
    raising ``ZeroDivisionError``, matching what NumPy inputs already did.
    Callers feeding the result into a model should check for infinities.
    """
    return np.log(np.true_divide(x, max_score - x))

In [7]:
# Add a 'score_logit' column to both SOFT frames by passing every raw score through logit().
soft6_dataset['score_logit'] = soft6_dataset['score'].map(logit)
soft5_dataset['score_logit'] = soft5_dataset['score'].map(logit)

In [8]:
# Rename the SOFT 'beatmap_id' column to 'beatmap_tag', the column name the 4dm4 frame uses.
soft5_dataset = soft5_dataset.rename(columns={'beatmap_id': 'beatmap_tag'})
soft6_dataset = soft6_dataset.rename(columns={'beatmap_id': 'beatmap_tag'})

In [9]:
from utils.etimodel import ETIModel, GeneralizedETIModel

In [10]:
# Distinct player names per tournament, converted to NumPy string arrays.
_4dm4_players, soft5_players, soft6_players = (
    records['player_name'].unique().astype(str)
    for records in (_4dm4_dataset, soft5_dataset, soft6_dataset)
)

In [11]:
# Player names that appear in both the 4dm4 and the SOFT 5 data (sorted, unique).
np.intersect1d(_4dm4_players, soft5_players)

array(['ERA medium kek', 'Miaurichesu', 'Mr_adamello', 'NightNarumi',
       'Zergh', '_Seth', 'samuelhklumpers', 'shokoha', 'soutin',
       'spamblock', 'wolfpup08'], dtype='<U15')

In [29]:
# Player names that appear in both the 4dm4 and the SOFT 6 data (sorted, unique).
np.intersect1d(_4dm4_players, soft6_players)

array(['Akatsumi Chan', 'Doryoku', 'ERA medium kek', 'Enthalpy',
       'Froggie09', 'Mateo12345', 'Mr_adamello', 'Nikita3006', 'Satsukel',
       'ShiroPeDeFresa', 'TheFunk', 'Tiger', 'Zergh', 'shokoha',
       'wolfpup08'], dtype='<U15')

In [13]:
from utils.tables import get_table_from_records

# One ETIModel instance, reused for each of the per-category fits below.
# NOTE(review): assumes fit() does not depend on state left by a previous fit — confirm in utils.etimodel.
eti_4dm4 = ETIModel()

def get_beatmap_category_table(records: pd.DataFrame, category: str):
    """Build the table for a single beatmap category.

    Keeps only the rows of ``records`` whose ``'beatmap_type'`` equals
    ``category`` and hands them to ``get_table_from_records`` together with the
    column names ``'player_name'``, ``'beatmap'`` and ``'score_logit'``.

    Returns whatever ``get_table_from_records`` returns — presumably a
    player-by-beatmap table of logit scores; confirm in ``utils.tables``.
    """
    in_category = records['beatmap_type'] == category
    return get_table_from_records(records[in_category], 'player_name', 'beatmap', 'score_logit')

# Composite beatmap label of the form "<round>_<beatmap_type>_<beatmap_tag>".
_4dm4_dataset['beatmap'] = (
    _4dm4_dataset['round']
    + "_" + _4dm4_dataset['beatmap_type']
    + "_" + _4dm4_dataset['beatmap_tag'].astype(str)
)

# One table per category, built in the order RC, HB, LN.
rc, hb, ln = (
    get_beatmap_category_table(_4dm4_dataset, category)
    for category in ("RC", "HB", "LN")
)

# Fit each table with the shared model; the result keeps the table's own index.
rc_eti, hb_eti, ln_eti = (
    pd.DataFrame(eti_4dm4.fit(table.values), index=table.index)
    for table in (rc, hb, ln)
)

# Gather the three fits in one frame that has a row for every 4dm4 player.
eti_cats = pd.DataFrame(index=_4dm4_players)
eti_cats['rc'], eti_cats['hb'], eti_cats['ln'] = rc_eti, hb_eti, ln_eti

# A player with no value in a category receives that category's minimum.
eti_cats = eti_cats.fillna(
    {column: eti_cats[column].min() for column in ('rc', 'hb', 'ln')}
)

In [14]:
# Preview the per-player rc / hb / ln values after the missing entries were filled.
eti_cats

Unnamed: 0,rc,hb,ln
drag_,-2.203943,-1.922962,-1.848750
JPBenedictGame,-2.034906,-1.485538,-1.881729
iVALORUS,-2.368395,-1.628269,-2.113531
BL5,-1.515542,-1.421774,-1.379444
-Tebo-,1.049462,0.151761,-1.019061
...,...,...,...
oliverq,-1.845866,-1.815909,-2.073226
TheFunk,-2.227620,-1.522535,-2.290171
Herarudo,-2.308657,-1.922962,-2.120954
[Antonio],-2.664934,-1.588431,-2.290171


In [15]:
# Build the model for SOFT 6 from the 4dm4 records and the category list RC / HB / LN.
# NOTE(review): the SettingWithCopyWarning in the output quotes a line that is not in this
# notebook, so it presumably originates inside utils.etimodel — fix it there.
soft6_model = GeneralizedETIModel(_4dm4_dataset, ['RC', 'HB', 'LN'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pand

In [16]:
# Run the model's regression fit on the SOFT 6 records.
# NOTE(review): presumably calibrates the 4dm4-based model against SOFT 6 scores — confirm in utils.etimodel.
soft6_model.fit_regression(soft6_dataset)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pand

In [31]:
# Show the 15 highest SOFT 6 predictions (column 0), largest first.
(
    soft6_model
    .predict(soft6_dataset)
    .sort_values(0, ascending=False)
    .head(15)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pand

Unnamed: 0,0
[GS]Myuka,79.093969
Amerom,58.573128
Reyi,55.148052
SillyFangirl,52.713042
-Aishiteru-,52.171205
Silhoueska Elze,47.490179
XxNewson1234xX,45.645447
Lothus,38.489039
Lott,32.163295
banan,26.801999


In [18]:
# Same construction as the SOFT 6 model: 4dm4 records plus the RC / HB / LN category list,
# then the regression fit is run on the SOFT 5 records instead.
soft5_model = GeneralizedETIModel(_4dm4_dataset, ['RC', 'HB', 'LN'])
soft5_model.fit_regression(soft5_dataset)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pand

In [27]:
# Show the 15 lowest SOFT 5 predictions (column 0); rows stay in descending order.
(
    soft5_model
    .predict(soft5_dataset)
    .sort_values(0, ascending=False)
    .tail(15)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pand

Unnamed: 0,0
etterna in osu,-3.105326
LeMarcinho,-3.974243
Halogen-,-4.56066
mashu,-4.882351
xSnaggles,-5.668871
AMDuskia1996,-6.213216
Yonk_,-6.256052
jkzu123,-6.380928
BossPlays,-6.687865
HowToPlayLN,-6.902062


In [32]:
# SOFT 6: predictions sorted by column 0 (largest first), then written to CSV.
soft6_eti = (
    soft6_model
    .predict(soft6_dataset)
    .sort_values(0, ascending=False)
)
soft6_eti.to_csv('soft6_4dm_eti.csv')

# SOFT 5: same ranking and export.
soft5_eti = (
    soft5_model
    .predict(soft5_dataset)
    .sort_values(0, ascending=False)
)
soft5_eti.to_csv('soft5_4dm_eti.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_records['beatmap'] = filtered_records.apply(lambda x: x['round'] + "_" + x['beatmap_type'] + "_" + str(x['beatmap_tag']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pand