In [1]:
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
import numpy as np
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt

In [3]:
# Locations of the three SQLite databases, relative to the working directory.
_4dm3_data_fp, _4dm4_data_fp, soft6_data_fp = (
    'dataset/4dm3.db',
    'dataset/4dm4.db',
    'dataset/soft6.db',
)

# One connection per database; they stay open because later cells query them.
_4dm3_connection, _4dm4_connection, soft6_connection = (
    sqlite3.connect(db_path)
    for db_path in (_4dm3_data_fp, _4dm4_data_fp, soft6_data_fp)
)

In [4]:
# Both 4dm databases use the same query: every score row whose beatmap_type is
# not 'SV'. The literal is single-quoted because SQL reads a double-quoted
# token as an identifier; SQLite only falls back to treating it as a string
# when no column of that name exists.
_4dm4_sql = """SELECT * FROM scores WHERE beatmap_type != 'SV'"""
_4dm3_sql = """SELECT * FROM scores WHERE beatmap_type != 'SV'"""

# soft6 keeps players and mappool details in separate tables, so they are
# joined onto each score row. The LEFT JOIN keeps scores whose beatmap has no
# mappool entry; the INNER JOIN drops scores without a matching player.
soft_sql = """SELECT players.name as player_name, scores.beatmap_id, mappools.round, mappools.beatmap_type, scores.score
FROM scores LEFT JOIN mappools ON (scores.beatmap_id = mappools.beatmap_id)
INNER JOIN players ON (scores.player_id = players.id)"""

_4dm4_dataset = pd.read_sql(_4dm4_sql, _4dm4_connection)
_4dm3_dataset = pd.read_sql(_4dm3_sql, _4dm3_connection)
soft6_dataset = pd.read_sql(soft_sql, soft6_connection)

In [5]:
def logit(x, max_score=1e6):
    """Map a score onto the log-odds scale.

    Computes ``log(x / (max_score - x))``.

    Parameters
    ----------
    x : scalar or array-like
        Score(s) to transform.
    max_score : float, default 1e6
        Upper bound of the score range. The default reproduces the original
        hard-coded ceiling of one million.

    Returns
    -------
    Same shape as ``x``; 0.0 at ``x == max_score / 2``.

    Notes
    -----
    The transform is undefined at both ends of the range: ``x == 0`` gives
    ``log(0)`` and ``x == max_score`` divides by zero. Inputs are expected to
    lie strictly between 0 and ``max_score``.
    """
    return np.log(x / (max_score - x))

# Add the logit-transformed score and rename beatmap_id to beatmap_tag.
soft6_dataset = (
    soft6_dataset
    .assign(score_logit=soft6_dataset['score'].apply(logit))
    .rename(columns={'beatmap_id': 'beatmap_tag'})
)

In [6]:
from utils.etimodel import GeneralizedETIModel

# Beatmap categories handed to both models.
main_beatmap_category = ['RC', 'HB', 'LN']

# Both models are constructed from the 4dm4 dataset; the variable names reflect
# the dataset each one is fitted against in the following cell.
# NOTE(review): presumably 4dm4 is the reference the other tournaments are
# mapped onto - confirm against GeneralizedETIModel.
_4dm3_model = GeneralizedETIModel(_4dm4_dataset, main_beatmap_category)
soft6_model = GeneralizedETIModel(_4dm4_dataset, main_beatmap_category)

In [7]:
# Fit each model against its own tournament's scores.
# NOTE(review): fit_regression is assumed to update the model in place, since
# its return value is not kept - confirm.
_4dm3_model.fit_regression(_4dm3_dataset)
soft6_model.fit_regression(soft6_dataset)

In [8]:
# Per-tournament predictions from the fitted models. Later cells treat the
# results as DataFrames: they read column 0 and add a 'source' column.
_4dm3_etis = _4dm3_model.predict(_4dm3_dataset)
soft6_etis = soft6_model.predict(soft6_dataset)

In [9]:
# 4dm4 values: row-wise mean (axis=1) of whatever _4dm_eti() returns, wrapped
# in a DataFrame so it can take a 'source' column like the other two frames.
# NOTE(review): this is read from _4dm3_model; both models were constructed
# from _4dm4_dataset, so soft6_model presumably yields the same - confirm.
_4dm4_etis = _4dm3_model._4dm_eti().mean(axis=1)
_4dm4_etis = pd.DataFrame(_4dm4_etis)

In [10]:
# Stamp every ETI frame with the tournament it came from.
for _frame, _tag in (
    (_4dm4_etis, '4dm4'),
    (_4dm3_etis, '4dm3'),
    (soft6_etis, 'soft6'),
):
    _frame['source'] = _tag

In [11]:
# Stack the three frames and collapse to one row per index label, keeping the
# column-wise maximum.
# NOTE(review): max() is taken per column, so for a label present in several
# frames the kept value and the kept 'source' string (compared
# lexicographically) can come from different rows - confirm this is intended.
etis = (
    pd.concat([soft6_etis, _4dm3_etis, _4dm4_etis])
    .reset_index()
    .groupby('index')
    .max()
)

In [12]:
# Drop the rows labelled '4dm3'. The explicit copy makes this an independent
# frame, so adding columns to it afterwards is not a chained assignment on a
# slice of `etis`.
_4dm4_and_soft6_etis = etis[etis['source'] != '4dm3'].copy()

In [13]:
# Binary training label: 0 for rows whose source is '4dm4', 1 for every other row.
_4dm4_and_soft6_etis['skillban_prob'] = (
    _4dm4_and_soft6_etis['source'].ne('4dm4').astype('int64')
)

In [14]:
from sklearn.linear_model import LogisticRegression

# Feature matrix: column 0 as a single-column 2-D array. Target: the 0/1 label.
x = _4dm4_and_soft6_etis[[0]].to_numpy()
y = _4dm4_and_soft6_etis['skillban_prob'].to_numpy()

In [15]:
# Logistic regression with default settings, fitted on the single feature.
# fit() returns the estimator itself, so construction and fitting are chained.
skillban_model = LogisticRegression().fit(x, y)
skillban_model

In [16]:
def predict(x: pd.DataFrame):
    """Return the skill-ban model's class probabilities for each row of ``x``.

    Only the column labelled ``0`` is read; it is passed to
    ``skillban_model.predict_proba`` as a single-column 2-D array, and that
    call's result is returned unchanged.
    """
    eti_column = x[[0]].to_numpy()
    return skillban_model.predict_proba(eti_column)

In [17]:
# Load the 4kpp estimates and keep the players whose rank is 1000 or higher.
# NOTE(review): there is no upper bound, so ranks of 10000 and beyond are
# kept as well - confirm that is intended for a "4 digit" selection.
_4kpp_estimate_dataset = pd.read_csv('4kpp_estimate.csv')
_4digit_players = _4kpp_estimate_dataset.loc[lambda frame: frame['rank'] >= 1000]

In [18]:
# soft6 players indexed by id (the id column is kept as well), so the ids that
# also appear in the rank-filtered 4kpp table can be mapped to player names.
soft6_player_ids = (
    pd.read_sql("""SELECT id, name FROM players""", soft6_connection)
    .set_index('id', drop=False)
)
list_players = np.intersect1d(_4digit_players['user_id'], soft6_player_ids['id'])
_4digit_in_soft = soft6_player_ids.loc[list_players, 'name'].to_numpy()

In [19]:
# Keep only the soft6 rows whose index label is one of the matched player names.
soft6_etis_4digit = soft6_etis.loc[soft6_etis.index.isin(_4digit_in_soft)]

In [20]:
# Final table to score: all 4dm4 rows, all 4dm3 rows, and the filtered soft6 rows.
etis_final = pd.concat(
    [
        _4dm4_etis,
        _4dm3_etis,
        soft6_etis_4digit,
    ]
)

In [21]:
# Show the combined table as it stands right after concatenation.
etis_final

Unnamed: 0,0,source
drag_,-1.991885,4dm4
JPBenedictGame,-1.800725,4dm4
iVALORUS,-2.036732,4dm4
BL5,-1.438920,4dm4
-Tebo-,0.060721,4dm4
...,...,...
Enthalpy,0.690473,soft6
Minisora,1.971434,soft6
MIkuaimbot,2.351488,soft6
DemiFiendSMT,5.285878,soft6


In [22]:
# Class probabilities from the fitted skill-ban model for every row of etis_final.
model_prediction = predict(etis_final)

In [23]:
# Keep only the last column of the probability array.
# NOTE(review): with the 0/1 training labels this should be the probability of
# label 1 - confirm against skillban_model.classes_.
etis_final['model_prediction'] = model_prediction[:, -1]

In [24]:
# Order rows from highest to lowest predicted probability.
etis_final = etis_final.sort_values('model_prediction', ascending=False)

In [25]:
def determine_skillbans(model_pred, strong_threshold=0.9, weak_threshold=0.75):
    """Turn a predicted probability into a skill-ban label.

    Parameters
    ----------
    model_pred : float
        Predicted probability to classify.
    strong_threshold : float, default 0.9
        Values strictly above this are labelled ``"Strong"``.
    weak_threshold : float, default 0.75
        Values strictly above this, but not above ``strong_threshold``, are
        labelled ``"Weak"``.

    Returns
    -------
    str
        ``"Strong"``, ``"Weak"`` or ``"Pass"``. Both comparisons are strict,
        so a value exactly on a threshold falls into the lower band; NaN
        compares false against both and yields ``"Pass"``.
    """
    if model_pred > strong_threshold:
        return "Strong"
    if model_pred > weak_threshold:
        return "Weak"
    return "Pass"

In [26]:
# Label every row from its predicted probability.
etis_final['skillban'] = etis_final['model_prediction'].map(determine_skillbans)

In [27]:
# Show the table with the model_prediction and skillban columns attached.
etis_final

Unnamed: 0,0,source,model_prediction,skillban
shokoha,13.858188,4dm4,0.998726,Strong
[-Leon-],11.040662,4dm3,0.994038,Strong
Stability,7.911546,soft6,0.967616,Strong
Gray Hardrock,7.679832,4dm4,0.963380,Strong
2fast4you98,7.321128,4dm3,0.955756,Strong
...,...,...,...,...
Yosuf,-2.222836,4dm4,0.102408,Pass
NotXbxx,-2.232947,4dm4,0.101899,Pass
JostinC,-2.255380,4dm4,0.100776,Pass
Kaito-kun,-2.292689,4dm4,0.098934,Pass


In [28]:
# Write the labelled table to disk; to_csv includes the index by default, so
# the index labels become the first column of the file.
etis_final.to_csv('etis_final_4dm4_4dm3_soft6.csv')

In [29]:
# Index labels of every row whose label is anything other than 'Pass'.
skillbanned_players = etis_final.loc[etis_final['skillban'].ne('Pass')].index

In [30]:
# Lookup from a row's 'source' tag to the score table it came from.
source_dataset = dict(
    zip(
        ('4dm4', '4dm3', 'soft6'),
        (_4dm4_dataset, _4dm3_dataset, soft6_dataset),
    )
)

In [31]:
from utils.highlights import get_highlighted_scores

# Count passed to get_highlighted_scores for every flagged player.
n_highlighted_scores = 5
# Empty accumulator; it fixes the column order of the combined highlights table.
highlighted_scores = pd.DataFrame(columns=['player_name', 'source', 'beatmap', 'score'])

In [32]:
# Iterate over the flagged rows themselves so each player is paired with the
# 'source' on their own row. Looking the source up by position with
# etis_final.iloc[i] only matches when etis_final is sorted so that every
# non-'Pass' row comes first.
flagged_rows = etis_final[etis_final['skillban'] != 'Pass']

# Collect the per-beatmap slices and concatenate once at the end, rather than
# growing the DataFrame on every iteration.
highlight_frames = []
for player, source in flagged_rows['source'].items():
    source_ds = source_dataset[source]
    highlighted_player_score = get_highlighted_scores(player, source_ds, n_highlighted_scores)
    player_rows = source_ds[source_ds['player_name'] == player]
    # NOTE(review): this needs a 'beatmap' column in the source table. The
    # soft6 query selects beatmap_id (later renamed to beatmap_tag) and no
    # 'beatmap' column - confirm soft6 rows can be looked up here.
    for beatmap in highlighted_player_score:
        # .copy() so adding 'source' writes to an independent frame, not to a
        # slice of the source table.
        highlight_scr = player_rows.loc[
            player_rows['beatmap'] == beatmap, ['player_name', 'beatmap', 'score']
        ].copy()
        highlight_scr['source'] = source
        highlight_frames.append(highlight_scr)

# The existing accumulator goes first so its column order is preserved.
highlighted_scores = pd.concat([highlighted_scores, *highlight_frames])

In [34]:
# Write the highlights to disk. The frame is built with pd.concat without
# ignore_index, so the index column in the file carries the row labels
# inherited from the source tables.
highlighted_scores.to_csv("highlight_generalized.csv")