In [1]:
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Relative path to the SQLite file holding the "4dm4" dataset.
_4dm4_data_fp = 'dataset/4dm4.db'

# Open a connection to that file.
# NOTE(review): this connection is not referenced by any cell visible in this
# part of the notebook — confirm it is still needed. Also note that
# sqlite3.connect() creates a new empty database if the path does not exist.
_4dm4_connection = sqlite3.connect(_4dm4_data_fp)

In [3]:
# Relative path to the SQLite file queried for scores and player ids below.
ranked_loved_data_fp = "dataset/rankedscores_top_10000.db"

# Shared connection used by the pd.read_sql calls in this notebook.
# NOTE(review): sqlite3.connect() creates a new empty database if the path does
# not exist, so a wrong path surfaces later as a "no such table" error.
ranked_loved_connection = sqlite3.connect(ranked_loved_data_fp)

In [4]:
def get_all_4k_scores(connection=None):
    """Load the highest pp per (user, beatmap) pair for the selected beatmaps.

    Only beatmaps with ``diff_size = 4``, ``playmode = 3`` and ``approved = 1``
    in ``osu_beatmaps`` are considered (presumably ranked 4-key osu!mania
    maps — confirm against the database schema).

    Parameters
    ----------
    connection : DB-API connection, optional
        Connection to query. Defaults to the notebook-level
        ``ranked_loved_connection`` so existing zero-argument calls keep
        working; passing one explicitly removes the dependency on that global.

    Returns
    -------
    pandas.DataFrame
        Columns ``user_id``, ``beatmap_id`` and
        ``max(osu_scores_mania_high.pp)``, ordered by ``user_id`` and then by
        pp from highest to lowest. Missing values in any column are replaced
        with 0.
    """
    if connection is None:
        connection = ranked_loved_connection
    sql = """
    SELECT osu_scores_mania_high.user_id, osu_scores_mania_high.beatmap_id, max(osu_scores_mania_high.pp)
    FROM osu_scores_mania_high
    WHERE osu_scores_mania_high.beatmap_id in (SELECT beatmap_id FROM osu_beatmaps 
    WHERE osu_beatmaps.diff_size = 4 AND osu_beatmaps.playmode = 3 AND osu_beatmaps.approved = 1)
    GROUP BY osu_scores_mania_high.user_id, osu_scores_mania_high.beatmap_id
    ORDER BY osu_scores_mania_high.user_id, max(osu_scores_mania_high.pp) DESC;
    """
    # A NULL max(pp) (group with no non-NULL pp) comes back as NaN; store it as 0.
    return pd.read_sql(sql, connection).fillna(0)

In [5]:
# Run the score query once and keep the resulting frame for the cells below.
all_4k_scores = get_all_4k_scores()

In [6]:
# Spot check: every loaded score of user 10879600, best pp first.
(
    all_4k_scores
    .loc[all_4k_scores['user_id'] == 10879600]
    .sort_values(by='max(osu_scores_mania_high.pp)', ascending=False)
)

Unnamed: 0,user_id,beatmap_id,max(osu_scores_mania_high.pp)
2705890,10879600,767046,559.60100
2705891,10879600,1217803,521.03900
2705892,10879600,2145124,513.32700
2705893,10879600,1092805,506.31700
2705894,10879600,2568205,499.63300
...,...,...,...
2706430,10879600,403780,5.71202
2706431,10879600,1783629,5.69190
2706432,10879600,1317017,5.36417
2706433,10879600,914257,4.17850


In [7]:
def get_pp(player_id, decay=0.95, scores=None):
    """Return the decay-weighted pp total for one player.

    The player's pp values are sorted from highest to lowest and the k-th
    value (0-based) is multiplied by ``decay ** k`` before everything is
    summed.

    Parameters
    ----------
    player_id
        Value matched against the ``user_id`` column.
    decay : float, default 0.95
        Base of the geometric weights. The default reproduces the value that
        was previously hard-coded here.
    scores : pandas.DataFrame, optional
        Frame with ``user_id`` and ``max(osu_scores_mania_high.pp)`` columns.
        Defaults to the notebook-level ``all_4k_scores`` so existing
        one-argument calls keep working.

    Returns
    -------
    float
        The weighted sum; ``0.0`` when the player has no rows.
    """
    if scores is None:
        scores = all_4k_scores
    pp_column = 'max(osu_scores_mania_high.pp)'
    # Select only the pp column before sorting instead of sorting whole rows.
    player_pp = scores.loc[scores['user_id'] == player_id, pp_column]
    ranked_pp = player_pp.sort_values(ascending=False).to_numpy()
    weightage = np.power(decay, np.arange(len(ranked_pp)))
    return float(np.dot(weightage, ranked_pp))

In [8]:
# Fetch every user_id listed in the osu_user_stats_mania table.
player_id_query = "SELECT user_id FROM osu_user_stats_mania"

# NOTE(review): player_ids is only displayed in the cells visible here; confirm
# whether later analysis still needs it.
player_ids = pd.read_sql(player_id_query, ranked_loved_connection)

In [9]:
# Display the loaded ids (pandas truncates long frames in the rendered output).
player_ids

Unnamed: 0,user_id
0,65724
1,75777
2,82751
3,85797
4,86188
...,...
9995,31328409
9996,31340937
9997,31377952
9998,31396760


In [10]:
# Display the score frame before any derived columns are added.
all_4k_scores

Unnamed: 0,user_id,beatmap_id,max(osu_scores_mania_high.pp)
0,65724,3525702,586.8620
1,65724,3167576,582.3460
2,65724,767046,550.3140
3,65724,2598323,524.1070
4,65724,1156963,510.6990
...,...,...,...
6140594,31406064,446726,164.0890
6140595,31406064,1852994,151.3890
6140596,31406064,1085914,148.3960
6140597,31406064,1207642,137.2920


In [11]:
# Factor by which the weight shrinks from one score of a user to the next when
# the per-row weights are built below. get_pp uses the same 0.95 value
# independently of this variable.
decay_rate = 0.95

In [12]:
# Per-row weight for the weighted pp total: the k-th row (0-based) of each user
# gets decay_rate ** k. This replaces a Python-level loop over every row with a
# vectorized groupby, and uses the same np.power formulation as get_pp (values
# can differ from a running product only in the last floating-point digit).
#
# cumcount() numbers rows in their current frame order, so this relies on each
# user's rows already being ordered from highest to lowest pp, as produced by
# the ORDER BY clause of the query in get_all_4k_scores.
score_position = all_4k_scores.groupby('user_id').cumcount().to_numpy()

# Kept as a plain list so `weights` has the same type as before.
weights = np.power(decay_rate, score_position).tolist()

In [13]:
# Attach the weights as a column; `weights` must hold one entry per row of
# all_4k_scores, in the same row order. This mutates all_4k_scores in place.
all_4k_scores['weights'] = weights

In [14]:
# Display the frame again to inspect the newly added `weights` column.
all_4k_scores

Unnamed: 0,user_id,beatmap_id,max(osu_scores_mania_high.pp),weights
0,65724,3525702,586.8620,1.000000
1,65724,3167576,582.3460,0.950000
2,65724,767046,550.3140,0.902500
3,65724,2598323,524.1070,0.857375
4,65724,1156963,510.6990,0.814506
...,...,...,...,...
6140594,31406064,446726,164.0890,0.902500
6140595,31406064,1852994,151.3890,0.857375
6140596,31406064,1085914,148.3960,0.814506
6140597,31406064,1207642,137.2920,0.773781


In [15]:
# Contribution of each score to its user's total: pp scaled by the row weight.
all_4k_scores['weighted_pp'] = (
    all_4k_scores['max(osu_scores_mania_high.pp)']
    .mul(all_4k_scores['weights'])
)

In [16]:
# Total weighted pp per user; the result is indexed by user_id and has a single
# `weighted_pp` column.
_4k_pp = all_4k_scores.groupby('user_id')[['weighted_pp']].sum()

In [17]:
# Order users from highest to lowest weighted pp total. The rank column added
# later in the notebook relies on this row order.
_4k_pp = _4k_pp.sort_values('weighted_pp', ascending=False)

# Show the top 25 users.
_4k_pp.head(25)

Unnamed: 0_level_0,weighted_pp
user_id,Unnamed: 1_level_1
10072733,14144.568913
16734203,14125.605263
15806513,13904.314696
2288363,13635.845447
10083439,13593.986643
16696389,13464.593633
9169747,13363.490835
10790649,13237.780362
13385865,13196.855548
14952077,13108.894681


In [18]:
# Number the rows 1..N in their current order; _4k_pp was sorted by weighted_pp
# descending beforehand, so rank 1 is the highest total.
_4k_pp['rank'] = np.arange(1, len(_4k_pp) + 1)

In [19]:
# Write the per-user totals and ranks to CSV. The user_id index is written as
# the first column because to_csv keeps the index by default.
_4k_pp.to_csv('dataset/4kpp_estimate.csv')