In [1]:
# necessary imports
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.preprocessing import PolynomialFeatures
from fetcher import fetch_maps, fetch_scores

### Fetch Maps and Scores

In [2]:
# get ranked maps (fetch_maps is imported from the `fetcher` module above)
maps_df = fetch_maps()

In [3]:
# preview the first rows to check which columns were loaded
maps_df.head()

Unnamed: 0,leaderboardId,songId,cover,fullCover,name,subName,author,mapper,bpm,duration,stars,passRating,accRating,techRating,difficultyName,type,mod_stars,mod_passRating,mod_accRating,mod_techRating
0,57c31,57c,https://eu.cdn.beatsaver.com/f15082586d31d238c...,https://cdn.assets.beatleader.xyz/songcover-57...,Zzz,by Sasaki Sayaka,Todokete,todokete,154.0,248,3.565262,0.992113,5.772469,3.247698,Normal,Accuracy,3.565262,0.992113,5.772469,3.247698
1,57c51,57c,https://eu.cdn.beatsaver.com/f15082586d31d238c...,https://cdn.assets.beatleader.xyz/songcover-57...,Zzz,by Sasaki Sayaka,Todokete,todokete,154.0,248,5.456755,2.433202,7.259916,6.221073,Hard,Tech,5.456755,2.433202,7.259916,6.221073
2,57c71,57c,https://eu.cdn.beatsaver.com/f15082586d31d238c...,https://cdn.assets.beatleader.xyz/songcover-57...,Zzz,by Sasaki Sayaka,Todokete,todokete,154.0,248,6.634682,3.184074,8.040394,8.696276,Expert,Tech,6.634682,3.184074,8.040394,8.696276
3,1fd41x11,1fd41x,https://eu.cdn.beatsaver.com/1fb8d26ef00049d75...,https://cdn.assets.beatleader.xyz/songcover-1f...,Zombified,,Falling In Reverse,Bytrius,182.0,221,2.996581,1.331649,5.175914,1.15891,Easy,Accuracy,2.996581,1.331649,5.175914,1.15891
4,1fd41x31,1fd41x,https://eu.cdn.beatsaver.com/1fb8d26ef00049d75...,https://cdn.assets.beatleader.xyz/songcover-1f...,Zombified,,Falling In Reverse,Bytrius,182.0,221,4.16884,2.34582,6.326489,1.984545,Normal,Accuracy,4.16884,2.34582,6.326489,1.984545


In [4]:
# get player scores
# named `player_id` rather than `id` so the builtin id() is not shadowed for
# the rest of the kernel session
player_id = "thinkingswag"
scores_df = fetch_scores(player_id)

In [5]:
# preview the first rows of the player's scores
scores_df.head()

Unnamed: 0,leaderboardId,songId,cover,fullCover,name,subName,author,mapper,bpm,duration,...,mod_passRating,mod_accRating,mod_techRating,mod_stars,accuracy,pp,rank,modifiers,fullCombo,dateset
0,2b85fxx71,2b85fxx,https://eu.cdn.beatsaver.com/09fd6d30c55f6d721...,https://cdn.assets.beatleader.xyz/songcover-2b...,At Least Speedcore Artists Aren't In It For Th...,,Loffciamcore & Imil,Slayx,260.0,143,...,15.041139,11.934061,2.134165,7.976207,0.974111,840.22034,2,SF,True,2023-01-25 17:58:22
1,3bcf5xxxxxxxx91,3bcf5xxxxxxxx,https://cdn.beatsaver.com/187bea15de6bd7301239...,https://cdn.assets.beatleader.xyz/songcover-3b...,nieuwe tune,,gladde paling & vieze vaatdoek,Stupidity-101,180.0,93,...,17.20671,13.184697,11.334187,14.775179,0.941264,794.4309,2,FS,False,2024-12-13 18:49:16
2,1cd7791,1cd77,https://eu.cdn.beatsaver.com/08d67c25e377d2013...,https://cdn.assets.beatleader.xyz/songcover-1c...,Deception,,Dance Gavin Dance,cerret,316.0,233,...,15.924928,11.720812,2.477255,8.379396,0.969104,783.93976,2,SF,False,2023-05-13 14:04:36
3,2a1b391,2a1b3,https://eu.cdn.beatsaver.com/604ac21a79c26207c...,https://cdn.assets.beatleader.xyz/songcover-2a...,II. Anal Prolapse Suffocation,,Infant Annihilator,Vilawes,350.0,182,...,14.132151,11.225502,1.858116,10.19641,0.974313,773.7751,2,FS,True,2023-03-15 18:38:45
4,1cdc691,1cdc6,https://eu.cdn.beatsaver.com/eaddeb51358bbd688...,https://cdn.assets.beatleader.xyz/songcover-1c...,We Like To Party! (The Vengabus),[Fvrwvrd Bootleg],Vengaboys,cerret,320.0,125,...,13.822477,11.683339,2.319518,10.649382,0.972415,768.4327,3,FS,True,2023-03-15 18:27:35


In [6]:
# age of every score in whole days, measured back from the most recent score
max_date = scores_df["dateset"].max()
score_age = max_date - scores_df["dateset"]
scores_df["days_since"] = score_age.dt.days

### Train Model

Decay Function: $\text{Weight}(t) = e^{-\lambda \times \frac{\text{days since}}{14}}$

In [7]:
# exponential time decay so newer scores have more influence on the model:
# weight = exp(-lambda * days_since / 14), with 14 days = 2 weeks
lambda_value = 0.1
scaled_age = -lambda_value * scores_df["days_since"]
decay_weights = np.exp(scaled_age / 14)
scores_df["decay_weights"] = decay_weights

In [8]:
# set up features for exponential regression model

# modified ratings as independent variables, expanded with every degree-2 term
# (squares and pairwise products); the bias column is added at training time
X = scores_df[["mod_passRating", "mod_accRating", "mod_techRating"]].to_numpy()
X_poly = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)

# dependent variable; invert to mimic downward curve
y = scores_df["accuracy"].to_numpy().reshape(-1, 1)
y_inv = (1 - scores_df["accuracy"]).to_numpy().reshape(-1, 1)

# A perfect score (accuracy == 1.0) makes y_inv == 0, and log(0) == -inf would
# silently turn every fitted coefficient into NaN. Floor the miss rate before
# taking the log; scores below the floor are unaffected.
MIN_MISS_RATE = 1e-6
y_inv_log = np.log(np.maximum(y_inv, MIN_MISS_RATE))

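Weighted least-squares (line of best fit) solution: $\text{model} = (X^TWX)^{-1}X^TWy$, where $X$ is the polynomial feature matrix with a leading bias column, $W$ is the diagonal matrix of decay weights, and $y = \log(1 - \text{accuracy})$.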

In [9]:
# train model: weighted least squares, i.e. find `model` such that
#   (X^T W X) model = X^T W y,   with W = diag(decay_weights)
sample_weights = np.asarray(decay_weights, dtype=float)
X_poly_bias = np.column_stack([np.ones(X_poly.shape[0]), X_poly])

# Scaling column j of X^T by weight j gives the same values as X^T @ diag(w),
# without materialising a dense n x n matrix that is almost entirely zeros.
XtW = X_poly_bias.T * sample_weights
XtWX = np.matmul(XtW, X_poly_bias)
XtWy = np.matmul(XtW, y_inv_log)

# Solve the normal equations directly instead of forming the explicit inverse:
# it is cheaper and loses less precision when X^T W X is ill-conditioned.
model = np.linalg.solve(XtWX, XtWy)
model

array([[-5.88277658e+00],
       [-7.42769537e-02],
       [ 3.44152759e-01],
       [-3.78696420e-02],
       [ 9.43020322e-03],
       [-1.37723573e-02],
       [-5.70035940e-04],
       [ 2.26044003e-06],
       [-2.88758128e-03],
       [ 8.53622321e-03]])

### Predict Scores

In [10]:
# predict scores: the model was fitted on log(1 - accuracy), so evaluate the
# polynomial and then undo the log and the inversion
intercept, coefficients = model[0], model[1:]
ypreds_inv = np.dot(X_poly, coefficients) + intercept
ypreds = 1 - np.exp(ypreds_inv)

scores_df["pred_accuracy"] = ypreds
scores_df[["accuracy", "pred_accuracy"]].head()

Unnamed: 0,accuracy,pred_accuracy
0,0.974111,0.965439
1,0.941264,0.941027
2,0.969104,0.965211
3,0.974313,0.969391
4,0.972415,0.968227


In [24]:
from models import PRED_FEATURES
from pp import WEIGHT_CURVE, calc_pp_from_accuracy
from utils import filter_unplayed

# Columns kept in the final prediction table, in display order.
# NOTE: this rebinds the PRED_FEATURES name imported from `models` in this cell.
PRED_FEATURES = [
    # map identity and metadata
    "leaderboardId", "songId", "cover", "fullCover", "name", "subName",
    "author", "mapper", "bpm", "duration", "difficultyName", "type",
    # base ratings
    "stars", "passRating", "accRating", "techRating",
    # modifier-adjusted ratings
    "mod_stars", "mod_passRating", "mod_accRating", "mod_techRating",
    # play state
    "status", "modifiers",
    # accuracy: current, predicted, and potential gain
    "current_acc", "pred_acc", "acc_gain",
    # pp: current, predicted, and the larger of the two
    "current_pp", "pred_pp", "max_pp",
    # pp gains and the curve weight applied to them
    "unweighted_pp_gain", "weighted_pp_gain", "weights",
]

def apply_weight_curve(pred_df: pd.DataFrame) -> pd.DataFrame:
  """Rank maps by `max_pp` and attach a curve weight and weighted gain to each.

  Rows are visited in descending `max_pp` order. A position on WEIGHT_CURVE is
  tracked alongside the descending list of `current_pp` values: it moves one
  step down whenever the visited row's `max_pp` is below the `current_pp`
  value at that position, and it never moves past the last curve entry.

  Args:
    pred_df: frame with `max_pp`, `current_pp` and `unweighted_pp_gain`
      columns. It is not modified.

  Returns:
    A copy of `pred_df` sorted by `max_pp` (descending) with two extra columns:
    `weights` (the curve value assigned to each row) and `weighted_pp_gain`
    (`unweighted_pp_gain` multiplied by that weight).
  """
  ranked_df = pred_df.copy().sort_values(by="max_pp", ascending=False)
  current_ranked = ranked_df["current_pp"].sort_values(ascending=False).to_numpy()

  weights = np.zeros(len(pred_df))
  curve_idx = 0

  for position, max_pp in enumerate(ranked_df["max_pp"].to_numpy()):
    # at most one step down the curve per row, capped at the final entry
    can_advance = curve_idx < len(WEIGHT_CURVE) - 1
    if can_advance and max_pp < current_ranked[curve_idx]:
      curve_idx += 1
    weights[position] = WEIGHT_CURVE[curve_idx]

  ranked_df["weighted_pp_gain"] = ranked_df["unweighted_pp_gain"] * weights
  ranked_df["weights"] = weights

  return ranked_df

# Stack played scores and unplayed maps into one frame, tagging each row so the
# two groups can still be told apart afterwards.
unplayed_df = filter_unplayed(scores_df, maps_df)
scores_df["status"] = "played"
unplayed_df["status"] = "unplayed"
pred_df = pd.concat([scores_df, unplayed_df], ignore_index=True)

# Design matrix for prediction. It gets its own names: rebinding `X` / `X_poly`
# here would overwrite the training matrices with a different row count, so
# re-running the training or score-prediction cells afterwards would fail on a
# shape mismatch.
X_pred = pred_df[["mod_passRating", "mod_accRating", "mod_techRating"]].values.reshape(-1, 3)
X_pred_poly = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X_pred)

# accuracy: rows without a score count as 0; the model output is log(1 - accuracy)
pred_df["current_acc"] = np.where(~pred_df["accuracy"].isna(), pred_df["accuracy"], 0)
pred_df["pred_acc"] = np.minimum(1, 1 - np.exp(np.dot(X_pred_poly, model[1:]) + model[0]))
pred_df["acc_gain"] = np.maximum(0, pred_df["pred_acc"] - pred_df["current_acc"])

# pp: rows without a score count as 0; predicted pp is derived from the
# predicted accuracy and the modifier-adjusted ratings
pred_df["current_pp"] = np.where(~pred_df["pp"].isna(), pred_df["pp"], 0)
pred_df["pred_pp"] = pred_df.apply(lambda row: calc_pp_from_accuracy(
  row["pred_acc"], row["mod_passRating"], row["mod_accRating"], row["mod_techRating"])["total_pp"], 
  axis=1)
pred_df["max_pp"] = np.maximum(pred_df["current_pp"], pred_df["pred_pp"])
pred_df["unweighted_pp_gain"] = np.maximum(0, pred_df["pred_pp"] - pred_df["current_pp"])
pred_df = apply_weight_curve(pred_df)

# keep only the report columns, best achievable pp first
pred_df = pred_df[PRED_FEATURES].sort_values(by="max_pp", ascending=False)

In [25]:
# show the first 30 rows; pred_df was sorted by max_pp (descending) in the previous cell
pred_df.head(30)

Unnamed: 0,leaderboardId,songId,cover,fullCover,name,subName,author,mapper,bpm,duration,...,modifiers,current_acc,pred_acc,acc_gain,current_pp,pred_pp,max_pp,unweighted_pp_gain,weighted_pp_gain,weights
0,2b85fxx71,2b85fxx,https://eu.cdn.beatsaver.com/09fd6d30c55f6d721...,https://cdn.assets.beatleader.xyz/songcover-2b...,At Least Speedcore Artists Aren't In It For Th...,,Loffciamcore & Imil,Slayx,260.0,143,...,SF,0.974111,0.965439,0.0,840.22034,735.249972,840.22034,0.0,0.0,1.0
805,3ce7axxxxxxxxxxx91,3ce7axxxxxxxxxxx,https://cdn.beatsaver.com/6343bbda0b1c75d52423...,https://cdn.assets.beatleader.xyz/songcover-3c...,The Purple Dimension,Extended Version,ToonTubers,Cratornugget & ViSi,464.0,187,...,,0.0,0.960212,0.960212,0.0,813.351811,813.351811,813.351811,784.884498,0.965
35,1ac1791,1ac17,https://eu.cdn.beatsaver.com/8cf4844d5dd772821...,https://cdn.assets.beatleader.xyz/songcover-1a...,Carmina,(fallen shepherd Remix),CANVAS feat. Quimar,abcbadq,195.0,252,...,SF,0.947865,0.959657,0.011793,730.96545,798.001523,798.001523,67.036073,64.689811,0.965
20,1ace571,1ace5,https://eu.cdn.beatsaver.com/61caada06a65088bd...,https://cdn.assets.beatleader.xyz/songcover-1a...,Venomous Firefly,,Camellia,ComplexFrequency,264.0,122,...,SF,0.952253,0.960758,0.008506,742.475,795.502003,795.502003,53.027003,51.171058,0.965
1,3bcf5xxxxxxxx91,3bcf5xxxxxxxx,https://cdn.beatsaver.com/187bea15de6bd7301239...,https://cdn.assets.beatleader.xyz/songcover-3b...,nieuwe tune,,gladde paling & vieze vaatdoek,Stupidity-101,180.0,93,...,FS,0.941264,0.941027,0.0,794.4309,793.387391,794.4309,0.0,0.0,0.965
231,1a59391,1a593,https://eu.cdn.beatsaver.com/471b5a6822576df5d...,https://cdn.assets.beatleader.xyz/songcover-1a...,Barbecue,,Igorrr & Ruby My Dear,Schwank & Jabob,200.0,226,...,SF,0.923051,0.957831,0.034779,644.0483,788.760273,788.760273,144.711973,134.726847,0.931
2,1cd7791,1cd77,https://eu.cdn.beatsaver.com/08d67c25e377d2013...,https://cdn.assets.beatleader.xyz/songcover-1c...,Deception,,Dance Gavin Dance,cerret,316.0,233,...,SF,0.969104,0.965211,0.0,783.93976,746.496925,783.93976,0.0,0.0,0.931
7,2b57191,2b571,https://eu.cdn.beatsaver.com/429ff959e81100e5a...,https://cdn.assets.beatleader.xyz/songcover-2b...,Black Rover,,Vickeblanka,Aquaflee,204.04,90,...,SF,0.964075,0.967071,0.002996,755.1152,783.209564,783.209564,28.094364,25.256833,0.899
1017,2c888xxxxx91,2c888xxxxx,https://eu.cdn.beatsaver.com/3f567bc5cc7ada8c9...,https://cdn.assets.beatleader.xyz/songcover-2c...,Splatter Party,(Feat. Hatsune Miku) [Xena Galia Cover || XH /...,Camellia,Anammelech & Otricity,216.0,278,...,,0.0,0.965528,0.965528,0.0,778.388955,778.388955,778.388955,699.77167,0.899
182,13b2b71,13b2b,https://eu.cdn.beatsaver.com/2e887f6cd0fa896a8...,https://cdn.assets.beatleader.xyz/songcover-13...,Extra Credit on the Chromosome Test! VICTORY R...,(feat. Lil Triangle),Schwank,lobster & FatBeanzoop,175.0,132,...,FS,0.949994,0.96646,0.016466,673.575,776.853089,776.853089,103.278089,92.847002,0.899


In [13]:
# accuracy against star rating, coloured by how much weight each score
# carries in the fit
decay_hover_cols = [
    "name", "mapper", "type", "difficultyName", "days_since",
    "mod_passRating", "mod_accRating", "mod_techRating",
]
px.scatter(
    scores_df,
    x="stars",
    y="accuracy",
    color="decay_weights",
    color_continuous_scale="magenta",
    hover_data=decay_hover_cols,
    title="Decay Weights for Scores",
)

In [14]:
# Build a long-form frame (one row per score per series) for the comparison
# plot. The label column is added to slices via .assign, so the shared
# `scores_df` is not given a plot-only column that later cells would inherit.
actual_df = scores_df[['stars', 'accuracy']].assign(accuracy_type='Actual')

predicted_df = (
    scores_df[['stars', 'pred_accuracy']]
    .rename(columns={'pred_accuracy': 'accuracy'})
    .assign(accuracy_type='Predicted')
)

combined_df = pd.concat([actual_df, predicted_df], ignore_index=True)

px.scatter(combined_df, x="stars", y="accuracy", color="accuracy_type", title="Actual vs. Predicted Accuracy")