In [1]:
# necessary imports
import sys
import os
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.preprocessing import PolynomialFeatures
from json import loads, dumps

sys.path.append(os.path.abspath(os.path.join('..')))

from app.fetcher import fetch_maps, fetch_scores
from app.models import predict_scores

### Fetch Maps and Scores

In [2]:
# fetch ranked maps (takes 1-2 min)
# maps_df = fetch_maps()
# maps_df.to_csv("ranked_maps.csv")

In [3]:
# load ranked maps
maps_df = pd.read_csv("ranked_maps.csv")

In [4]:
maps_df.head()

Unnamed: 0.1,Unnamed: 0,leaderboardId,songId,cover,fullCover,name,subName,author,mapper,bpm,...,stars,passRating,accRating,techRating,difficultyName,type,starsMod,passRatingMod,accRatingMod,techRatingMod
0,0,57c31,57c,https://eu.cdn.beatsaver.com/f15082586d31d238c...,https://cdn.assets.beatleader.xyz/songcover-57...,Zzz,by Sasaki Sayaka,Todokete,todokete,154.0,...,3.565262,0.992113,5.772469,3.247698,Normal,Accuracy,3.565262,0.992113,5.772469,3.247698
1,1,57c51,57c,https://eu.cdn.beatsaver.com/f15082586d31d238c...,https://cdn.assets.beatleader.xyz/songcover-57...,Zzz,by Sasaki Sayaka,Todokete,todokete,154.0,...,5.456755,2.433202,7.259916,6.221073,Hard,Tech,5.456755,2.433202,7.259916,6.221073
2,2,57c71,57c,https://eu.cdn.beatsaver.com/f15082586d31d238c...,https://cdn.assets.beatleader.xyz/songcover-57...,Zzz,by Sasaki Sayaka,Todokete,todokete,154.0,...,6.634682,3.184074,8.040394,8.696276,Expert,Tech,6.634682,3.184074,8.040394,8.696276
3,3,1fd41x11,1fd41x,https://eu.cdn.beatsaver.com/1fb8d26ef00049d75...,https://cdn.assets.beatleader.xyz/songcover-1f...,Zombified,,Falling In Reverse,Bytrius,182.0,...,2.996581,1.331649,5.175914,1.15891,Easy,Accuracy,2.996581,1.331649,5.175914,1.15891
4,4,1fd41x31,1fd41x,https://eu.cdn.beatsaver.com/1fb8d26ef00049d75...,https://cdn.assets.beatleader.xyz/songcover-1f...,Zombified,,Falling In Reverse,Bytrius,182.0,...,4.16884,2.34582,6.326489,1.984545,Normal,Accuracy,4.16884,2.34582,6.326489,1.984545


In [5]:
# fetch player scores (takes 6-20 sec.)
id = "thinkingswag"
scores_df = fetch_scores(id)

In [6]:
scores_df.head()

Unnamed: 0,leaderboardId,songId,cover,fullCover,name,subName,author,mapper,bpm,duration,...,starsMod,accuracy,pp,rank,modifiers,fullCombo,currentMods,predictedMods,dateset,timeAgo
0,2b85fxx71,2b85fxx,https://eu.cdn.beatsaver.com/09fd6d30c55f6d721...,https://cdn.assets.beatleader.xyz/songcover-2b...,At Least Speedcore Artists Aren't In It For Th...,,Loffciamcore & Imil,Slayx,260.0,143,...,7.976207,0.974111,840.22034,2,SF,True,[SF],[SF],2023-01-25 17:58:22,2 years ago
1,3bcf5xxxxxxxx91,3bcf5xxxxxxxx,https://cdn.beatsaver.com/187bea15de6bd7301239...,https://cdn.assets.beatleader.xyz/songcover-3b...,nieuwe tune,,gladde paling & vieze vaatdoek,Stupidity-101,180.0,93,...,14.775179,0.941264,794.4309,2,FS,False,[FS],[FS],2024-12-13 18:49:16,1 year ago
2,1cd7791,1cd77,https://eu.cdn.beatsaver.com/08d67c25e377d2013...,https://cdn.assets.beatleader.xyz/songcover-1c...,Deception,,Dance Gavin Dance,cerret,316.0,233,...,8.379396,0.969104,783.93976,2,SF,False,[SF],[SF],2023-05-13 14:04:36,2 years ago
3,2a1b391,2a1b3,https://eu.cdn.beatsaver.com/604ac21a79c26207c...,https://cdn.assets.beatleader.xyz/songcover-2a...,II. Anal Prolapse Suffocation,,Infant Annihilator,Vilawes,350.0,182,...,10.19641,0.974313,773.7751,2,FS,True,[FS],[FS],2023-03-15 18:38:45,2 years ago
4,1cdc691,1cdc6,https://eu.cdn.beatsaver.com/eaddeb51358bbd688...,https://cdn.assets.beatleader.xyz/songcover-1c...,We Like To Party! (The Vengabus),[Fvrwvrd Bootleg],Vengaboys,cerret,320.0,125,...,10.649382,0.972415,768.4327,3,FS,True,[FS],[FS],2023-03-15 18:27:35,2 years ago


In [7]:
# calculate days since scores were set
max_date = scores_df["dateset"].max()
scores_df["days_since"] = (max_date - scores_df["dateset"]).dt.days

### Train Model

Decay Function: $\text{Weight}(t) = e^{-\lambda \times \frac{\text{days since}}{14}}$

In [8]:
# apply weighted decay function so newer scores have more influence on the model
lambda_value = 0.1
decay_weights = np.exp(-lambda_value * scores_df["days_since"] / 14) # 2 weeks
scores_df["decay_weights"] = decay_weights

In [9]:
# set up features for exponential regression model

# modified ratings as independent variables
X = scores_df[["passRatingMod", "accRatingMod", "techRatingMod"]].values.reshape(-1, 3)
X_poly = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)

# dependent variable; invert to mimic downward curve
y = scores_df["accuracy"].to_numpy().reshape(-1, 1)
y_inv = (1 - scores_df["accuracy"]).to_numpy().reshape(-1, 1)
y_inv_log = np.log(y_inv)

LOBF Equation: $\text{model} = (X^TWX)^{-1}X^TWy$

In [10]:
# train model
W = np.diag(decay_weights)
X_poly_bias = np.column_stack([np.ones(X_poly.shape[0]), X_poly])

XtW = np.matmul(X_poly_bias.T, W)
XtWX_inv = np.linalg.inv(np.matmul(XtW, X_poly_bias))
XtWy = np.matmul(XtW, y_inv_log)

model = np.matmul(XtWX_inv, XtWy)
model

array([[-4.65709694e+00],
       [ 1.01325167e-02],
       [ 5.50377443e-02],
       [-2.56109136e-02],
       [-9.59968314e-04],
       [ 2.73095181e-03],
       [-4.21244860e-03],
       [ 3.07919730e-03],
       [-3.67394852e-03],
       [ 1.33472578e-02]])

### Predict Scores

In [11]:
# predict scores
ypreds_inv = np.dot(X_poly, model[1:]) + model[0]
ypreds = 1 - np.exp(ypreds_inv)

scores_df["pred_accuracy"] = ypreds
scores_df[["accuracy", "pred_accuracy"]].head()

Unnamed: 0,accuracy,pred_accuracy
0,0.974111,0.965228
1,0.941264,0.941145
2,0.969104,0.967148
3,0.974313,0.968422
4,0.972415,0.967232


### Visualizations

In [12]:
px.scatter(scores_df, x="stars", y="accuracy", color="decay_weights", color_continuous_scale="magenta",
           hover_data=["name", "mapper", "type", "difficultyName", "days_since", 
                       "passRatingMod", "accRatingMod", "techRatingMod"], 
           title="Decay Weights for Scores")

In [13]:
scores_df['accuracy_type'] = 'Actual'

predicted_df = scores_df[['stars', 'pred_accuracy']].copy()
predicted_df['accuracy_type'] = 'Predicted'
predicted_df = predicted_df.rename(columns={'pred_accuracy': 'accuracy'})

combined_df = pd.concat([scores_df[['stars', 'accuracy', 'accuracy_type']], predicted_df[['stars', 'accuracy', 'accuracy_type']]], ignore_index=True)

px.scatter(combined_df, x="stars", y="accuracy", color="accuracy_type", title="Actual vs. Predicted Accuracy")

### All Predictions

In [14]:
# predict for all maps
predictions_df = predict_scores(model, scores_df, maps_df)
predictions_df.head()

Unnamed: 0,leaderboardId,songId,cover,fullCover,name,subName,author,mapper,bpm,duration,...,predictedMods,currentAccuracy,predictedAccuracy,accuracyGained,currentPP,predictedPP,maxPP,unweightedPPGain,weightedPPGain,weight
807,3ce7axxxxxxxxxxx91,3ce7axxxxxxxxxxx,https://cdn.beatsaver.com/6343bbda0b1c75d52423...,https://cdn.assets.beatleader.xyz/songcover-3c...,The Purple Dimension,Extended Version,ToonTubers,Cratornugget & ViSi,464.0,187,...,,0.0,0.965241,0.965241,0.0,857.811191,857.811191,857.811191,857.811191,1.0
0,2b85fxx71,2b85fxx,https://eu.cdn.beatsaver.com/09fd6d30c55f6d721...,https://cdn.assets.beatleader.xyz/songcover-2b...,At Least Speedcore Artists Aren't In It For Th...,,Loffciamcore & Imil,Slayx,260.0,143,...,[SF],0.974111,0.965228,0.0,840.22034,733.210017,840.22034,0.0,0.0,1.0
1019,2c888xxxxx91,2c888xxxxx,https://eu.cdn.beatsaver.com/3f567bc5cc7ada8c9...,https://cdn.assets.beatleader.xyz/songcover-2c...,Splatter Party,(Feat. Hatsune Miku) [Xena Galia Cover || XH /...,Camellia,Anammelech & Otricity,216.0,278,...,,0.0,0.96933,0.96933,0.0,815.693588,815.693588,815.693588,787.144313,0.965
182,13b2b71,13b2b,https://eu.cdn.beatsaver.com/2e887f6cd0fa896a8...,https://cdn.assets.beatleader.xyz/songcover-13...,Extra Credit on the Chromosome Test! VICTORY R...,(feat. Lil Triangle),Schwank,lobster & FatBeanzoop,175.0,132,...,[FS],0.949994,0.970044,0.020051,673.575,811.729609,811.729609,138.154609,133.319198,0.965
20,1ace571,1ace5,https://eu.cdn.beatsaver.com/61caada06a65088bd...,https://cdn.assets.beatleader.xyz/songcover-1a...,Venomous Firefly,,Camellia,ComplexFrequency,264.0,122,...,[SF],0.952253,0.962209,0.009956,742.475,808.41877,808.41877,65.94377,63.635738,0.965


In [15]:
# api reponse
resp = predictions_df.to_json(orient="records")
parsed = loads(resp)
print(dumps(parsed, indent=4))

[
    {
        "leaderboardId": "3ce7axxxxxxxxxxx91",
        "songId": "3ce7axxxxxxxxxxx",
        "cover": "https://cdn.beatsaver.com/6343bbda0b1c75d52423e6273858c1d80b6f9326.jpg",
        "fullCover": "https://cdn.assets.beatleader.xyz/songcover-3ce7axxxxxxxxxxx-full.webp",
        "name": "The Purple Dimension",
        "subName": "Extended Version",
        "author": "ToonTubers",
        "mapper": "Cratornugget & ViSi",
        "bpm": 464.0,
        "duration": 187,
        "difficultyName": "ExpertPlus",
        "type": "Speed",
        "stars": 15.605963,
        "passRating": 16.897497,
        "accRating": 12.630503,
        "techRating": 6.4419417,
        "starsMod": 15.605963,
        "passRatingMod": 16.897497,
        "accRatingMod": 12.630503,
        "techRatingMod": 6.4419417,
        "status": "unplayed",
        "rank": null,
        "timeAgo": null,
        "currentMods": null,
        "predictedMods": null,
        "currentAccuracy": 0.0,
        "predictedAccura

In [16]:
# save predictions to json file
predictions_df.to_json("predictions.json", orient="records", compression="infer")