In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from circleguard import Circleguard, ReplayPath
from circleguard import KeylessCircleguard, ReplayDir
from circleguard.judgment import JudgmentType
from slider import Library
from slider.beatmap import Circle, Slider, Spinner

# Notebook-wide presentation settings: ggplot styling for matplotlib figures,
# and let pandas show up to 100 columns / 100 rows before truncating output.
plt.style.use("ggplot")
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, 100)

In [2]:
# Locations of the dataset pieces (all relative paths).
INDEX_PATH = "data/index.csv"
BEATMAP_PATH = "data/beatmaps"
REPLAY_PATH = "data/replays/osr"
# The circleguard database file sits inside the replay directory.
DB_PATH = f"{REPLAY_PATH}/.circleguard.db"
# circleguard's slider_dir is pointed at the same directory as the beatmap library.
SLIDER_PATH = BEATMAP_PATH

## Load Data

In [3]:
# index and beatmaps
#
# index_df holds one row per replay (read from INDEX_PATH); beatmap_library is
# the slider Library that later cells query by beatmap hash.
# NOTE(review): the displayed frame contains "Unnamed: 0.1" / "Unnamed: 0"
# columns, which look like previously saved indexes — confirm whether they
# should be dropped.

index_df = pd.read_csv(INDEX_PATH, low_memory=False)
beatmap_library = Library(BEATMAP_PATH)

print(
    f"Num. replays in index: {len(index_df)}",
    f"Num. of beatmaps in library: {len(beatmap_library.ids)}",
    sep="\n",
)
index_df.head()

Num. replays in index: 381570
Num. of beatmaps in library: 55079


Unnamed: 0.1,Unnamed: 0,replayHash,beatmapHash,summary,date,playerName,modsReadable,mods,performance-IsFail,performance-Accuracy,performance-Score,performance-300s,performance-100s,performance-50s,performance-Misses,performance-Geki,performance-Katu,performance-MaxCombo,performance-IsFC,beatmap-Artist,beatmap-Title,beatmap-Version,beatmap-Id,beatmap-SetId,beatmap-BPMMin,beatmap-BPMMax,beatmap-HP,beatmap-OD,beatmap-AR,beatmap-CS,beatmap-MaxCombo,beatmap-HitObjects,beatmap-Circles,beatmap-Sliders,beatmap-Spinners,beatmapPlay-BPMMin,beatmapPlay-BPMMax,beatmapPlay-HP,beatmapPlay-OD,beatmapPlay-AR,beatmapPlay-CS,osrReplayUrl
0,0,857623324645f59599fc4e9c1c7e1130,db4cdde15984869de346686ceb6bc1a5,[7.44 ⭐] My Angel Shori | Imperial Circus Dead...,2022-11-11T08:03:56.656728,My Angel Shori,,0,False,0.954368,28363840,1771,72,1,37,422,50,950,False,Imperial Circus Dead Decadence,Hyakki Yakou -Pandemonic Night Parade-,Youkai,3638479.0,1776682.0,134,235,5.0,9.2,9.7,4.5,2515,1881,1438,442,1,134.0,235.0,5.0,9.2,9.7,4.5,https://dl.issou.best/ordr/replays/cfc43370ca0...
1,1,b1675645282756f18e0c3f142f290d94,04129d1b26d6bf3f35cb00a04f4a8c88,[6.55 ⭐] AlexTheProtoTTV | katagiri - Buta Mus...,2022-11-11T04:37:52.235160,AlexTheProtoTTV,,0,False,0.857687,2363246,991,174,10,50,274,103,194,False,katagiri,Buta Musou,AR 9.8,3777132.0,1647421.0,210,210,4.5,9.2,9.8,3.8,1660,1225,832,392,1,210.0,210.0,4.5,9.2,9.8,3.8,https://dl.issou.best/ordr/replays/e35f6f3e3b1...
2,2,6170131dcd9ad32d4c0287422b256423,8e3e4e77e6498a994bfa7505e735155d,[6.37 ⭐] Asio_ | Qrispy Joybox feat. mao - Col...,2022-11-11T08:01:02.823288,Asio_,HDDT,72,False,0.916667,777289,272,31,0,5,57,20,136,False,Qrispy Joybox feat. mao,Colorful Minutes,Beautiful Time,2584022.0,1242911.0,152,152,5.2,8.5,9.0,3.8,438,308,189,118,1,228.0,228.0,5.2,10.083333,10.333333,3.8,https://dl.issou.best/ordr/replays/f214715d45d...
3,3,d3fc501dadc1fabd60f847432cc3de84,27f9c5f8496bfdab8623e3be6cf73f25,[6.3 ⭐] sotarks fan123 | DragonForce - Valley ...,2022-11-11T07:46:29.042768,sotarks fan123,,0,True,0.948815,45264320,1111,68,1,15,182,42,1485,False,DragonForce,Valley of the Damned,Apocalypse,675734.0,67565.0,71,200,6.0,8.5,9.2,4.0,3089,2148,1486,660,2,71.0,200.0,6.0,8.5,9.2,4.0,https://dl.issou.best/ordr/replays/cf3f6778f86...
4,4,8e62a939e4c97fe22cbd90e90d338a53,1e8f966c7a8f992cb2f3f5bab7d55925,[6.34 ⭐] XxSzymenxx | DragonForce - Through th...,2022-03-11T10:40:06.741672,XxSzymenxx,,0,True,0.731128,486150,254,85,30,24,29,19,99,False,DragonForce,Through the Fire and Flames,Myth,1001682.0,382400.0,170,200,6.2,9.0,9.5,4.0,3220,2126,1534,587,5,170.0,200.0,6.2,9.0,9.5,4.0,https://dl.issou.best/ordr/replays/b90ed42a04b...


In [83]:
# replays

# Upper bound on how many replays get loaded; only a subset of the replay
# directory is considered for now. Raise it to process more of the dataset.
MAX_REPLAYS = 2500

cg = KeylessCircleguard(
    db_path=DB_PATH,
    slider_dir=SLIDER_PATH,
    cache=True,
)
replay_dir = ReplayDir(REPLAY_PATH)

# Order matters: load_info runs first so that replay_dir.replays exists to be
# sliced (presumably load_info is what populates it — confirm against the
# circleguard docs); only the truncated list is then fully loaded.
cg.load_info(replay_dir)
replay_dir.replays = replay_dir.replays[:MAX_REPLAYS]
cg.load(replay_dir)

print(f"Num Replays Loaded: {len(replay_dir.all_replays())}")

Num Replays Loaded: 2500


## Parse Notes

In [5]:
def get_hitobject_embedding(prev_obj, curr_obj, next_obj):
    """Placeholder for the per-hit-object embedding.

    Not implemented yet: the arguments are ignored and ``None`` is returned.
    ``get_embeddings`` calls this with ``None`` as ``prev_obj`` / ``next_obj``
    at the ends of the object sequence.
    """
    # TODO: build the embedding from the current object and its neighbours.
    return None

def filter_out_spinners(objects):
    """Return a new list with only the ``Slider`` and ``Circle`` objects.

    The match is on the exact type, so despite the name every object that is
    not precisely a ``Slider`` or a ``Circle`` is dropped, not only spinners.
    Input order is preserved.
    """
    kept = []
    for obj in objects:
        if type(obj) is Slider or type(obj) is Circle:
            kept.append(obj)
    return kept

def get_embeddings(replay):
    """Embed every non-spinner hit object of the replay's beatmap.

    The beatmap is looked up in ``beatmap_library`` by ``replay.beatmap_hash``.
    Each remaining hit object is passed to ``get_hitobject_embedding`` together
    with its predecessor and successor; ``None`` stands in for the missing
    neighbour at either end of the sequence.

    Returns:
        A list with one embedding per non-spinner hit object, in beatmap
        order. Beatmaps with fewer than two such objects are handled too
        (an empty list, or a single embedding with both neighbours ``None``)
        instead of raising ``IndexError``.
    """
    beatmap = beatmap_library.lookup_by_md5(replay.beatmap_hash)
    hitobjects = filter_out_spinners(beatmap.hit_objects())

    # Pad both ends with None so a sliding window of three yields exactly one
    # (prev, curr, next) triple per object, whatever the sequence length.
    padded = [None, *hitobjects, None]
    return [
        get_hitobject_embedding(prev_obj, curr_obj, next_obj)
        for prev_obj, curr_obj, next_obj in zip(padded, padded[1:], padded[2:])
    ]


In [6]:
def sort_judgments(judgments):
    """Return the judgments as a new list ordered by ``hitobject.t``.

    Best-effort: if sorting raises ``TypeError`` (for example ``judgments`` is
    not iterable, or the ``t`` values cannot be compared), an empty list is
    returned instead of propagating the error.
    """
    def hitobject_time(judgment):
        return judgment.hitobject.t

    try:
        ordered = list(judgments)
        ordered.sort(key=hitobject_time)
    except TypeError:
        return []
    return ordered
    
def encode_judgment(judgment):
    """One-hot encode a judgment's type.

    Args:
        judgment: object whose ``type`` attribute is a ``JudgmentType``.

    Returns:
        A list of four ints in the order ``[Hit300, Hit100, Hit50, Miss]``,
        with a 1 in the slot matching ``judgment.type`` and 0 elsewhere.

    Raises:
        ValueError: if ``judgment.type`` is not one of the four categories
            above. The offending type is part of the message.
    """
    # Single source of truth for both the validity check and the slot order.
    categories = (
        JudgmentType.Hit300,
        JudgmentType.Hit100,
        JudgmentType.Hit50,
        JudgmentType.Miss,
    )
    t = judgment.type
    if t not in categories:
        raise ValueError(f"Unsupported judgment type: {t!r}")
    return [1 if t == category else 0 for category in categories]

def get_judgments(replay):
    """Return the one-hot encoded judgments of ``replay``.

    The judgments come from ``cg.judgments(replay)``, are ordered with
    ``sort_judgments`` and then encoded one by one with ``encode_judgment``.
    """
    ordered = sort_judgments(cg.judgments(replay))
    encoded = []
    for judgment in ordered:
        encoded.append(encode_judgment(judgment))
    return encoded

In [91]:
def validate_replay(replay):
    """Check that circleguard's judgments for ``replay`` line up with its beatmap.

    The beatmap is looked up in ``beatmap_library`` by ``replay.beatmap_hash``
    and its hit objects (minus those removed by ``filter_out_spinners``) are
    compared against the time-sorted judgments from ``cg.judgments(replay)``.
    A diagnostic line is printed for the first failing check.

    Returns:
        1 if every check passes, otherwise a negative code:
        -1  number of judgments differs from the number of filtered objects
        -3  hit counts stored on the replay disagree with the judgments
        -4  a judged object's time differs from the beatmap object's time
        -5  a judged object's x or y differs from the beatmap object's position
        (-2 is no longer produced: it guarded a second length comparison that
        could never fail once the -1 check had passed.)
    """
    # sequence length

    beatmap = beatmap_library.lookup_by_md5(replay.beatmap_hash)
    all_objects = beatmap.hit_objects()
    beatmap_objects = filter_out_spinners(all_objects)
    # Everything the filter removed is counted as a spinner.
    num_spinners = len(all_objects) - len(beatmap_objects)

    replay_judgments = sort_judgments(cg.judgments(replay))

    if len(beatmap_objects) != len(replay_judgments):
        print(f"Length mismatch between beatmap objects {len(beatmap_objects)} and replay judgments {len(replay_judgments)} of replay at {replay.path}.")
        return -1

    # Built 1:1 from the judgments, so it has the same length as beatmap_objects.
    replay_objects = [j.hitobject for j in replay_judgments]

    # hitcounts

    judgment_encodings = [encode_judgment(j) for j in replay_judgments]
    judgment_hitcounts = np.sum(judgment_encodings, axis=0)
    replay_hitcounts = np.array([replay.count_300, replay.count_100, replay.count_50, replay.count_miss])

    hitcount_err = replay_hitcounts - judgment_hitcounts

    # Only the totals are compared: the replay's own counts are expected to
    # exceed the judged ones by exactly the number of filtered-out objects
    # (presumably because the replay counts include spinners — confirm).
    # Per-category differences that cancel each other out are not detected.
    if sum(hitcount_err) != num_spinners:
        print(f"Hitcount mismatch ({judgment_hitcounts} vs. {replay_hitcounts}, num_spinners = {num_spinners}) of replay at {replay.path}.")
        return -3

    # position & offset alignment

    EPSILON = 1e-06

    # Positions are not compared for replays whose mod string contains HR or EZ
    # (presumably because object placement then differs from the raw beatmap —
    # confirm). The test does not depend on the object, so it is done once.
    mods = str(replay.mods)
    check_positions = "HR" not in mods and "EZ" not in mods

    for idx, (replay_obj, beatmap_obj) in enumerate(zip(replay_objects, beatmap_objects)):

        # Beatmap times are timedeltas; convert to milliseconds for comparison.
        beatmap_time_ms = beatmap_obj.time.total_seconds() * 1000

        if abs(replay_obj.time - beatmap_time_ms) >= EPSILON:
            print(f"Offset mismatch ({1.0 * replay_obj.time} vs. {beatmap_time_ms}) at index {idx} of replay at {replay.path}.")
            return -4

        if not check_positions:
            continue

        # abs() so deviations in either direction are caught; without it a
        # negative difference of any size would pass the check.
        if abs(replay_obj.x - beatmap_obj.position.x) >= EPSILON:
            print(f"Position mismatch (x={replay_obj.x} vs. x={beatmap_obj.position.x}) at index {idx} of replay at {replay.path}.")
            return -5

        if abs(replay_obj.y - beatmap_obj.position.y) >= EPSILON:
            print(f"Position mismatch (y={replay_obj.y} vs. y={beatmap_obj.position.y}) at index {idx} of replay at {replay.path}.")
            return -5

    return 1


In [92]:
# Validate every loaded replay, collecting one status code per replay
# (1 means all checks passed; negative codes identify the failed check).
res = []
for replay in replay_dir:
    res.append(validate_replay(replay))

Hitcount mismatch ([118  23   1 916] vs. [109  31   2   9], num_spinners = 2) of replay at w:\Documents\osu490\data\replays\osr\00000f8351988aea8881aee93ffa7bbd.osr.
Hitcount mismatch ([1222  264   41  794] vs. [1189  312   46   61], num_spinners = 0) of replay at w:\Documents\osu490\data\replays\osr\0000cb47a2940ce372322c033e4fed90.osr.
Hitcount mismatch ([ 421   66    6 2054] vs. [419  74   6  34], num_spinners = 7) of replay at w:\Documents\osu490\data\replays\osr\0003181675e2bbf8d84bf73b50ae936a.osr.
Hitcount mismatch ([769  79   4 414] vs. [764  87   4  17], num_spinners = 0) of replay at w:\Documents\osu490\data\replays\osr\00067865d7cc165c359f01ada7318f9e.osr.
Hitcount mismatch ([566   0   0 687] vs. [572   0   0   8], num_spinners = 7) of replay at w:\Documents\osu490\data\replays\osr\0007db7d4277e660881bbb4890d51641.osr.
Hitcount mismatch ([161  27   1 115] vs. [161  28   1  12], num_spinners = 1) of replay at w:\Documents\osu490\data\replays\osr\00090b94dce3c3e227011bae36ad66

In [95]:
# Fraction of replays that passed every validation check (status code 1).
res = np.asarray(res)
(res == 1).mean()

0.8616

In [93]:
# How often each mod combination occurs among the loaded replays.
mod_labels = [str(replay.mods) for replay in replay_dir]
mods = pd.Series(mod_labels)
mods.value_counts()

NM            1364
HDDT           268
HD             203
DT             177
HDHR            73
NF              70
HR              65
HDNC            43
NC              24
HDNF            19
HDDTHR          19
EZ              16
V2              11
FL              10
HDDTHRFL         9
EZDT             9
NFV2             7
DTNF             7
HRNF             6
EZHD             6
HDDTNF           6
HT               5
HRNFV2           5
HDHRSD           4
SO               4
NCHR             4
HDHRFL           4
HDNFV2           3
SD               3
DTHR             3
EZHDDT           3
HDHRNF           3
HDNCHRFL         3
DTHRFL           2
HDNCHRFLPF       2
DTNFV2           2
EZNFV2           2
HTHR             2
HRV2             2
HDDTSD           2
NCNF             2
HDDTHRSD         2
EZHDFL           2
HDNCNF           2
EZHT             2
HDSD             2
HDNCHRSD         1
HDNCSO           1
DTHRNF           1
EZNC             1
HDDTPF           1
EZFL             1
HDHTHR      

In [98]:
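# NOTE: disabled export of the parsed data. `embeddings` and `judgments` are
# not defined in any of the cells shown here, so they must be built (e.g. via
# get_embeddings / get_judgments) before this is re-enabled. `json` is already
# imported in the first cell.
# import json
# with open("embeddings.json", "w") as f:
#     json.dump(embeddings, f)

# with open("judgments.json", "w") as f:
#     json.dump(judgments, f)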