In [2]:
# Reading the dataset
# Count the number of beatmaps and difficulties

import os
import numpy as np
import rosu_pp_py as osu

def count_beatmaps(root_dir="dataset/beatmaps"):
    """Count beatmap folders and difficulty files below ``root_dir``.

    A beatmap is a folder located directly inside ``root_dir``. A difficulty
    is any file whose name ends with ".osu", at any depth below ``root_dir``.

    Args:
        root_dir: Dataset root to scan. Defaults to "dataset/beatmaps".

    Returns:
        A tuple ``(beatmaps_count, difficulties_count)``. Both are 0 when
        ``root_dir`` does not exist, because ``os.walk`` then yields nothing.
    """
    beatmaps_count = 0
    difficulties_count = 0
    for depth, (_, dirs, files) in enumerate(os.walk(root_dir)):
        # os.walk is top-down by default, so only the first tuple describes
        # root_dir itself. Counting `dirs` at deeper levels would also count
        # subfolders inside a beatmap folder as beatmaps.
        if depth == 0:
            beatmaps_count = len(dirs)
        # Difficulties are counted at every depth.
        difficulties_count += sum(name.endswith(".osu") for name in files)
    return beatmaps_count, difficulties_count

# Run the count against the dataset on disk and report both totals.
beatmaps_count, difficulties_count = count_beatmaps()
# print() adds its own separator after the label, hence the two spaces in the output.
print("Beatmaps: ", beatmaps_count)
print("Difficulties: ", difficulties_count)

# Save path to beatmap .osu files in a list, respecting the folder structure
# e.g. "dataset/beatmaps/beatmap1/difficulty1.osu". Every beatmap gets a list of paths to its difficulties

def get_beatmap_paths(root_dir="dataset/beatmaps"):
    """Collect the .osu file paths of every beatmap, grouped per beatmap folder.

    Args:
        root_dir: Dataset root whose direct subfolders are the beatmaps.
            Defaults to "dataset/beatmaps".

    Returns:
        A list with one entry per folder directly inside ``root_dir``, in the
        order ``os.walk`` reports them. Each entry is a list of paths to the
        ".osu" files found anywhere below that folder; it is empty when the
        folder holds none. Returns ``[]`` when ``root_dir`` does not exist.
    """
    # Only the first os.walk tuple lists the beatmap folders. Folders nested
    # deeper belong to a beatmap and must not be joined onto root_dir.
    _, beatmap_dirs, _ = next(os.walk(root_dir), (root_dir, [], []))

    beatmap_paths = []
    for beatmap_dir in beatmap_dirs:
        difficulty_paths = [
            os.path.join(folder, name)
            for folder, _, names in os.walk(os.path.join(root_dir, beatmap_dir))
            for name in names
            if name.endswith(".osu")
        ]
        beatmap_paths.append(difficulty_paths)

    return beatmap_paths

# List of lists of .osu paths (one inner list per beatmap folder), indexed as beatmaps[i][j].
beatmaps = get_beatmap_paths()

Beatmaps:  356
Difficulties:  1718


In [16]:
# Prepare the dataset
# Read metadata from .osu files

def read_metadata(beatmaps: list[list[str]]) -> dict[str, dict]:
    """Read title, artist and difficulty name from a set of .osu files.

    Args:
        beatmaps: One list of .osu file paths per beatmap.

    Returns:
        A dict mapping each title to ``{"artist": ..., "version": ...}``.
        A field is ``None`` when its key is absent from the file. Files
        without a "Title:" line are skipped. The dict is keyed by title
        alone, so when several files share a title the last one read wins.
    """
    # Line prefix in the .osu file -> field it fills.
    prefixes = {"Title:": "title", "Artist:": "artist", "Version:": "version"}

    metadata = {}
    for difficulty_paths in beatmaps:
        for path in difficulty_paths:
            # Start from a clean slate so a missing key never inherits the
            # value parsed from the previous file.
            fields = dict.fromkeys(prefixes.values())
            with open(path, "r", encoding="utf-8") as f:
                for line in f:
                    for prefix, field in prefixes.items():
                        if line.startswith(prefix):
                            # Keep everything after the first colon, so values
                            # that contain ":" themselves are not truncated.
                            fields[field] = line[len(prefix):].strip()
                            break
            title = fields["title"]
            if title is None:
                continue
            metadata[title] = {"artist": fields["artist"], "version": fields["version"]}
    return metadata

# Smoke-test rosu_pp_py on the first difficulty of the first beatmap.
# NOTE(review): `map` shadows the Python builtin for the rest of the kernel session.
map = osu.Beatmap(path = beatmaps[0][0])
perf = osu.Performance()
attr = perf.calculate(map)
# The first result is fed back into calculate(); presumably this reuses the
# already-computed attributes rather than starting from the map again —
# TODO confirm against the rosu-pp-py documentation.
max_attrs = perf.calculate(attr)

# Show the headline values next to the file they were computed from.
print("AR", max_attrs.difficulty.ar)
print("Star Rating", max_attrs.difficulty.stars)
print("PP", max_attrs.pp)
print("BPM", map.bpm)
print("Song", beatmaps[0][0])

AR 9.800000190734863
Star Rating 8.077242505421664
PP 834.5441923076489
BPM 190.0002850004275
Song dataset/beatmaps/1003201 Ata - Euphoria (evilxmaniac)/Ata - Euphoria (evilxmaniac) [Ciyus Miapah's Ultimate Power].osu
