In [41]:
import os
import csv
import json
import numpy as np
import sklearn
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
import pandas

In [23]:
def getMapData(folder):
    """Load and parse the ``info.dat`` metadata file of one beatmap folder.

    Args:
        folder: Name of the beatmap directory inside ``data/``.

    Returns:
        The decoded JSON content of ``data/<folder>/info.dat``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly instead of relying on the platform's locale encoding.
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which the json parser would otherwise reject.
    with open(f"data/{folder}/info.dat", "r", encoding="utf-8-sig") as f:
        return json.load(f)

def getDifficultyMaps(folder):
    """List the difficulty beatmaps declared in a beatmap folder's metadata.

    Only the first entry of ``_difficultyBeatmapSets`` is read.

    Args:
        folder: Name of the beatmap directory, passed on to ``getMapData``.

    Returns:
        A list of ``(_beatmapFilename, _difficultyRank)`` tuples, in the order
        in which they appear in the metadata.
    """
    firstSet = getMapData(folder)["_difficultyBeatmapSets"][0]
    return [
        (entry["_beatmapFilename"], entry["_difficultyRank"])
        for entry in firstSet["_difficultyBeatmaps"]
    ]
                
def getDifficultyMapData(folder, file):
    """Load and parse one difficulty beatmap file of a beatmap folder.

    Args:
        folder: Name of the beatmap directory inside ``data/``.
        file: Name of the difficulty file inside that directory.

    Returns:
        The decoded JSON content of ``data/<folder>/<file>``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly instead of relying on the platform's locale encoding.
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which the json parser would otherwise reject.
    with open(f"data/{folder}/{file}", "r", encoding="utf-8-sig") as f:
        return json.load(f)

def getMaps():
    """Return the names of all beatmap folders found directly under ``data/``.

    An entry counts as a beatmap folder when it is a directory that directly
    contains an entry named ``info.dat``.

    Returns:
        The matching directory names, sorted alphabetically.
    """
    maps = []
    # os.listdir yields entries in arbitrary order; sorting keeps the result
    # (and anything derived from its order) the same from run to run.
    for obj in sorted(os.listdir("data")):
        objPath = f"data/{obj}"
        if os.path.isdir(objPath) and "info.dat" in os.listdir(objPath):
            maps.append(obj)
    return maps

def getNoteDensity(diffMapData, duration):
    """Average number of notes per unit of ``duration`` over a whole map.

    Args:
        diffMapData: Parsed difficulty beatmap; its ``_notes`` list is counted.
        duration: Length to divide by (presumably seconds; confirm with the caller).

    Returns:
        ``len(_notes) / duration``.
    """
    noteCount = len(diffMapData["_notes"])
    return noteCount / duration

def beatToSec(beat, bpm):
    """Convert a position measured in beats to seconds at a constant tempo.

    Args:
        beat: Position in beats.
        bpm: Tempo in beats per minute.

    Returns:
        The position in seconds, ``60 / bpm * beat``.
    """
    secondsPerBeat = 60 / bpm
    return secondsPerBeat * beat

def getLocalNoteDensities(diffMapData, duration, bpm, windowLength=3, step=0.5):
    """Compute note densities over a sliding time window.

    The window starts at ``[0, windowLength]`` and is shifted by ``step``
    for as long as its upper edge is strictly below ``duration``.

    Args:
        diffMapData: Parsed difficulty beatmap; each entry of ``_notes`` must
            carry its position in beats under ``_time``.
        duration: Time in seconds that bounds the upper edge of the windows.
        bpm: Tempo in beats per minute, used to convert beats to seconds.
        windowLength: Window size in seconds.
        step: Shift between consecutive windows in seconds; must be positive.

    Returns:
        A list with one value per window: the number of notes inside the
        window (both edges inclusive) divided by ``windowLength``. The list is
        empty when ``duration <= windowLength``.

    Raises:
        ValueError: If ``step`` is not positive (the window would never advance).
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # Convert every note position to seconds once instead of once per window.
    noteTimes = [beatToSec(n["_time"], bpm) for n in diffMapData["_notes"]]

    densities = []
    windowLower = 0
    windowUpper = windowLength
    while windowUpper < duration:
        numNotes = sum(1 for t in noteTimes if windowLower <= t <= windowUpper)
        densities.append(numNotes / windowLength)
        windowLower += step
        windowUpper += step
    return densities

In [24]:
# Build the feature table: one CSV row per (song, difficulty) pair.
# newline="" is what the csv module asks for on files handed to csv.writer
# (without it, extra blank rows can appear on platforms that translate "\n"),
# and the explicit UTF-8 encoding matches the default pandas.read_csv uses
# when this file is read back.
with open("data/features.csv", "w", newline="", encoding="utf-8") as csvFile:
    csvW = csv.writer(csvFile)
    csvHeader = ["Name", "BPM", "Note Density", "Peak Note Density", "Difficulty"]
    csvW.writerow(csvHeader)

    for beatmap in getMaps():
        # Song-level metadata is shared by every difficulty of the beatmap.
        mapData = getMapData(beatmap)
        songName = mapData["_songName"]
        songDuration = mapData["_songApproximativeDuration"]
        songBpm = mapData["_beatsPerMinute"]

        diffMaps = getDifficultyMaps(beatmap)
        for diffMapObj in diffMaps:
            # Each entry is a (file name, difficulty rank) pair.
            diffMap, diffRank = diffMapObj

            diffMapData = getDifficultyMapData(beatmap, diffMap)
            diffMapNoteDensity = getNoteDensity(diffMapData, songDuration)
            # NOTE: getLocalNoteDensities returns an empty list when the song
            # is not longer than its window, in which case np.max raises
            # ValueError.
            maxND = np.max(getLocalNoteDensities(diffMapData, songDuration, songBpm))
            row = [songName, songBpm, diffMapNoteDensity, maxND, diffRank]
            csvW.writerow(row)

In [71]:
# Share of the samples held out for evaluation (a fraction, despite the name).
TEST_PERCENT = 0.5

# Load the feature table. Column 0 holds the song name, the last column is the
# target, and every column in between is a model input.
data = pandas.read_csv("data/features.csv").values
X_in = data[:, 1:-1]
Y_in = data[:, -1:]
X_train, X_test, Y_train, Y_test = train_test_split(
    X_in,
    Y_in,
    random_state=42,
    test_size=TEST_PERCENT,
)

# Candidate values of the SVR parameter C to sweep over.
C = np.linspace(0.01, 10, num=20)

# NOTE(review): every candidate is scored on the test split itself, so the
# best value printed below is an optimistic estimate.
for i in range(C.size):
    # since actual difficulty ranks are integer, we take epsilon = 0.5
    model = make_pipeline(StandardScaler(), SVR(C=C[i], epsilon=0.5))
    # Y_train is a single-column 2-D array; fit receives its values as a flat list.
    model.fit(X_train, Y_train[:, 0].tolist())

    Y_pred = model.predict(X_test)
    # NOTE(review): this is a mean *squared* error, so the printed number is in
    # squared difficulty-rank units even though the message says otherwise.
    err = mean_squared_error(Y_test, Y_pred)
    print(f"C = {C[i]}, Error: {err} difficulty rank units")


C = 0.01, Error: 2.616649769529915 difficulty rank units
C = 0.5357894736842106, Error: 1.0803151019548574 difficulty rank units
C = 1.0615789473684212, Error: 0.9866687656525351 difficulty rank units
C = 1.5873684210526318, Error: 0.9777001577319859 difficulty rank units
C = 2.113157894736842, Error: 0.9536237798811621 difficulty rank units
C = 2.6389473684210527, Error: 0.9531149872882204 difficulty rank units
C = 3.1647368421052633, Error: 0.9584035095928939 difficulty rank units
C = 3.690526315789474, Error: 0.9785720824923968 difficulty rank units
C = 4.2163157894736845, Error: 0.9998222675532702 difficulty rank units
C = 4.742105263157895, Error: 1.0117277828493905 difficulty rank units
C = 5.267894736842106, Error: 1.0137555616804674 difficulty rank units
C = 5.793684210526316, Error: 0.9978544527536226 difficulty rank units
C = 6.319473684210527, Error: 0.984002879430952 difficulty rank units
C = 6.845263157894737, Error: 0.9836703769895058 difficulty rank units
C = 7.371052631