In [117]:
import os
import csv
import json
import numpy as np
import sklearn
from sklearn.metrics import mean_squared_error, max_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
import pandas

In [23]:
def getMapData(folder):
    with open(f"data/{folder}/info.dat", "r") as f:
        mapData = f.read()
        mapJson = json.loads(mapData)
        return mapJson

def getDifficultyMaps(folder):
    difficultyMaps = []
    mapJson = getMapData(folder)
    for d in mapJson["_difficultyBeatmapSets"][0]["_difficultyBeatmaps"]:
        difficultyMaps.append((d["_beatmapFilename"], d["_difficultyRank"]))
    return difficultyMaps
                
def getDifficultyMapData(folder, file):
    path = f"data/{folder}/{file}"
    with open(f"{path}", "r") as f:
        mapData = f.read()
        mapJson = json.loads(mapData)
        return mapJson

def getMaps():
    maps = []
    for obj in os.listdir("data"):
        objPath = f"data/{obj}"
        if os.path.isdir(objPath) and "info.dat" in os.listdir(objPath):
            maps.append(obj)
    return maps

def getNoteDensity(diffMapData, duration):
    notesList = diffMapData["_notes"]
    return len(notesList)/duration

def beatToSec(beat, bpm):
    return 60/bpm * beat

def getLocalNoteDensities(diffMapData, duration, bpm, windowLength=3, step=0.5):
    densities = []
    beatsPerWindow = bpm/60 * windowLength
    windowLower = 0
    windowUpper = windowLength
    while windowUpper < duration:
        numNotes = 0
        for n in diffMapData["_notes"]:
            noteTime = beatToSec(n["_time"], bpm)
            if windowLower <= noteTime and noteTime <= windowUpper:
                numNotes += 1
        densities.append(numNotes/windowLength)
        windowLower += step
        windowUpper += step
    return densities

In [24]:
with open("data/features.csv", "w") as csvFile:
    csvW = csv.writer(csvFile)
    csvHeader = ["Name", "BPM", "Note Density", "Peak Note Density","Difficulty"]
    csvW.writerow(csvHeader)

    for beatmap in getMaps():
        mapData = getMapData(beatmap)
        songName = mapData["_songName"]
        songDuration = mapData["_songApproximativeDuration"] 
        songBpm = mapData["_beatsPerMinute"]
        
        diffMaps = getDifficultyMaps(beatmap)
        for diffMapObj in diffMaps:
            diffMap = diffMapObj[0]
            diffRank = diffMapObj[1]
            
            diffMapData = getDifficultyMapData(beatmap, diffMap)
            diffMapNoteDensity = getNoteDensity(diffMapData, songDuration)
            maxND = np.max(getLocalNoteDensities(diffMapData, songDuration, songBpm))
            row = [songName, songBpm, diffMapNoteDensity, maxND, diffRank]
            csvW.writerow(row)

In [76]:
TEST_PERCENT = 0.5

# split into train and test samples
data = pandas.read_csv("data/features.csv").values
X_in = data[:, 1:-1]
Y_in = data[:, -1:]
X_train, X_test, Y_train, Y_test = train_test_split(X_in, Y_in, test_size=TEST_PERCENT, random_state=42)

Y_train = np.transpose(Y_train).tolist()[0]
Y_test = np.transpose(Y_test).tolist()[0]

In [121]:
# find best C parameter
C = np.linspace(0.01, 10, num=50)
for i in range(len(C)):

    # since actual difficulty ranks are integer, we take epsilon = 0.5
    model = make_pipeline(StandardScaler(), SVR(C=C[i], epsilon=0.5))
    model.fit(X_train, Y_train)

    Y_pred = model.predict(X_test)
    errRms = mean_squared_error(Y_test, Y_pred, squared=False)
    errMax = max_error(Y_test, Y_pred)
    print(f"C = {round(C[i], 2)}, Error: {round(errRms, 3)} RMS, {round(errMax, 2)} max")

C = 0.01, Error: 1.618 RMS, 6.46 max
C = 0.21, Error: 1.169 RMS, 5.95 max
C = 0.42, Error: 1.077 RMS, 5.44 max
C = 0.62, Error: 1.03 RMS, 5.26 max
C = 0.83, Error: 1.023 RMS, 5.09 max
C = 1.03, Error: 0.996 RMS, 4.7 max
C = 1.23, Error: 0.99 RMS, 4.55 max
C = 1.44, Error: 0.989 RMS, 4.53 max
C = 1.64, Error: 0.989 RMS, 4.51 max
C = 1.84, Error: 0.98 RMS, 4.35 max
C = 2.05, Error: 0.977 RMS, 4.28 max
C = 2.25, Error: 0.976 RMS, 4.26 max
C = 2.46, Error: 0.976 RMS, 4.23 max
C = 2.66, Error: 0.976 RMS, 4.2 max
C = 2.86, Error: 0.976 RMS, 4.18 max
C = 3.07, Error: 0.977 RMS, 4.14 max
C = 3.27, Error: 0.981 RMS, 4.11 max
C = 3.48, Error: 0.985 RMS, 4.08 max
C = 3.68, Error: 0.989 RMS, 4.05 max
C = 3.88, Error: 0.993 RMS, 4.02 max
C = 4.09, Error: 0.997 RMS, 3.99 max
C = 4.29, Error: 1.001 RMS, 3.96 max
C = 4.5, Error: 1.005 RMS, 3.93 max
C = 4.7, Error: 1.006 RMS, 3.89 max
C = 4.9, Error: 1.006 RMS, 3.86 max
C = 5.11, Error: 1.006 RMS, 3.83 max
C = 5.31, Error: 1.007 RMS, 3.8 max
C = 5.51, 

In [123]:
best_C = 2.66

model = make_pipeline(StandardScaler(), SVR(C=best_C, epsilon=0.5))
model.fit(X_train, Y_train)

Y_pred = model.predict(X_test)
err = mean_squared_error(Y_test, Y_pred, squared=False)
print(f"Total RMS Error: {round(err, 2)} units\n")
    
print("Guess    | Actual")
print("---------+-------")
for i in range(len(Y_pred)):
    print(f"{round(Y_pred[i], 1)} ({str(int(round(Y_pred[i], 0))).rjust(2)}) | {str(Y_test[i]).rjust(2)}")

Total RMS Error: 0.98 units

Guess    | Actual
---------+-------
9.5 (10) |  9
7.6 ( 8) |  7
6.7 ( 7) |  8
8.1 ( 8) |  8
8.7 ( 9) |  8
5.7 ( 6) |  5
6.0 ( 6) |  6
7.8 ( 8) |  7
7.5 ( 8) |  7
9.4 ( 9) | 10
7.3 ( 7) |  8
7.9 ( 8) |  9
8.3 ( 8) |  8
7.9 ( 8) |  8
7.2 ( 7) |  7
6.7 ( 7) |  7
5.6 ( 6) |  5
7.7 ( 8) |  7
6.9 ( 7) |  8
7.3 ( 7) |  8
8.0 ( 8) |  8
5.5 ( 5) |  5
7.9 ( 8) |  8
7.9 ( 8) |  7
7.1 ( 7) |  6
6.6 ( 7) |  6
7.1 ( 7) |  6
8.6 ( 9) |  8
9.8 (10) | 10
6.7 ( 7) |  5
5.2 ( 5) |  6
9.3 ( 9) | 10
7.2 ( 7) |  8
8.5 ( 9) |  7
4.6 ( 5) |  4
6.6 ( 7) |  6
4.6 ( 5) |  5
6.8 ( 7) |  6
6.2 ( 6) |  6
8.8 ( 9) |  9
5.2 ( 5) |  1
5.4 ( 5) |  7
6.7 ( 7) |  8
6.6 ( 7) |  6
6.2 ( 6) |  5
8.5 ( 9) |  8
7.3 ( 7) |  8
6.2 ( 6) |  6
7.5 ( 8) |  8
8.8 ( 9) |  8
9.5 ( 9) |  9
9.3 ( 9) |  8
8.7 ( 9) | 10
5.6 ( 6) |  6
9.5 ( 9) |  8
7.1 ( 7) |  5
7.0 ( 7) |  8
9.6 (10) |  9
7.3 ( 7) |  8
7.2 ( 7) |  6
6.2 ( 6) |  6
9.5 ( 9) | 10
6.2 ( 6) |  8
9.2 ( 9) |  9
7.2 ( 7) |  7
8.5 ( 8) |  9
7.7 ( 8) | 