In [49]:
import requests
import json
import time

Static Fields

These fields are worth tuning if the resulting dataset is still not good enough. `MAX_NPS` and `MAX_BPM` filter out challenge maps that spam notes and therefore have an abnormally high NPS (notes per second) or BPM. `MIN_NPS` filters out slow accuracy maps, which may contain strange patterns that are only hittable at low speeds. `MIN_DURATION_SECONDS` filters out short maps, which usually take less time and thought to put together. `MIN_RATING` and `MIN_UPVOTES` make sure the maps are actually well liked.

In [105]:
# --- Upload-date window sent to the BeatSaver search ---
START_DATE = '2021-01-01'   # sent as the `from` query parameter
END_DATE = '2023-09-01'     # initial value of the `to` query parameter

# --- Density / tempo limits (applied server-side by the search) ---
MIN_NPS = 4                 # lower bound on notes per second
MAX_NPS = 13                # upper bound on notes per second
MAX_BPM = 350               # upper bound on beats per minute

# --- Length and popularity limits ---
MIN_DURATION_SECONDS = 60   # sent as `minDuration`
MIN_RATING = 0.7            # sent as `minRating`
MIN_UPVOTES = 40            # checked locally against each map's upvote count

In [107]:
def generateDataset(max_requests=1):
    """Collect the ids of BeatSaver maps that pass the search filters and pruneMaps.

    Queries the BeatSaver text-search endpoint sorted by latest upload and pages
    backwards in time: after each page, the `to` bound is moved to the
    `createdAt` value of the last map returned.

    Args:
        max_requests: Maximum number of search requests to send. The loop stops
            when the request counter equals this value, so a negative value
            (e.g. -1) means "keep going until the results run out".

    Returns:
        List of map ids accepted by pruneMaps, in the order they were
        encountered, without duplicates.

    Raises:
        requests.HTTPError: If BeatSaver answers with an error status code.
    """
    search_url = 'https://api.beatsaver.com/search/text/0'
    page_size = 20  # Beatsaver will send maps in batches of 20 if there are enough maps left

    end = END_DATE
    num_requests = 0
    pruned_map_ids = []
    seen_map_ids = set()
    while num_requests != max_requests:
        # Let requests build and URL-encode the query string; the `to` cursor is
        # a timestamp taken from the previous response.
        query = {
            'from': START_DATE,
            'maxBpm': MAX_BPM,
            'maxNps': MAX_NPS,
            'minDuration': MIN_DURATION_SECONDS,
            'minNps': MIN_NPS,
            'minRating': MIN_RATING,
            'noodle': 'false',
            'sortOrder': 'Latest',
            'to': end,
        }
        maps_result = requests.get(search_url, params=query, timeout=30)
        num_requests += 1
        maps_result.raise_for_status()
        docs = maps_result.json()['docs']

        # An empty page means there is nothing left; indexing docs[-1] below
        # would raise IndexError.
        if not docs:
            break

        # A map sitting exactly on the `to` boundary may come back again on the
        # next page; skip it so it is neither re-checked nor listed twice.
        fresh_docs = [doc for doc in docs if doc['id'] not in seen_map_ids]
        seen_map_ids.update(doc['id'] for doc in fresh_docs)
        pruned_map_ids += pruneMaps(fresh_docs)

        if len(docs) < page_size:
            break

        next_end = docs[-1]['createdAt']
        # If the cursor did not move, the next request would be identical and
        # the loop would never finish when max_requests is unbounded.
        if next_end == end:
            break
        end = next_end
    return pruned_map_ids

def _fetchBeatleaderDifficulties(map_hash):
    """Return the BeatLeader difficulty list for a map hash, or None if unavailable."""
    try:
        response = requests.get('https://api.beatleader.xyz/leaderboards/hash/' + map_hash, timeout=30)
        response.raise_for_status()
        return response.json()['song']['difficulties']
    except (requests.RequestException, ValueError, KeyError, TypeError) as error:
        # One bad lookup (error status, non-JSON body, unexpected payload) must
        # not abort a crawl that may already have collected many ids.
        print('Skipping ' + map_hash + ': BeatLeader lookup failed (' + repr(error) + ')')
        return None


def pruneMaps(maps_json):
    """Return the ids of maps that have enough upvotes and no blocked requirements.

    Args:
        maps_json: Iterable of BeatSaver map documents (dicts). For each one the
            function reads `stats.upvotes`, `versions[0].hash` and `id`.

    Returns:
        List of `id` values for the maps that were kept. Maps whose BeatLeader
        lookup fails are left out, because their requirements cannot be checked.
    """
    # Value of a difficulty's `mode` field that this check treats as Standard.
    standard_mode = 1
    # Bitmask tested against a difficulty's `requirements` field; intended to
    # cover Mapping Extensions, Noodle Extensions and V3 notes.
    blocked_requirements = 0b101100

    pruned_map_ids = []
    for map_doc in maps_json:
        # Min Upvotes check
        if map_doc['stats']['upvotes'] < MIN_UPVOTES:
            continue

        difficulties = _fetchBeatleaderDifficulties(map_doc['versions'][0]['hash'])
        time.sleep(0.1) # Precaution to not exceed maximum requests/second
        if difficulties is None:
            continue

        # Reject the map if any Standard difficulty has one of the blocked
        # requirement bits set.
        has_blocked_requirement = any(
            diff['mode'] == standard_mode and (diff['requirements'] & blocked_requirements) != 0
            for diff in difficulties
        )
        if not has_blocked_requirement:
            pruned_map_ids.append(map_doc['id'])
    return pruned_map_ids

In [108]:
# max_requests=-1 never equals the request counter inside generateDataset, so
# paging continues until BeatSaver returns a page with fewer than 20 maps.
# This is a long-running cell: every map that passes the upvote check triggers
# one BeatLeader request followed by a 0.1 s sleep.
map_ids = generateDataset(max_requests=-1)

KeyboardInterrupt: 

In [None]:
# Persist the collected ids as a JSON array. The `with` block guarantees the
# file is flushed and closed even if the write fails; the original left the
# handle open, so buffered data might never reach disk.
with open('dataset_ids.txt', 'w') as dataset_txt:
    dataset_txt.write(json.dumps(map_ids, indent=4))