In [1]:
import requests as re
import hdf5_getters as GETTERS
from midi_methods import *
from collections import defaultdict
from sklearn import linear_model, model_selection
import numpy as np
import json
import os 


In [5]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
    'From': os.environ.get('MB_USER')
}


def create_query(artist, song, top_genre_count=1):  # return N most common genres for the song, N=top_genre_count 
    mb_url = "https://musicbrainz.org/ws/2/release-group"
    
    params = {
        'query': f'artist:"{artist}" AND recording:"{song}""?inc=genres',
        'fmt': 'json',
    }

    response = re.get(mb_url, params=params)    # GET request

    if response.status_code != 200: 
        print(f"Error for query <{artist}, {song}>: {response.status_code}")
        return None
    
    data = response.json()
    genres = defaultdict(int)   # key: genre, value: number of votes (# labels applied) for that genre

    if not data['release-groups']:  # assert not empty
        print(f"No valid release groups for <{artist}, {song}> found")
        return None 

    for i in range(len(data['release-groups'])):    # scum
        if 'tags' in data['release-groups'][i] and data['release-groups'][i]['tags']:
            for tag in data['release-groups'][i]['tags']:
                genres[tag['name']] += tag['count']

    if not genres:  # assert not empty
        print(f"No valid tags for <{artist}, {song}> found")
        return None 

    sorted_genres = sorted(genres, key=genres.get, reverse=True)[:top_genre_count]  # get N most upvoted genres
    return sorted_genres


test_release = 'cold weather'
test_artist = 'glass beach'
print(create_query(test_artist, test_release, 10))


['rock', 'electronic', 'future jazz', 'downtempo', 'pop rock', 'alternative rock', 'thrash', 'dubstep', 'deep house', 'house']


In [7]:
SCORE_FILE = os.path.join('../lmd_unwrapped_files/', 'match_scores.json')

with open(SCORE_FILE) as f:
    scores = json.load(f)
# Grab a Million Song Dataset ID from the scores dictionary
msd_id = list(scores.keys())[0]
print(f'Million Song Dataset ID {msd_id} has {len(scores[msd_id])} MIDI file matches:')
for midi_md5, score in scores[msd_id].items():
    print( f' {midi_md5} with confidence score {score}')

Million Song Dataset ID TRRNARX128F4264AEB has 1 MIDI file matches:
 cd3b9c8bb118575bcd712cffdba85fce with confidence score 0.7040202098544246


In [11]:
msd_id = list(scores.keys())[-1]
scores[msd_id]

{'6823b02109f413f9a377f76f510d1854': 0.7466964399978611}