In [1]:
import json
import pandas
import numpy as np
import copy
import os
#from __future__ import unicode_literals
from fileoperations.fileoperations import get_filenames_in_dir
from makammusicbrainz.audiometadata import AudioMetadata


In [2]:
# Shared state for the cells that follow.

# Prefix that turns a bare MBID into a MusicBrainz recording URL.
mb_str = 'http://musicbrainz.org/recording/'

# Skeleton of a single tonic annotation; deep-copied for every value that is added.
anno_template = {
    'source': '',
    'value': None,
    'observations': '',
    'octave_wrapped': True,
}

# Skeleton of a recording entry; deep-copied for every new MBID.
item_template = {
    'mbid': '',
    'annotations': [],
    'verified': False,
}

# Accumulator filled by the cells below: bare MBID -> recording entry.
tonic_dict = {}

# NOTE(review): not referenced by any of the visible cells -- confirm whether it is still needed.
audiometacrawler = AudioMetadata()


In [3]:
# get annotations from senturk score informed tonic identification
#
# Every row of the CSV must introduce a recording that is not yet in
# tonic_dict; a clash raises RuntimeError naming the offending MBID. Each
# entry is assembled completely before it is stored, so a failure on one row
# never leaves a half-filled record behind.
senturk_source = 'https://github.com/MTG/otmm_tonic_dataset/blob/7f28c1a3261b9146042155ee5e0f9e644d9ebcfa/senturk2013karar_ismir/tonic_annotations.csv'
senturk_data = pandas.read_csv('./senturk2013karar_ismir/tonic_annotations.csv', sep=',')
for _, senturk_row in senturk_data.iterrows():
    senturk_mbid = senturk_row['MBID']

    # test membership on the dict itself: `in tonic_dict.keys()` builds a
    # full list for every row under Python 2
    if senturk_mbid in tonic_dict:
        raise RuntimeError('should not happen: MBID %s is already in tonic_dict' % senturk_mbid)

    # build the recording entry
    senturk_entry = copy.deepcopy(item_template)
    senturk_entry['mbid'] = mb_str + senturk_mbid
    senturk_entry['verified'] = False

    # build the tonic annotation
    senturk_anno = copy.deepcopy(anno_template)
    senturk_anno['source'] = senturk_source
    senturk_anno['value'] = senturk_row['Tonic_Annotation_(Hz)']
    senturk_anno['octave_wrapped'] = True
    senturk_entry['annotations'].append(senturk_anno)

    # add the recording to the dict
    tonic_dict[senturk_mbid] = senturk_entry

In [4]:
# get annotations from TD2 in atli last note fma
#
# An MBID may appear on several rows. Every row is appended as its own
# annotation. When a row disagrees with the TD2 value that was appended last
# for the same recording, the MBID and both values are printed so the
# conflict can be inspected by hand.
td2_source = 'https://github.com/MTG/otmm_tonic_dataset/blob/7f28c1a3261b9146042155ee5e0f9e644d9ebcfa/atli2015tonic_fma/TD2.csv'
atli_data2 = pandas.read_csv('./atli2015tonic_fma/TD2.csv', sep=',')
for _, td2_row in atli_data2.iterrows():
    td2_mbid = td2_row['MBID']
    td2_tonic = td2_row['Tonic_Annotation_(Hz)']

    if td2_mbid not in tonic_dict:
        # add the recording to the dict
        tonic_dict[td2_mbid] = copy.deepcopy(item_template)
        tonic_dict[td2_mbid]['mbid'] = mb_str + td2_mbid
        tonic_dict[td2_mbid]['verified'] = False
    td2_annos = tonic_dict[td2_mbid]['annotations']

    # report conflicting repeats inside TD2 (only the most recent annotation is compared)
    if td2_annos and 'TD2' in td2_annos[-1]['source'] and td2_annos[-1]['value'] != td2_tonic:
        # single parenthesised argument: prints the same text on Python 2 and Python 3
        print(td2_mbid + ' ' + str(td2_annos[-1]['value']) + ' ' + str(td2_tonic))

    # add the tonic value
    td2_anno = copy.deepcopy(anno_template)
    td2_anno['source'] = td2_source
    td2_anno['value'] = td2_tonic
    td2_anno['octave_wrapped'] = True
    td2_annos.append(td2_anno)

a527a6bf-3ddd-4622-abb8-f93d80848e6c 496 247
553ea4d2-9195-440f-9c42-01655edef0a8 220 221


In [5]:
# get annotations from karakurt makam recognition
#
# Each item of the JSON list contributes one annotation. A recording's
# 'verified' flag is taken from the item when the recording is new, or when
# the stored flag is still falsy. MBIDs of items that carry observations are
# printed.
makam_source = 'https://github.com/MTG/otmm_makam_recognition_dataset/blob/216a7b30632032b2a8c2692d5f5895bedbc743e0/annotations.json'

# the context manager closes the file once it has been parsed
with open('../otmm_makam_recognition_dataset/annotations.json') as makam_file:
    karakurt_data = json.load(makam_file)

for kd in karakurt_data:
    # kd['mbid'] is split on its last path separator; the final component is the dict key
    mbid = os.path.split(kd['mbid'])[-1]

    # build the annotation first, so a missing key cannot leave a partial record in the dict
    makam_anno = copy.deepcopy(anno_template)
    makam_anno['source'] = makam_source
    makam_anno['value'] = kd['tonic']
    makam_anno['observations'] = kd['observations']
    makam_anno['octave_wrapped'] = True

    if mbid not in tonic_dict:
        # add the recording to the dict
        tonic_dict[mbid] = copy.deepcopy(item_template)
        tonic_dict[mbid]['mbid'] = mb_str + mbid
        tonic_dict[mbid]['verified'] = kd['verified']
    elif not tonic_dict[mbid]['verified']:
        tonic_dict[mbid]['verified'] = kd['verified']

    # add the tonic value
    tonic_dict[mbid]['annotations'].append(makam_anno)

    if kd['observations']:
        # single parenthesised argument: prints the same text on Python 2 and Python 3
        print(mbid)
      

deadd528-5faf-4377-8c68-ea7145112c34


In [6]:
# get annotations from senturk composition identification
#
# For every tonic.json found, the recording MBID is read from the
# metadata.json in the same directory. The annotation is skipped when its
# citation points at a dataset whose annotations for this recording already
# hold a numerically close value; otherwise it is appended with the value
# rounded to one decimal. Annotations that cite a known dataset but match none
# of its stored values are printed for inspection.
#
# Nothing is written to tonic_dict until the new annotation is complete, so a
# KeyError on any expected field leaves the dict untouched for that file.
compid_source = 'https://github.com/MTG/otmm_composition_identification_dataset/tree/smc2016'
tonic_files = get_filenames_in_dir('../otmm_composition_identification_dataset/audio/', keyword='tonic.json')[0]
for tf in tonic_files:
    try:
        mfile = os.path.join(os.path.split(tf)[0], u'metadata.json')
        with open(mfile) as metadata_fp:
            mbid = json.load(metadata_fp)['mbid']
        with open(tf) as tonic_fp:
            st = json.load(tonic_fp)['annotation']

        # annotations already stored for this recording (none if it is new)
        if mbid in tonic_dict:
            known_annos = tonic_dict[mbid]['annotations']
        else:
            known_annos = []

        # which earlier source, if any, this annotation may be duplicating
        if st['Citation'] == u'atli2015fma':  # tonic from atli 2015
            dup_marker = 'TD'
        elif 'Salamon' in st['Citation']:  # tonic from senturk score-informed tonic
            dup_marker = 'senturk2013karar_ismir'
        else:
            dup_marker = None

        add_bool = True
        if dup_marker is not None:
            for known in known_annos:
                if dup_marker in known['source'] and np.isclose(known['value'], st['Value']):
                    add_bool = False
                    break
            if add_bool:
                # an explicit tuple keeps the Python 2 output and prints the same on Python 3
                print((mbid, st))

        if add_bool:
            new_anno = copy.deepcopy(anno_template)
            new_anno['source'] = compid_source
            new_anno['value'] = round(st['Value'], 1)
            new_anno['octave_wrapped'] = bool(st['OctaveWrapped'])

            if mbid not in tonic_dict:
                # add the recording to the dict
                tonic_dict[mbid] = copy.deepcopy(item_template)
                tonic_dict[mbid]['mbid'] = mb_str + mbid
                tonic_dict[mbid]['verified'] = False
            tonic_dict[mbid]['annotations'].append(new_anno)
    except KeyError:
        # an expected key is missing: show the file and the top-level keys of its tonic.json
        with open(tf) as tonic_fp:
            print((tf, list(json.load(tonic_fp).keys())))

(u'5b734939-3f33-4f8d-8e83-1c29879dffb5', {u'OctaveWrapped': 1, u'Citation': u'Salamon, J., & Gomez, E. (2012). Melody extraction from polyphonic music signals using pitch contour characteristics. IEEE Transactions on Audio, Speech, and Language Processing, 20(6), 1759-1770.', u'Value': 222.3934, u'Unit': u'Hz', u'Method': u'Picked the karar manually from the prominent pitch of the audio recording with 1 cent precision.'})
(u'3ee9b4c6-a411-4e63-956f-531bf6a6f40f', {u'OctaveWrapped': 0, u'Citation': u'atli2015fma', u'Value': 340, u'Unit': u'Hz', u'Method': u'Last note detection'})
(u'18443478-1fa6-4668-a5c2-b4ffa5988ddc', {u'OctaveWrapped': 1, u'Citation': u'Salamon, J., & Gomez, E. (2012). Melody extraction from polyphonic music signals using pitch contour characteristics. IEEE Transactions on Audio, Speech, and Language Processing, 20(6), 1759-1770.', u'Value': 293.7308, u'Unit': u'Hz', u'Method': u'Picked the karar manually from the prominent pitch of the audio recording with 1 cent 

In [11]:
# save
# The context manager flushes and closes annotations.json as soon as the dump
# finishes, instead of leaving that to garbage collection.
with open('annotations.json', 'w') as annotations_file:
    json.dump(tonic_dict, annotations_file, indent=2)

In [10]:
# Show one key of tonic_dict as a sanity check. Iterating the dict avoids
# copying every key and, unlike subscripting keys(), also works on Python 3;
# an empty dict yields None instead of raising.
next(iter(tonic_dict), None)

'7e8d6990-0fdd-4ff8-b468-931698bffc75'