In [1]:
import pandas
from fileoperations.fileoperations import get_filenames_in_dir
import os
import numpy as np
# %pdb

In [2]:
# Configuration: locate the annotation files and declare which annotators
# (and which per-annotator columns) the analysis below works with.
data_dir = './data/'

# get annotation files
notefiles, notefolders, filenames = get_filenames_in_dir(
    data_dir, keyword='notes*.txt')

# get unique recordings; set() collapses repeated folder entries
recordings = set(notefolders)

# define annotators; ignore sertan and jasa for now
# (maps annotator name -> annotation file name inside each recording folder)
annotators = {u'burak': u'notesBurak.txt', u'sercan': u'notesSercan.txt',
              u'mirac': u'notesMirac.txt'}

# annotation files that may exist on disk but are deliberately left out
ignored_annotation_files = {u'notesSertan.txt'}

# define types: one onset/offset/duration column per annotator
cols = ['burak_onset', 'sercan_onset', 'mirac_onset',
        'burak_offset', 'sercan_offset', 'mirac_offset',
        'burak_duration', 'sercan_duration', 'mirac_duration']

# validate annotators: apart from the ignored files, the file names found on
# disk must be exactly the ones declared above. A set difference is used
# instead of set.remove() so a data set without the ignored file does not
# fail with a KeyError.
tmp_annotators = set(filenames) - ignored_annotation_files
assert set(annotators.values()) == tmp_annotators, (
    'annotation files on disk do not match the declared annotators: '
    'expected %s, found %s' % (sorted(annotators.values()),
                               sorted(tmp_annotators)))


In [3]:
# For every recording: load its score, attach each annotator's onset,
# duration and offset to the matching score notes, compute per-note
# mean/std across annotators and print the notes whose onset annotations
# deviate too much from the annotator mean.

# every aligned annotation must have a duration above this value
MIN_NOTE_DURATION = 0.001
# onset deviation from the annotator mean above which a note is listed;
# 0.1 is the "100 ms" limit this check is meant to enforce
ONSET_DEV_THRESHOLD = 0.1

for rr in recordings:

    # read relevant score: it sits in the parent folder of the recording and
    # is named after that parent folder
    scorefolder = os.path.split(rr)[0]
    scorename = os.path.split(scorefolder)[1]
    scorefile = os.path.join(scorefolder, scorename + '.txt')
    score = pandas.read_csv(scorefile, sep='\t', index_col=0)
    score = score[score.Nota53 != "Es"]  # remove rests
    # keep notes only with nonzero duration; take an explicit copy so the
    # columns added below are written to an independent frame and not to a
    # filtered view (avoids SettingWithCopyWarning)
    score = score[score.Ms > 0].copy()
    for cc in cols:  # add annotator columns
        score[cc] = np.nan
    # force index to unicode as addition/repetition/insertion indices are string
    score.index = score.index.map(unicode)

    for aa, af in annotators.items():
        annotations = pandas.read_csv(os.path.join(rr, af), sep='\t',
                                      usecols=[0, 1, 2, 4], index_col=3,
                                      names=["onset", "frequency", "duration", None])

        # force index to unicode as addition/repetition/insertion indices are string
        annotations.index = annotations.index.map(unicode)

        for note_idx, note in annotations.iterrows():
            if note_idx not in score.index:
                # addition, repetition and insertions are not handled yet;
                # they are skipped on purpose
                continue

            # the tuple identifies the offending annotation if this fails
            assert note['duration'] > MIN_NOTE_DURATION, (rr, af, note_idx)

            # .at is the scalar setter that replaces the deprecated
            # DataFrame.set_value
            score.at[note_idx, aa + '_onset'] = note['onset']
            score.at[note_idx, aa + '_duration'] = note['duration']
            score.at[note_idx, aa + '_offset'] = (note['onset'] +
                                                  note['duration'])

    # per-note mean and standard deviation across annotators; the column
    # groups are taken from `cols` so their order stays as declared there
    for attr in ('onset', 'offset', 'duration'):
        attr_cols = [name for name in cols if name.endswith('_' + attr)]
        score[attr + '_mean'] = score[attr_cols].mean(axis=1)
        score[attr + '_std'] = score[attr_cols].std(axis=1)

    # compute onset deviations and list the annotation which are greater than
    # 100 ms
    onset_cols = [name for name in cols if name.endswith('_onset')]
    onset_dev = score[onset_cols].sub(score['onset_mean'], axis=0).abs()
    thres_dev = onset_dev[(onset_dev > ONSET_DEV_THRESHOLD).any(axis=1)]
    if not thres_dev.empty:
        # single-argument parenthesised print: same output under Python 2
        print(rr)
        print(thres_dev)

./data/segah--pesrev--devrikebir----neyzen_yusuf_pasa/e49f33b8-cf8a-4ca9-88cf-9a994dbad1c0
    burak_onset  sercan_onset  mirac_onset
49     0.265336      0.259169     0.524505
72     0.267634      0.135268     0.132366
./data/hicaz--sarki--agiraksak--bir_nigah_et--sekerci_cemil_bey/3cace2f0-125d-4777-95d3-c87c16f360db
     burak_onset  sercan_onset  mirac_onset
180     0.098473      0.023462     0.121935
182     0.017143      0.083900     0.101043
191     0.136327      0.154286     0.017959
193     0.296054      0.296054          NaN
196     0.059562      0.108904     0.168466
197     0.104550      0.053847     0.158398
198     0.127982      0.020590     0.148571
201     0.050280      0.050280     0.100559
226     0.135540      0.147785     0.283326
./data/nisaburek--sarki--turkaksagi--varsin_gonul--lemi_atli/6d892b77-9733-4ba7-a497-646c969c72b8
     burak_onset  sercan_onset  mirac_onset
158     0.255782      0.382993     0.127211
159     0.133333      0.166667     0.033333
160     0