### Histogram of 100 x |final target - ground estimate| / final target, for the three parts of every song


- One histogram for times where notes > 0 and there's no override
- One histogram for times where notes = 0 and there's no override
- One histogram for times where notes < 0 and there's no override
- One histogram for times where there's an override

Bin size = 1\%.

Bins 0, 1, ..., 19, 20+.

In [None]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
from scipy.signal import butter
from scipy.fft import fft, ifft, rfft, irfft
import random
from scipy.signal import argrelmin, argrelmax, argrelextrema
import os

In [None]:
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import pandas as pd
import plotly.io as pio
from plotly.offline import init_notebook_mode, iplot, plot
from IPython.display import display, clear_output
from ipywidgets import widgets, Button, HBox, VBox
from plotly.colors import DEFAULT_PLOTLY_COLORS
from plotly.callbacks import LassoSelector, BoxSelector


In [None]:
# Root directories of the data set.
# NOTE(review): raw_dir and estimate_dir are not referenced by the code visible
# in this notebook; estimate_dir holds the same path as ground_estimate_dir below.
raw_dir = "/Akamai/voice/data/"
estimate_dir = "/Akamai/voice/data/ground-estimate/"

# Short collection key -> collection name. load_songs() uses the name as the
# per-collection sub-directory when it builds file paths.
collections = {"sm":"Scherbaum Mshavanadze",
               "guria":"Teach Yourself Gurian Songs",
               "megrelia":"Teach Yourself Megrelian Songs"}

# Short collection key -> list of song directory names in that collection.
# For the "sm" collection, load_songs() and histograms() key the loaded data by
# the first six characters of the name (e.g. "GVM009").
collection_directories = {"sm":
                          ["GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919",
                           "GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713",
                           "GVM019_DaleKojas_DidgoriVillage_Didgori_20160707",
                           "GVM031_EliaLrde_LakhushdiVillage_MuradGigoGivi_20160819",
                           "GVM097_KristeAghsdga_LakhushdiVillage_MuradGigoGivi_20160819"],
                          "guria":
                          ["Adila-Alipasha",
                           "Alaverdi",
                           "Beri Ak'vans Epareba", 
                           "Brevalo",             
                           "Chven-Mshvidoba",    
                           #'Didi Khnidan',     
                           "Gakhsovs, T'urpa",
                           "Indi-Mindi",
                           "K'alos Khelkhvavi",
                           "Khasanbegura",     
                           "Lat'aris Simghera",    
                           "Manana",         
                           "Maq'ruli",               
                           "Masp'indzelsa Mkhiarulsa", 
                           "Me-Rustveli",        
                           'Mival Guriashi (1)' ,
                           'Mival Guriashi (2)' , 
                           "Mok'le Mravalzhamieri",
                           "Mts'vanesa Da Ukudosa", 
                           'Nanina (1)',      
                           'Nanina (2)',          
                           "Orira",
                           "P'at'ara Saq'varelo",                
                           'Pikris Simghera',
                           "Sabodisho",
                           'Sadats Vshobilvar',
                           "Shermanduli",
                           "Shvidk'atsa",
                           'Supris Khelkhvavi',
                           "Ts'amok'ruli"],
                         "megrelia":
                          ["Vojanudi Chkim Jargvals",
                           "Ak'a Si Rekisho",
                           "Gepshvat Ghvini",
                           "Io _ Chkin Kiana",
                           "Mesishi Vardi",
                           "Meureme",
                           "Mi Re Sotsodali_",
                           "Mole Chit'i Gilakhe",
                           "O Da"]}

# Directories read by load_songs(): ground-estimate pitch files, final-target
# pitch and note files, and the override box files.
ground_estimate_dir = "/Akamai/voice/data/ground-estimate/"
target_dir = "/Akamai/voice/data/pitches-postprocessed/crepe/"
overrides_dir = "/Akamai/voice/data/pitch-overrides/crepe/"

### Helper functions for loading songs.

In [None]:
# Map an x value to its position in a grid of equally-spaced x values.
def xIndex(xrange, xval):
    """Return the index in `xrange` whose value is closest to `xval`.

    `xrange` must have at least two elements; its spacing is taken from the
    first two. The result is not clamped to the length of `xrange`, and
    because `int()` truncates toward zero, values more than half a step below
    `xrange[0]` are not rounded to the nearest index.
    """
    origin = xrange[0]
    spacing = xrange[1] - xrange[0]
    steps_from_origin = (xval - origin) / spacing
    # Adding 0.5 before truncating rounds to the nearest grid point.
    return int(steps_from_origin + 0.5)

# Turn override boxes ("x1 y1 x2 y2" per line) into x/y traces plus a mask.
def boxesToTrace(boxLines, xrange):
    """Convert override box lines into traces and a per-sample override mask.

    Each entry of `boxLines` is a string of four numbers separated by single
    spaces: left x, left y, right x, right y.

    Returns a tuple `(newX, newY, isOverride)`:
      * `newX`, `newY` hold two consecutive entries per box (left, then right);
      * `isOverride` is a boolean array with one entry per element of `xrange`,
        True for every index from the box's left x to its right x inclusive.

    A line that does not contain exactly four numeric fields raises ValueError.
    """
    box_count = len(boxLines)
    newX = np.zeros(2 * box_count)
    newY = np.zeros(2 * box_count)
    isOverride = np.zeros(len(xrange), dtype=bool)

    for box_no, text in enumerate(boxLines):
        fields = [float(token) for token in text.split(" ")]
        left_x, left_y, right_x, right_y = fields

        base = 2 * box_no
        newX[base], newX[base + 1] = left_x, right_x
        newY[base], newY[base + 1] = left_y, right_y

        first = xIndex(xrange, left_x)
        last = xIndex(xrange, right_x)
        # The right edge is part of the box, hence the +1 on the slice end.
        isOverride[first:last + 1] = True

    return (newX, newY, isOverride)

def get_boxes(file, xrange):
    """Read the override boxes stored in `file`.

    Parameters:
        file: path of a text file with one "x1 y1 x2 y2" box per line.
        xrange: equally-spaced x values of the track the boxes apply to.

    Returns:
        A tuple `(newX, newY, isOverride)` as produced by boxesToTrace().
        When the file cannot be opened, the traces are empty and
        `isOverride` is an all-False mask with one entry per element of
        `xrange`, so the result always has three elements and callers can
        index it the same way in both cases.
    """
    try:
        f = open(file)
    except OSError:
        # IOError is an alias of OSError and FileNotFoundError a subclass, so
        # this catches the same failures as before. A missing file means
        # "no overrides for this part", not an error.
        no_trace = np.empty(shape=(0))
        no_override = np.zeros(len(xrange), dtype=bool)
        return (no_trace, np.empty(shape=(0)), no_override)
    else:
        with f:
            return boxesToTrace(f.read().splitlines(), xrange)


### Load songs.

In [None]:
# Loaded song data, filled in by load_songs():
# data[songDir][part] = (x, y, yn, yg, boxes, isOverride) for part in '1', '2', '3'.
data = {}
# NOTE(review): not referenced by the code visible in this notebook — confirm it is still needed.
locations = {}
# NOTE(review): presumably pitch-tracking algorithm names; not referenced by the
# visible code, which only reads from the "crepe" directories — confirm.
algos = ['boersma', 'crepe', 'hermes', 'noll', 'yin']

def separate(adir):
    """Load the first two columns of the text file `adir`.

    Returns `(x, y)`, where `x` is column 0 and `y` is column 1. A column-0
    field that is empty after stripping whitespace is read as 0.
    """
    def first_column(field):
        # Blank fields fall back to 0 instead of failing the float conversion.
        return float(field.strip() or 0)

    columns = np.loadtxt(
        adir,
        unpack=True,
        usecols=(0, 1),
        converters={0: first_column},
    )
    x, y = columns
    return (x, y)
        
def load_songs():
    """Load every part of every configured song into the global `data` dict.

    For each collection in `collections` and each song in
    `collection_directories`, reads for parts '1', '2' and '3':
      * the final-target pitch track (x, y) and note track (yn) from `target_dir`,
      * the ground-estimate pitch track (yg) from `ground_estimate_dir`,
      * the override boxes and mask from `overrides_dir`.

    The result is stored as
    `data[songDir][part] = (x, y, yn, yg, boxes, isOverride)`, where `songDir`
    is the song name, shortened to its first six characters for the "sm"
    collection. Progress is printed as songs are loaded.
    """
    for coll, collection in collections.items():
        print(" ", collection)
        for song in collection_directories[coll]:
            songDir = song[:6] if coll == 'sm' else song
            data[songDir] = {}
            print("  ", song)
            for part in ['1', '2', '3']:
                partFile = 'AHDS' + part + 'M.txt'
                noteFile = 'AHDS' + part + 'M.notes.txt'
                boxes_file = f"{overrides_dir}{collection}/{song}/{song}_{partFile}"

                # The target track is parsed once; its x values also define
                # the grid that the override mask is built on.
                x, y = separate(f"{target_dir}{collection}/{song}/{song}_{partFile}")

                bo = get_boxes(boxes_file, x)
                boxes = (bo[0], bo[1])
                # Tolerate a result that carries no mask: treat it as
                # "no overrides" rather than failing on bo[2].
                isOverride = bo[2] if len(bo) > 2 else np.zeros(len(x), dtype=bool)

                xn, yn = separate(f"{target_dir}{collection}/{song}/{song}_{noteFile}")
                xg, yg = separate(f"{ground_estimate_dir}{collection}/{songDir}/{partFile}")
                data[songDir][part] = (x, y, yn, yg, boxes, isOverride)
    print("\nLoaded song data from files into dictionary")
                       
# Populate the global `data` dictionary; this reads every song file from disk.
load_songs()                                
                                

In [None]:
# Histogram bin edges, one unit wide. The range starts at -4 so that every
# flag value (-1 ... -4) falls inside a bin; the last bin, [20, 21], collects
# everything clamped to 21 and therefore acts as the "20+" bin.
bins = list(range(-4, 22))


def flaggedPctDiff(y, yn, yg, isOverride):
    """Return 100 * |y - yg| / y per sample, with flag values for special cases.

    Parameters:
        y: final-target pitch values.
        yn: final-target note values.
        yg: ground-estimate pitch values.
        isOverride: boolean mask, True where an override box applies.

    All four arguments have one entry per sample. The returned float array
    holds the percentage difference where `yn > 0` and `y > 0`, and otherwise:
        0   note and ground estimate are both 0
        -1  note is negative
        -2  an override box applies
        -3  error: note is 0 but the ground estimate is not
        -4  error: note is positive but the target is 0
    Flags are assigned in that order, so a later one replaces an earlier one.
    Percentages of 21 or more are clamped to 21.
    """
    y = np.asarray(y, dtype=float)
    yn = np.asarray(yn)
    yg = np.asarray(yg, dtype=float)
    isOverride = np.asarray(isOverride, dtype=bool)

    # it is assumed yn > 0 implies y > 0 (this is a check the final target had to pass)
    comparable = np.logical_and(yn > 0, y > 0)
    # `out` is required: without it the entries excluded by `where` are
    # uninitialized memory, and samples with yn > 0 and y < 0 are not
    # overwritten by any of the flags below. They now read as 0.
    pctDiff = np.divide(np.abs(y - yg), y,
                        out=np.zeros(y.shape, dtype=float),
                        where=comparable) * 100
    pctDiff[np.logical_and(yg == 0, yn == 0)] = 0

    # times we are ignoring
    pctDiff[yn < 0] = -1
    pctDiff[isOverride] = -2

    # errors
    pctDiff[np.logical_and(yg != 0, yn == 0)] = -3
    pctDiff[np.logical_and(yn > 0, y == 0)] = -4

    # large numbers
    pctDiff[pctDiff >= 21] = 21
    return pctDiff


def histograms():
    """Plot, for every song, one log-scale histogram per part.

    Each figure has three side-by-side axes (parts 1 to 3) showing the
    distribution of flaggedPctDiff() for that part, using the module-level
    `bins`. Reads the global `data` dictionary, so load_songs() must have
    run first. Collection and song names are printed as they are processed.
    """
    for coll, collection in collections.items():
        print(collection)
        for song in collection_directories[coll]:
            songDir = song[:6] if coll == 'sm' else song
            print(f"  {songDir}")
            axs = plt.figure(figsize=(10, 3)).subplots(1, 3)
            for npart, part in enumerate(['1', '2', '3']):
                (x, y, yn, yg, boxes, isOverride) = data[songDir][part]
                pctDiff = flaggedPctDiff(y, yn, yg, isOverride)

                axs[npart].hist(pctDiff, bins=bins, log=True)
                axs[npart].set_title(f"Histogram of pct diff for Part {part}")
                axs[npart].set_xlabel("100 * |target - est.| / target")

            # Show/save figure as desired.
            plt.show()
                             

### Histograms for non-overrides, notes >= 0

These are histograms of the relative difference between final target and ground estimate pitches as a percentage.

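The negative values are flags, not percentages. When more than one flag applies to the same time, the one latest in this list is the one that is plotted: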

- -1: the target note is negative, which means the pitch is uncertain
- -2: there was a pitch override box
- -3: an error - the note is 0 but the ground estimate is not
- -4: an error - the target is 0 but the note is not




In [None]:
# Draw one figure per song (three histograms each); requires load_songs() to have run.
histograms()
