### Scherbaum Mshavanadze

In [None]:
import os
import numpy as np

import matplotlib.pyplot as plt

In [None]:
# Pitch-tracking algorithms whose output directories are read, and the root
# directory that holds one sub-directory per algorithm.
algorithms = ['boersma', 'crepe', 'hermes', 'maddox', 'noll', 'praat']
data_dir = '/Akamai/voice/data/pitches/'

# collections[recording][algorithm] starts as an empty dict; the loading
# code below fills it with {part: (x, y)} entries.
collections = {
    recording: {algo: {} for algo in algorithms}
    for recording in ("GVM009", "GVM017", "GVM019", "GVM031", "GVM097")
}
        
def separate(adir, algo):
    """Load the first two columns of a whitespace-delimited text file.

    Parameters
    ----------
    adir : path of the text file to read.
    algo : not used by this function; kept so existing calls keep working.

    Returns
    -------
    (x, y) : column 0 and column 1 of the file, as returned by
        ``np.loadtxt(..., unpack=True)``.
    """
    def _blank_as_zero(field):
        # A field that is empty after stripping is read as 0 instead of
        # making float() fail.
        return float(field.strip() or 0)

    # The converter is registered for column 0 only; column 1 uses
    # loadtxt's default float parsing.
    columns = np.loadtxt(
        adir,
        unpack=True,
        usecols=(0, 1),
        converters={0: _blank_as_zero},
    )
    x, y = columns
    return (x, y)

# Walk data_dir/<algorithm>/Scherbaum Mshavanadze/<song>/<file> and store the
# two loaded columns of every recognised file in
# collections[<first 6 chars of file name>][<algorithm>][<6-char part code>].
# Tags are tested in this order; the first one contained in the file name
# decides the part code.
part_tags = ('AHDS', 'ALRX', 'AOLS', 'VSOA')

for algorithm in os.listdir(data_dir):
    if algorithm not in algorithms:
        continue
    algorithm_dir = f"{data_dir}{algorithm}"
    for collection in os.listdir(algorithm_dir):
        if collection != 'Scherbaum Mshavanadze':
            continue
        collection_dir = f"{algorithm_dir}/{collection}"
        for song in os.listdir(collection_dir):
            song_dir = f"{collection_dir}/{song}"
            for part in os.listdir(song_dir):
                print(part)
                recording = part[:6]
                if recording not in collections:
                    continue
                tag = next((candidate for candidate in part_tags if candidate in part), None)
                if tag is None:
                    # File name carries none of the known tags: not loaded.
                    continue
                x, y = separate(f"{song_dir}/{part}", algorithm)
                # The part code is the tag plus the two characters after it.
                start = part.index(tag)
                collections[recording][algorithm][part[start:start + 6]] = (x, y)

In [None]:
def find_average (pitches, tolerance=0.1):
    """Return the mean of the largest cluster of mutually agreeing pitches.

    Pitches are visited in order and clustered greedily: a pitch joins the
    first existing group whose running mean it agrees with, otherwise it
    starts a new group. Each pitch therefore belongs to exactly one group.

    A pitch agrees with a group when
      * the group mean is 0 and the pitch is exactly 0, or
      * the group mean is non-zero and the pitch differs from it by less
        than ``tolerance`` (relative to the group mean).

    Parameters
    ----------
    pitches : sequence of numbers, one estimate per algorithm.
    tolerance : relative deviation below which a pitch joins a group
        (default 0.1).

    Returns
    -------
    The mean of the group with the most members; on a tie the group that
    was created first wins. ``None`` if ``pitches`` is empty.
    """
    groups = []  # each entry is [sum of members, number of members]

    for pitch in pitches:
        for group in groups:
            mean = group[0] / group[1]
            if mean == 0:
                agrees = pitch == 0
            else:
                agrees = abs(mean - pitch) / mean < tolerance
            if agrees:
                group[0] += pitch
                group[1] += 1
                break
        else:
            # No existing group accepted this pitch, so open exactly one
            # new group for it. This also covers a non-zero pitch arriving
            # when only a zero group exists.
            groups.append([pitch, 1])

    if not groups:
        return None

    # max() returns the first maximal element, so ties go to the group
    # that was created earliest.
    total, count = max(groups, key=lambda group: group[1])
    return total / count
        
    
def find_optimal_pitch (collection, part):
    """Combine the per-algorithm pitch tracks of one part into one estimate.

    Reads the module-level ``collections`` and ``algorithms``. Every
    algorithm that has an entry for ``part`` contributes its second column;
    for each row index the contributions are reduced with ``find_average``.

    Tracks are combined by row index, not by matching values of the first
    column. NOTE(review): this assumes all algorithms sample the same
    instants; confirm against the data.

    Parameters
    ----------
    collection : key into ``collections``.
    part : key of the part inside ``collections[collection][algorithm]``.

    Returns
    -------
    (time, best_estimate) : ``time`` is the shortest first column among the
        contributing algorithms (ties go to the algorithm listed first in
        ``algorithms``); ``best_estimate`` is a float array of that length.

    Raises
    ------
    KeyError : if no algorithm has data for ``part``.
    """
    by_algorithm = collections[collection]
    tracks = [by_algorithm[algo][part] for algo in algorithms if part in by_algorithm[algo]]
    if not tracks:
        raise KeyError(part)

    # Use the shortest first column as the reference axis so that every row
    # index below exists in every contributing track. No single algorithm
    # (previously 'boersma') has to be present any more.
    time = min((track[0] for track in tracks), key=len)
    pitch_estimates = [track[1] for track in tracks]

    best_estimate = np.empty(len(time))
    for i in range(len(time)):
        best_estimate[i] = find_average([pitches[i] for pitches in pitch_estimates])

    return (time, best_estimate)

In [None]:
# For every recording and part, compute the combined estimate and write it
# as a two-column text file to res_dir/<recording>/<part>.txt.
parts = ['AHDS1M', 'AHDS2M', 'AHDS3M', 'ALRX1M', 'ALRX2M', 'ALRX3M', 'VSOAX4', 'AOLS5S']

res_dir = '/Akamai/voice/data/ground-estimate/Scherbaum Mshavanadze/'
for collection in collections:
    out_dir = res_dir + collection
    for part in parts:
        t, estimate = find_optimal_pitch(collection, part)

        # makedirs(exist_ok=True) also creates a missing res_dir and does
        # not fail when the directory already exists, unlike the previous
        # isdir()/mkdir() pair.
        os.makedirs(out_dir, exist_ok=True)

        np.savetxt(out_dir + '/' + part + '.txt', np.c_[t, estimate], delimiter=' ', fmt='%f')

### Teach Yourself Megrelian Songs

In [None]:
# Algorithms read for this collection (no 'praat' here) and the root
# directory that holds one sub-directory per algorithm.
algorithms = ['boersma', 'crepe', 'hermes', 'maddox', 'noll']
data_dir = '/Akamai/voice/data/pitches/'

song_titles = [
    "Ak'a Si Rekisho",
    'Gepshvat Ghvini',
    'Io _ Chkin Kiana',
    'Mesishi Vardi',
    'Meureme',
    'Mi Re Sotsodali_',
    "Mole Chit'i Gilakhe",
    'O Da',
    'Vojanudi Chkim Jargvals',
]

# collections2[song][algorithm] starts as an empty dict; the loading code
# below fills it with {part: (x, y)} entries.
collections2 = {}
for title in song_titles:
    collections2[title] = {algo: {} for algo in algorithms}
        
def separate(adir, algo):
    """Read columns 0 and 1 of a whitespace-delimited text file.

    Parameters
    ----------
    adir : path of the file to load.
    algo : accepted for call compatibility; the function does not use it.

    Returns
    -------
    (x, y) : the two selected columns, as produced by
        ``np.loadtxt(..., unpack=True)``.
    """
    # Column 0 only: a field that is blank after stripping is parsed as 0.
    blank_as_zero = {0: lambda field: float(field.strip() or 0)}
    table = np.loadtxt(adir, unpack=True, usecols=(0, 1), converters=blank_as_zero)
    x, y = table
    return (x, y)

def root_name (name):
    """Strip the trailing ``_A...`` suffix, or else the extension, from a name.

    Only positions 1 .. len(name) - 2 are examined, so a marker at index 0
    or a '.' in the last position is ignored.

    * If ``'_A'`` occurs in that range, everything before its right-most
      occurrence is returned.
    * Otherwise, if a ``'.'`` occurs in that range, everything before the
      left-most such dot is returned.
    * Otherwise ``name`` is returned unchanged.
    """
    marker = name.rfind('_A', 1)
    if marker != -1:
        return name[:marker]

    # The end bound excludes the final character, like the marker search.
    dot = name.find('.', 1, len(name) - 1)
    if dot != -1:
        return name[:dot]

    return name

# Walk data_dir/<algorithm>/Teach Yourself Megrelian Songs/<song>/<file> and
# store the two loaded columns of every file whose root name is a known song
# in collections2[<root name>][<algorithm>][<part code>].
for algorithm in os.listdir(data_dir):
    if algorithm not in algorithms:
        continue
    for collection in os.listdir(f"{data_dir}{algorithm}"):
        if collection != 'Teach Yourself Megrelian Songs':
            continue
        collection_dir = f"{data_dir}{algorithm}/{collection}"
        for song in os.listdir(collection_dir):
            for part in os.listdir(f"{collection_dir}/{song}"):
                title = root_name(part)
                print(title)
                if title not in collections2:
                    continue

                if 'AHDS' in part:
                    # Part code is 'AHDS' plus the two characters after it.
                    start = part.index('AHDS')
                    part_code = part[start:start + 6]
                else:
                    # Every file without an 'AHDS' tag is filed under this key.
                    part_code = 'VSOAX4'

                x, y = separate(f"{collection_dir}/{song}/{part}", algorithm)
                collections2[title][algorithm][part_code] = (x, y)

In [None]:
def find_average (pitches, tolerance=0.1):
    """Pick the consensus value from a list of pitch estimates.

    The estimates are clustered greedily in input order. Each estimate is
    added to the first group whose current mean it matches; if no group
    matches, it becomes the seed of a new group. An estimate matches a
    group when

      * the group mean is 0 and the estimate is exactly 0, or
      * the group mean is non-zero and the estimate is within
        ``tolerance`` of it, measured relative to the group mean.

    Parameters
    ----------
    pitches : sequence of numbers, one estimate per algorithm.
    tolerance : relative deviation below which an estimate joins a group
        (default 0.1).

    Returns
    -------
    Mean of the most populated group (earliest-created group on a tie),
    or ``None`` when ``pitches`` is empty.
    """
    clusters = []  # each entry is [sum of members, number of members]

    for value in pitches:
        for cluster in clusters:
            centre = cluster[0] / cluster[1]
            if centre == 0:
                matches = value == 0
            else:
                matches = abs(centre - value) / centre < tolerance
            if matches:
                cluster[0] += value
                cluster[1] += 1
                break
        else:
            # Reached only when no cluster accepted the value: start exactly
            # one new cluster. A non-zero value seen after a leading 0 gets
            # its own cluster this way.
            clusters.append([value, 1])

    if not clusters:
        return None

    # max() keeps the first of several equally large clusters.
    total, count = max(clusters, key=lambda cluster: cluster[1])
    return total / count
        
    
def find_optimal_pitch (collection, part):
    """Combine the per-algorithm pitch tracks of one part into one estimate.

    Reads the module-level ``collections2`` and ``algorithms``. Every
    algorithm that has an entry for ``part`` contributes its second column;
    for each row index the contributions are reduced with ``find_average``.

    Tracks are combined by row index, not by matching values of the first
    column. NOTE(review): this assumes all algorithms sample the same
    instants; confirm against the data.

    Parameters
    ----------
    collection : key into ``collections2``.
    part : key of the part inside ``collections2[collection][algorithm]``.

    Returns
    -------
    (time, best_estimate) : ``time`` is the shortest first column among the
        contributing algorithms (ties go to the algorithm listed first in
        ``algorithms``); ``best_estimate`` is a float array of that length.

    Raises
    ------
    KeyError : if no algorithm has data for ``part``.
    """
    by_algorithm = collections2[collection]
    tracks = [by_algorithm[algo][part] for algo in algorithms if part in by_algorithm[algo]]
    if not tracks:
        raise KeyError(part)

    # Use the shortest first column as the reference axis so that every row
    # index below exists in every contributing track. No single algorithm
    # (previously 'boersma') has to be present any more.
    time = min((track[0] for track in tracks), key=len)
    pitch_estimates = [track[1] for track in tracks]

    best_estimate = np.empty(len(time))
    for i in range(len(time)):
        best_estimate[i] = find_average([pitches[i] for pitches in pitch_estimates])

    return (time, best_estimate)

In [None]:
# For every song and part, compute the combined estimate and write it as a
# two-column text file to res_dir/<song>/<part>.txt.
parts = ['AHDS1M', 'AHDS2M', 'AHDS3M', 'VSOAX4']

res_dir = '/Akamai/voice/data/ground-estimate/Teach Yourself Megrelian Songs/'
for collection in collections2:
    out_dir = res_dir + collection
    for part in parts:
        print(collection)
        t, estimate = find_optimal_pitch(collection, part)

        # makedirs(exist_ok=True) also creates a missing res_dir and does
        # not fail when the directory already exists, unlike the previous
        # isdir()/mkdir() pair.
        os.makedirs(out_dir, exist_ok=True)

        np.savetxt(out_dir + '/' + part + '.txt', np.c_[t, estimate], delimiter=' ', fmt='%f')