In [1]:
from matplotlib import pyplot as plt
from sklearn.cluster import AgglomerativeClustering
import numpy as np
import os

In [6]:
def estimate_pitch (estimates):
    """Return a consensus pitch from several simultaneous pitch estimates.

    The estimates are grouped by average-linkage agglomerative clustering on
    their absolute (L1) differences, with a linkage distance threshold of 5.
    The mean of the most populated cluster is returned; when several clusters
    have the same size, the one with the highest mean wins.

    Parameters
    ----------
    estimates : iterable of float
        Candidate pitch values to reconcile.

    Returns
    -------
    float
        Mean of the largest cluster (the value itself for a single estimate).

    Raises
    ------
    ValueError
        If ``estimates`` is empty.
    """
    values = np.sort(np.asarray(list(estimates), dtype=float))
    if values.size == 0:
        raise ValueError("estimate_pitch needs at least one estimate")
    if values.size == 1:
        # AgglomerativeClustering refuses a single sample; one estimate is
        # trivially its own consensus.
        return float(values[0])

    settings = dict(n_clusters=None, linkage='average', distance_threshold=5)
    try:
        # Newer scikit-learn releases call the distance argument `metric`.
        model = AgglomerativeClustering(metric='l1', **settings)
    except TypeError:
        # Older releases only accept the original `affinity` spelling.
        model = AgglomerativeClustering(affinity='l1', **settings)
    labels = model.fit(values.reshape(-1, 1)).labels_

    sizes = np.bincount(labels)
    means = np.bincount(labels, weights=values) / sizes
    # Tuple comparison: largest cluster first, highest mean breaks ties.
    return float(max(zip(sizes, means))[1])

In [3]:
def find_optimal_pitch (current_collections, collection, part):
    """Merge the per-algorithm pitch tracks of one part into a single track.

    Every algorithm stored under ``current_collections[collection]`` that has
    an entry for ``part`` contributes its ``(time, pitch)`` pair. The shortest
    time axis is used for the result, and for each of its frames the pitches
    found at that index are reconciled with ``estimate_pitch``.

    The algorithms are taken from the collection's own keys rather than from
    the notebook-global ``algorithms`` list, so the result does not depend on
    which cell last reassigned that global, and no particular algorithm
    (previously 'boersma') has to be present.

    Parameters
    ----------
    current_collections : dict
        Nested mapping ``collection -> algorithm -> part -> (time, pitch)``.
    collection : str
        Key of the collection to process.
    part : str
        Key of the part to process.

    Returns
    -------
    tuple
        ``(time, best_estimate)`` where ``best_estimate`` is a NumPy array
        with one value per entry of ``time``.

    Raises
    ------
    KeyError
        If ``collection`` is unknown or no algorithm holds ``part``.
    """
    tracks = [
        per_algorithm[part]
        for per_algorithm in current_collections[collection].values()
        if part in per_algorithm
    ]
    if not tracks:
        raise KeyError(part)

    # min() keeps the first of equally short axes, i.e. the earliest
    # algorithm in the collection's insertion order.
    time = min((track[0] for track in tracks), key=len)

    best_estimate = np.empty(len(time))
    for i in range(len(time)):
        best_estimate[i] = estimate_pitch([track[1][i] for track in tracks])

    return (time, best_estimate)

### Scherbaum Mshavanadze

In [4]:
# Names of the algorithm sub-directories that are read from data_dir.
algorithms = ['boersma', 'crepe', 'hermes', 'maddox', 'noll', 'praat']
data_dir = '/Akamai/voice/data/pitches/'

# collections[recording id][algorithm] starts out as an empty dict; the
# loading loop in this cell stores one (x, y) pair per part in it.
collections = {}
for collection in ("GVM009", "GVM017", "GVM019", "GVM031", "GVM097"):
    collections[collection] = {}
    for algo in algorithms:
        collections[collection][algo] = {}
        
def separate(adir):
    """Load the first two whitespace-separated columns of a text file.

    A blank value in the first column is read as 0. Returns the two columns
    as a tuple ``(x, y)`` of NumPy arrays; callers in this notebook treat
    them as the time axis and the pitch values.
    """
    def first_column(field):
        # Fall back to 0 when the field is empty after stripping.
        return float(field.strip() or 0)

    columns = np.loadtxt(adir, unpack=True, usecols=(0, 1), converters={0: first_column})
    first, second = columns
    return (first, second)

# Walk data_dir/<algorithm>/Scherbaum Mshavanadze/<song>/<file> and store each
# file under collections[<first six characters of the file name>][<algorithm>],
# keyed by the six-character part code that starts at the first matching tag.
for algorithm in os.listdir(data_dir):
    if algorithm not in algorithms:
        continue
    for collection in os.listdir(f"{data_dir}{algorithm}"):
        if collection != 'Scherbaum Mshavanadze':
            continue
        for song in os.listdir(f"{data_dir}{algorithm}/{collection}"):
            for part in os.listdir(f"{data_dir}{algorithm}/{collection}/{song}"):
                print(part)
                recording = part[:6]
                if recording not in collections:
                    continue
                # Tags are tried in this order; only the first hit is used.
                for tag in ('AHDS', 'ALRX', 'AOLS', 'VSOA'):
                    if tag not in part:
                        continue
                    x, y = separate(f"{data_dir}{algorithm}/{collection}/{song}/{part}")
                    start = part.index(tag)
                    collections[recording][algorithm][part[start:start + 6]] = (x, y)
                    break

GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_AHDS1M.txt
GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_AHDS2M.txt
GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_AHDS3M.txt
GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_ALRX1M.txt
GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_ALRX2M.txt
GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_ALRX3M.txt
GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_AOLS5S.txt
GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_VSOAX4.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_AHDS1M.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_AHDS2M.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_AHDS3M.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_ALRX1M.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_ALRX2M.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_ALRX3M.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_AOLS5S.txt
GVM017_ChvenMshv

GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_ALRX3M.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_AOLS5S.txt
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_VSOAX4.txt
GVM019_DaleKojas_DidgoriVillage_Didgori_20160707_AHDS1M.txt
GVM019_DaleKojas_DidgoriVillage_Didgori_20160707_AHDS2M.txt
GVM019_DaleKojas_DidgoriVillage_Didgori_20160707_AHDS3M.txt
GVM019_DaleKojas_DidgoriVillage_Didgori_20160707_ALRX1M.txt
GVM019_DaleKojas_DidgoriVillage_Didgori_20160707_ALRX2M.txt
GVM019_DaleKojas_DidgoriVillage_Didgori_20160707_ALRX3M.txt
GVM019_DaleKojas_DidgoriVillage_Didgori_20160707_AOLS5S.txt
GVM019_DaleKojas_DidgoriVillage_Didgori_20160707_VSOAX4.txt
GVM031_EliaLrde_LakhushdiVillage_MuradGigoGivi_20160819_AHDS1M.txt
GVM031_EliaLrde_LakhushdiVillage_MuradGigoGivi_20160819_AHDS2M.txt
GVM031_EliaLrde_LakhushdiVillage_MuradGigoGivi_20160819_AHDS3M.txt
GVM031_EliaLrde_LakhushdiVillage_MuradGigoGivi_20160819_ALRX1M.txt
GVM031_EliaLrde_LakhushdiVillage_Mur

In [7]:
parts = ['AHDS1M', 'AHDS2M', 'AHDS3M', 'ALRX1M', 'ALRX2M', 'ALRX3M', 'VSOAX4', 'AOLS5S']

res_dir = '/Akamai/voice/data/ground-estimate/Scherbaum Mshavanadze/'
for collection in collections:
    # Create the output folder once per collection. makedirs also creates a
    # missing res_dir, which a plain os.mkdir would fail on.
    os.makedirs(res_dir + collection, exist_ok=True)
    for part in parts:
        t, estimate = find_optimal_pitch(collections, collection, part)
        # One row per frame: time axis value, then the merged pitch estimate.
        np.savetxt(res_dir + collection + '/' + part + '.txt', np.c_[t, estimate], delimiter=' ', fmt='%f')
        print('done')

done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done


### Teach Yourself Megrelian Songs

In [8]:
# Names of the algorithm sub-directories that are read from data_dir.
algorithms = ['boersma', 'crepe', 'hermes', 'maddox', 'noll']
data_dir = '/Akamai/voice/data/pitches/'

# collections2[song][algorithm] starts out as an empty dict; the loading loop
# in this cell stores one (x, y) pair per part in it.
collections2 = {}
for collection in (
    "Ak'a Si Rekisho",
    'Gepshvat Ghvini',
    'Io _ Chkin Kiana',
    'Mesishi Vardi',
    'Meureme',
    'Mi Re Sotsodali_',
    "Mole Chit'i Gilakhe",
    'O Da',
    'Vojanudi Chkim Jargvals',
):
    collections2[collection] = {}
    for algo in algorithms:
        collections2[collection][algo] = {}
        
def separate(adir, algo=None):
    """Load the first two whitespace-separated columns of a text file.

    Parameters
    ----------
    adir : path or other source accepted by ``np.loadtxt``
        File to read.
    algo : optional
        Accepted for the existing call sites but not used. It defaults to
        ``None`` so one-argument calls such as ``separate(path)`` keep working
        after this redefinition.

    Returns
    -------
    tuple
        ``(x, y)`` NumPy arrays holding column 0 and column 1. A blank value
        in column 0 is read as 0.
    """
    # Column 0 may be blank; treat that as 0 instead of failing the parse.
    conv = {0: lambda s: float(s.strip() or 0)}
    x, y = np.loadtxt(adir, unpack=True, usecols=(0, 1), converters=conv)
    return (x, y)

def root_name (name):
    """Strip the trailing part code or extension from a file name.

    If ``'_A'`` occurs anywhere from index 1 onwards, everything before its
    last occurrence is returned. Otherwise the name is cut at the first
    ``'.'`` found between index 1 and the second-to-last character. When
    neither is present the name is returned unchanged.
    """
    marker = name.rfind('_A', 1)
    if marker != -1:
        return name[:marker]

    # The first and last characters are never treated as the separator dot.
    dot = name.find('.', 1, len(name) - 1)
    return name if dot == -1 else name[:dot]

# Walk data_dir/<algorithm>/Teach Yourself Megrelian Songs/<song>/<file> and
# store each file under collections2[root_name(file)][<algorithm>].
for algorithm in os.listdir(data_dir):
    if algorithm not in algorithms:
        continue
    for collection in os.listdir(f"{data_dir}{algorithm}"):
        if collection != 'Teach Yourself Megrelian Songs':
            continue
        for song in os.listdir(f"{data_dir}{algorithm}/{collection}"):
            for part in os.listdir(f"{data_dir}{algorithm}/{collection}/{song}"):
                title = root_name(part)
                if title not in collections2:
                    continue
                x, y = separate(f"{data_dir}{algorithm}/{collection}/{song}/{part}", algorithm)
                if 'AHDS' in part:
                    # Key is the six-character code starting at 'AHDS'.
                    start = part.index('AHDS')
                    key = part[start:start + 6]
                else:
                    # Files without an AHDS code are all stored as 'VSOAX4'.
                    key = 'VSOAX4'
                collections2[title][algorithm][key] = (x, y)

In [9]:
parts = ['AHDS1M', 'AHDS2M', 'AHDS3M', 'VSOAX4']

res_dir = '/Akamai/voice/data/ground-estimate/Teach Yourself Megrelian Songs/'
for collection in collections2:
    # Create the output folder once per collection. makedirs also creates a
    # missing res_dir, which a plain os.mkdir would fail on.
    os.makedirs(res_dir + collection, exist_ok=True)
    for part in parts:
        t, estimate = find_optimal_pitch(collections2, collection, part)
        # One row per frame: time axis value, then the merged pitch estimate.
        np.savetxt(res_dir + collection + '/' + part + '.txt', np.c_[t, estimate], delimiter=' ', fmt='%f')

### Teach Yourself Gurian Songs

In [10]:
# Names of the algorithm sub-directories that are read from data_dir.
algorithms = ['boersma', 'crepe', 'hermes', 'maddox', 'noll']
data_dir = '/Akamai/voice/data/pitches/'

# collections3[song][algorithm] starts out as an empty dict; the loading loop
# in this cell stores one (x, y) pair per part in it.
collections3 = {}
for collection in (
    "Adila-Alipasha",
    "Indi-Mindi",
    'Mival Guriashi (1)',
    'Pikris Simghera',
    "Alaverdi",
    "K'alos Khelkhvavi",
    'Mival Guriashi (2)',
    "Sabodisho",
    "Khasanbegura",
    "Mok'le Mravalzhamieri",
    'Sadats Vshobilvar',
    "Beri Ak'vans Epareba",
    "Lat'aris Simghera",
    "Mts'vanesa Da Ukudosa",
    "Shermanduli",
    "Brevalo",
    "Manana",
    'Nanina (1)',
    "Shvidk'atsa",
    "Chven-Mshvidoba",
    "Maq'ruli",
    'Nanina (2)',
    'Supris Khelkhvavi',
    'Didi Khnidan',
    "Masp'indzelsa Mkhiarulsa",
    "Orira",
    "Ts'amok'ruli",
    "Gakhsovs, T'urpa",
    "Me-Rustveli",
    "P'at'ara Saq'varelo",
):
    collections3[collection] = {}
    for algo in algorithms:
        collections3[collection][algo] = {}
        
def separate(adir, algo=None):
    """Load the first two whitespace-separated columns of a text file.

    Parameters
    ----------
    adir : path or other source accepted by ``np.loadtxt``
        File to read.
    algo : optional
        Accepted for the existing call sites but not used. It defaults to
        ``None`` so one-argument calls such as ``separate(path)`` keep working
        after this redefinition.

    Returns
    -------
    tuple
        ``(x, y)`` NumPy arrays holding column 0 and column 1. A blank value
        in column 0 is read as 0.
    """
    # Column 0 may be blank; treat that as 0 instead of failing the parse.
    conv = {0: lambda s: float(s.strip() or 0)}
    x, y = np.loadtxt(adir, unpack=True, usecols=(0, 1), converters=conv)
    return (x, y)

def root_name (name):
    """Strip the trailing part code or extension from a file name.

    If ``'_A'`` occurs anywhere from index 1 onwards, everything before its
    last occurrence is returned. Otherwise the name is cut at the first
    ``'.'`` found between index 1 and the second-to-last character. When
    neither is present the name is returned unchanged.
    """
    marker = name.rfind('_A', 1)
    if marker != -1:
        return name[:marker]

    # The first and last characters are never treated as the separator dot.
    dot = name.find('.', 1, len(name) - 1)
    return name if dot == -1 else name[:dot]

# Walk data_dir/<algorithm>/Teach Yourself Gurian Songs/<song>/<file> and
# store each file under collections3[root_name(file)][<algorithm>].
for algorithm in os.listdir(data_dir):
    if algorithm not in algorithms:
        continue
    for collection in os.listdir(f"{data_dir}{algorithm}"):
        if collection != 'Teach Yourself Gurian Songs':
            continue
        for song in os.listdir(f"{data_dir}{algorithm}/{collection}"):
            for part in os.listdir(f"{data_dir}{algorithm}/{collection}/{song}"):
                title = root_name(part)
                print(title)
                if title not in collections3:
                    continue
                x, y = separate(f"{data_dir}{algorithm}/{collection}/{song}/{part}", algorithm)
                if 'AHDS' in part:
                    # Key is the six-character code starting at 'AHDS'.
                    start = part.index('AHDS')
                    key = part[start:start + 6]
                else:
                    # Files without an AHDS code are all stored as 'VSOAX4'.
                    key = 'VSOAX4'
                collections3[title][algorithm][key] = (x, y)

Adila-Alipasha
Adila-Alipasha
Adila-Alipasha
Adila-Alipasha
Alaverdi
Alaverdi
Alaverdi
Alaverdi
Beri Ak'vans Epareba
Beri Ak'vans Epareba
Beri Ak'vans Epareba
Beri Ak'vans Epareba
Brevalo
Brevalo
Brevalo
Brevalo
Chven-Mshvidoba
Chven-Mshvidoba
Chven-Mshvidoba
Chven-Mshvidoba
Didi Khnidan
Didi Khnidan
Didi Khnidan
Didi Khnidan
Gakhsovs, T'urpa
Gakhsovs, T'urpa
Gakhsovs, T'urpa
Gakhsovs, T'urpa
Indi-Mindi
Indi-Mindi
Indi-Mindi
Indi-Mindi
K'alos Khelkhvavi
K'alos Khelkhvavi
K'alos Khelkhvavi
K'alos Khelkhvavi
Khasanbegura
Khasanbegura
Khasanbegura
Khasanbegura
Lat'aris Simghera
Lat'aris Simghera
Lat'aris Simghera
Lat'aris Simghera
Manana
Manana
Manana
Manana
Maq'ruli
Maq'ruli
Maq'ruli
Maq'ruli
Masp'indzelsa Mkhiarulsa
Masp'indzelsa Mkhiarulsa
Masp'indzelsa Mkhiarulsa
Masp'indzelsa Mkhiarulsa
Me-Rustveli
Me-Rustveli
Me-Rustveli
Me-Rustveli
Mival Guriashi (1)
Mival Guriashi (1)
Mival Guriashi (1)
Mival Guriashi (1)
Mival Guriashi (2)
Mival Guriashi (2)
Mival Guriashi (2)
Mival Guriashi (2)


Nanina (1)
Nanina (1)
Nanina (1)
Nanina (1)
Nanina (2)
Nanina (2)
Nanina (2)
Nanina (2)
Orira
Orira
Orira
Orira
P'at'ara Saq'varelo
P'at'ara Saq'varelo
P'at'ara Saq'varelo
P'at'ara Saq'varelo
Pikris Simghera
Pikris Simghera
Pikris Simghera
Pikris Simghera
Sabodisho
Sabodisho
Sabodisho
Sabodisho
Sadats Vshobilvar
Sadats Vshobilvar
Sadats Vshobilvar
Sadats Vshobilvar
Shermanduli
Shermanduli
Shermanduli
Shermanduli
Shvidk'atsa
Shvidk'atsa
Shvidk'atsa
Shvidk'atsa
Supris Khelkhvavi
Supris Khelkhvavi
Supris Khelkhvavi
Supris Khelkhvavi
Ts'amok'ruli
Ts'amok'ruli
Ts'amok'ruli
Ts'amok'ruli


In [11]:
parts = ['AHDS1M', 'AHDS2M', 'AHDS3M', 'VSOAX4']

res_dir = '/Akamai/voice/data/ground-estimate/Teach Yourself Gurian Songs/'
for collection in collections3:
    # Create the output folder once per collection. makedirs also creates a
    # missing res_dir, which a plain os.mkdir would fail on.
    os.makedirs(res_dir + collection, exist_ok=True)
    for part in parts:
        t, estimate = find_optimal_pitch(collections3, collection, part)
        # One row per frame: time axis value, then the merged pitch estimate.
        np.savetxt(res_dir + collection + '/' + part + '.txt', np.c_[t, estimate], delimiter=' ', fmt='%f')
        print('done')

done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
done


### Testing and Debugging

In [None]:
# Visual sanity check: plot one of the saved ground-estimate files.
# A dedicated name is used so the notebook-wide `data_dir` is not overwritten
# with a file path.
ground_dir = '/Akamai/voice/data/ground-estimate/Scherbaum Mshavanadze/'
# os.listdir has no guaranteed order; sorting makes the chosen sample stable.
first_song = sorted(os.listdir(ground_dir))[0]
first_part = sorted(os.listdir(ground_dir + first_song))[0]
sample_path = ground_dir + first_song + '/' + first_part

# Read the two saved columns directly. `separate` is rebound to a two-argument
# function by later cells, so calling it with a single path is fragile here.
x, y = np.loadtxt(sample_path, unpack=True, usecols=(0, 1))

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(x, y, '.', markersize=2)
ax.set(title=sample_path, xlabel='time', ylabel='estimated pitch')
plt.show()

In [18]:
# Sanity check: the four values near 201 form the largest cluster, so the
# 67.7 outlier is ignored and the displayed result is the mean of those four.
estimate_pitch([201.9599, 201.3181, 201.1465, 200.005, 67.73271])

201.107375