# Alignment Algorithm

Both signals are interpolated with `scipy.interpolate.interp1d` (the code below uses its default, linear, interpolation). The sum of absolute differences between the reference signal and the shifted signal is computed for every shift from -0.5 seconds to 0.5 seconds in steps of 0.01 seconds. Finally, the `fminbound` algorithm (`scipy.optimize.fminbound`) is applied around the best coarse shift to get a precise value for the shift.

In [1]:
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
from scipy import optimize
import os

In [2]:
# Pitch (f0) limits. Presumably in Hz -- TODO confirm the unit against the pitch files.
# min_f0 is not referenced by any code visible in this notebook.
min_f0 = 60
# Upper cap: align() replaces every pitch value above this with the cap itself.
max_f0 = 2000

In [3]:
# Root of the raw pitch files; read_data() walks it as
# <data_dir><algorithm>/<collection>/<song>/<part>.txt
data_dir = '/Akamai/voice/data/pitches-raw/'
# Root under which save_aligned() writes the aligned tracks, using the same layout.
res_dir = '/Akamai/voice/data/pitches/'

def align(x1, y1, x2, y2, graph=False, f0_cap=None):
    """Shift the second pitch track in time so that it best matches the first.

    Both tracks are linearly interpolated (extrapolating outside their
    range).  The sum of absolute differences between the reference track and
    the shifted second track is evaluated for shifts from -0.5 to 0.5 in
    steps of 0.01; ``optimize.fminbound`` then refines the best coarse shift
    within +/- 0.05 of it.

    Parameters
    ----------
    x1, y1 : array-like
        Sample positions and pitch values of the reference track.
    x2, y2 : array-like
        Sample positions and pitch values of the track to be aligned.
    graph : bool, optional
        If true, plot the cost curve and mark the refined shift.
    f0_cap : float, optional
        Pitch values above this are replaced by the cap before alignment.
        Defaults to the module-level ``max_f0``.

    Returns
    -------
    (x, y) : tuple of ndarray
        ``x`` is the grid ``0, 0.01, ...`` up to (excluding) the last value
        of ``x1``; ``y`` is the capped second track evaluated at
        ``x + shift``.

    Notes
    -----
    The input arrays are not modified; capping is done on copies.
    """
    if f0_cap is None:
        f0_cap = max_f0

    # np.minimum builds new arrays, so the caller's y1 / y2 stay untouched
    # (the in-place masking used previously silently altered them).
    ref_pitch = np.minimum(y1, f0_cap)
    other_pitch = np.minimum(y2, f0_cap)

    f1 = interpolate.interp1d(x1, ref_pitch, fill_value='extrapolate')
    f2 = interpolate.interp1d(x2, other_pitch, fill_value='extrapolate')

    grid = np.arange(0, x1[-1], 0.01)
    # The reference samples do not depend on the shift: evaluate them once.
    ref_on_grid = f1(grid)

    shifts = np.arange(-.5, .5, 0.01)
    cost = np.array([np.sum(np.abs(ref_on_grid - f2(grid + s))) for s in shifts])
    cost_fn = interpolate.interp1d(shifts, cost, fill_value='extrapolate')

    # Coarse estimate on the 0.01 grid, then a bounded refinement around it.
    coarse_shift = shifts[np.argmin(cost)]
    shift = optimize.fminbound(cost_fn, coarse_shift - 0.05, coarse_shift + 0.05)

    if graph:
        plt.figure(figsize=(20, 5))
        plt.plot(shifts, cost)
        # Mark the refined shift on the cost curve.
        plt.axvline(shift)

    return (grid, f2(grid + shift))

def separate(adir):
    """Load the first two columns of the text file at ``adir``.

    A first-column field that is empty after stripping is read as 0.
    Returns the two columns as an ``(x, y)`` tuple.
    """
    def _first_column(field):
        # Blank field -> 0, otherwise the stripped field parsed as a float.
        return float(field.strip() or 0)

    first, second = np.loadtxt(
        adir,
        usecols=(0, 1),
        converters={0: _first_column},
        unpack=True,
    )
    return (first, second)
    
def save_song(path, x, y):
    """Write the track to ``path`` as one ``"<x> <y>"`` line per sample.

    ``path`` is created or truncated.  ``x.size`` decides how many samples
    are written, so ``y`` must hold at least that many values.
    """
    # The context manager closes the file even if a write raises.
    with open(path, "w") as fout:
        for i in range(x.size):
            fout.write(f"{x[i]} {y[i]}\n")
    
def file_existence(path, force=False):
    """Tell whether ``path`` is an existing regular file.

    With ``force`` set the answer is always ``False``, so callers that skip
    existing files will process (and overwrite) them anyway.
    """
    return False if force else os.path.isfile(path)

### Functions for Input, Processing and Output

In [4]:
def read_data(collection_name):
    """Load the raw pitch tracks of one collection into the global ``data``.

    Walks ``<data_dir><algorithm>/<collection_name>/<song>/`` for every
    algorithm that is a key of ``data`` and every song that is a key of
    ``data[algorithm]``.  Each ``*.txt`` file found is read with
    ``separate`` and stored as ``data[algorithm][song][filename] = (x, y)``.

    Entries of a collection directory that are not requested songs are
    skipped before they are listed, so stray files there do not raise.
    """
    for algorithm in sorted(os.listdir(data_dir)):
        if algorithm not in data:
            continue
        for collection in sorted(os.listdir(f"{data_dir}{algorithm}")):
            if collection != collection_name:
                continue
            collection_dir = f"{data_dir}{algorithm}/{collection}"
            for song in sorted(os.listdir(collection_dir)):
                # Check membership first: avoids listing directories we do
                # not need and failing on non-directory entries.
                if song not in data[algorithm]:
                    continue
                song_dir = f"{collection_dir}/{song}"
                for location in sorted(os.listdir(song_dir)):
                    if not location.endswith('.txt'):
                        continue
                    # To skip parts already present under res_dir, guard
                    # this with file_existence(...) on the result path.
                    x, y = separate(f"{song_dir}/{location}")
                    data[algorithm][song][location] = (x, y)
def align_songs():
    """Align every loaded track against the matching 'boersma' track.

    For each song and part present under ``data['boersma']``, every
    algorithm's track for that part is replaced by the result of ``align``
    with the 'boersma' track as reference.  Parts an algorithm lacks are
    reported with a printed message.
    """
    reference = data['boersma']
    for song in reference:
        for part in reference[song]:
            for algo in data:
                tracks = data[algo][song]
                if part not in tracks:
                    print(f"{song}/{part} does not exist for {algo}")
                    continue
                # Look the reference up on every pass: the 'boersma' entry
                # itself is overwritten when algo == 'boersma'.
                bx, by = reference[song][part]
                ox, oy = tracks[part]
                mx, my = align(bx, by, ox, oy)
                tracks[part] = (mx, my)
                    
def save_aligned(collection_name):
    """Write every track held in ``data`` below ``res_dir``.

    Each track goes to ``<res_dir><algo>/<collection_name>/<song>/<part>``
    via ``save_song``.
    """
    for algo, per_song in data.items():
        for song, parts in per_song.items():
            for part, (x, y) in parts.items():
                c_path = f"{res_dir}{algo}/{collection_name}/{song}/{part}"
                save_song(c_path, x, y)

### Scherbaum Mshavanadze

In [None]:
# Songs of this collection to load, align and save.
songs = [
    "GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919",
    "GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713",
    "GVM019_DaleKojas_DidgoriVillage_Didgori_20160707",
    "GVM031_EliaLrde_LakhushdiVillage_MuradGigoGivi_20160819",
    "GVM097_KristeAghsdga_LakhushdiVillage_MuradGigoGivi_20160819",
]

# data[algorithm][song] starts as an empty dict; read_data() fills it with
# one (x, y) entry per pitch file.
data = {}
for algo in ("boersma", "noll", "praat", "yin", "crepe", "maddox", "hermes"):
    data[algo] = {}
    for full_song in songs:
        data[algo][full_song] = {}

read_data('Scherbaum Mshavanadze')
align_songs()
save_aligned('Scherbaum Mshavanadze')

### Teach Yourself Megrelian Songs

In [5]:
# Songs of this collection to load, align and save.  Commented-out names
# are currently excluded from the run.
songs = [
    # "Ak'a Si Rekisho",
    # "Gepshvat Ghvini",
    # "Io _ Chkin Kiana",
    # "Mesishi Vardi",
    # "Meureme",
    "Mi Re Sotsodali_",
    # "Mole Chit'i Gilakhe",
    # "O Da",
    # "Vojanudi Chkim Jargvals",
]

# data[algorithm][song] starts as an empty dict; read_data() fills it with
# one (x, y) entry per pitch file.
data = {}
for algo in ("boersma", "noll", "praat", "yin", "crepe", "maddox", "hermes"):
    data[algo] = {}
    for full_song in songs:
        data[algo][full_song] = {}

read_data('Teach Yourself Megrelian Songs')
align_songs()
save_aligned('Teach Yourself Megrelian Songs')

Mi Re Sotsodali_/Mi Re Sotsodali_.txt does not exist for praat
Mi Re Sotsodali_/Mi Re Sotsodali_.txt does not exist for yin
Mi Re Sotsodali_/Mi Re Sotsodali__AHDS1M.txt does not exist for praat
Mi Re Sotsodali_/Mi Re Sotsodali__AHDS2M.txt does not exist for praat
Mi Re Sotsodali_/Mi Re Sotsodali__AHDS3M.txt does not exist for praat


### Teach Yourself Gurian Songs

In [None]:
# Songs of this collection to load, align and save.
songs = [
    "Adila-Alipasha",
    "Indi-Mindi",
    "Mival Guriashi (1)",
    "Pikris Simghera",
    "Alaverdi",
    "K'alos Khelkhvavi",
    "Mival Guriashi (2)",
    "Sabodisho",
    "Khasanbegura",
    "Mok'le Mravalzhamieri",
    "Sadats Vshobilvar",
    "Beri Ak'vans Epareba",
    "Lat'aris Simghera",
    "Mts'vanesa Da Ukudosa",
    "Shermanduli",
    "Brevalo",
    "Manana",
    "Nanina (1)",
    "Shvidk'atsa",
    "Chven-Mshvidoba",
    "Maq'ruli",
    "Nanina (2)",
    "Supris Khelkhvavi",
    "Didi Khnidan",
    "Masp'indzelsa Mkhiarulsa",
    "Orira",
    "Ts'amok'ruli",
    "Gakhsovs, T'urpa",
    "Me-Rustveli",
    "P'at'ara Saq'varelo",
]

# data[algorithm][song] starts as an empty dict; read_data() fills it with
# one (x, y) entry per pitch file.
data = {}
for algo in ("boersma", "noll", "praat", "yin", "crepe", "maddox", "hermes"):
    data[algo] = {}
    for full_song in songs:
        data[algo][full_song] = {}

read_data('Teach Yourself Gurian Songs')
align_songs()
save_aligned('Teach Yourself Gurian Songs')

### Debugging

In [None]:
# Load the same part (ALRX1M) of one song from two algorithms' raw output:
# bdd is the 'boersma' file, cdd the 'crepe' file.
bdd = '/Akamai/voice/data/pitches-raw/boersma/Scherbaum Mshavanadze/GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919/GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_ALRX1M.txt'
cdd = '/Akamai/voice/data/pitches-raw/crepe/Scherbaum Mshavanadze/GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919/GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_ALRX1M.txt'
# separate() returns the first two columns of each file as (x, y).
bx, by = separate(bdd)
cx, cy = separate(cdd)

In [None]:
# Align the 'crepe' track to the 'boersma' track; (mx, my) is the shifted 'crepe' track.
mx, my = align(bx, by, cx, cy)

In [None]:
# Plot both raw tracks in one wide figure for visual comparison.
plt.figure(figsize=(20, 5))
plt.plot(bx, by)
plt.plot(cx, cy)
# Uncomment to overlay the aligned track as well.
# plt.plot(mx, my)