### Shifts the timestamps in different types of files, according to hand-coded shift amounts

This notebook uses the hand-coded shift amounts in `.../<song-directory>/shifts.txt`. Each voice part has its own line in that file: a space-separated list of `start end shift` triples (all in seconds), one triple for each part of the song that is shifted.

This notebook optionally applies shifts to files that contain pitch estimates, pitch overrides, and/or the MAD statistic.

In [None]:
import os
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
from scipy import optimize

In [None]:
# Root directories of the data sets this notebook can shift.
# Ground estimates; the driver cell also passes this as `shift_dir`, i.e. the
# tree in which each song's shifts.txt is looked up.
data_dir = '/Akamai/voice/data/ground-estimate/'
# "Pre-vuv" algorithm output; the driver expects one sub-directory per algorithm.
data2_dir = '/Akamai/voice/data/pitches-crepe-assisted/'
# "Post-vuv" algorithm output; the driver expects one sub-directory per algorithm.
data2a_dir = '/Akamai/voice/data/pitches-vuv-crepe-assisted/'
# Used by the driver for both the "targets" and the "notes" files.
data3_dir = '/Akamai/voice/data/pitches-postprocessed/crepe/'
# Manual override files (processed by shift_overrides).
data4_dir = '/Akamai/voice/data/pitch-overrides/crepe/'
# MAD statistic files.
data5_dir = '/Akamai/voice/data/ground-estimate-statistics/mad/'

In [None]:
def shift_chunk(dest, src, start, end, shift):
    """
    Merge a shifted slice of src into dest, in place.

    The slice src[start:end] is written so that it begins at index
    start + shift of dest. Where the slice lands, dest keeps the element-wise
    maximum of its current values and the pasted values. dest is never
    extended to the left: if the target position would be negative, the
    leading samples that fall before index 0 are discarded.

    dest: array that receives the samples (modified in place)
    src: array the samples are copied from
    start: the start index of the chunk in src
    end: the end index of the chunk in src (exclusive)
    shift: an integer number of indexes to shift right (negative = left)
    """
    piece = np.array(src[start:end])
    target = start + shift

    if target < 0:
        # Drop the part of the chunk that would land before the array start.
        piece = piece[-target:]
        target = 0

    merged = np.maximum(dest[target:target + piece.size], piece)
    dest[target:target + merged.size] = merged
        
def shift_array(src, shifts):
    """Build a copy of src in which each listed chunk is moved by its own amount.

       The result starts as an all-zero array of the same length as src and
       each chunk is merged into it with shift_chunk(). Samples shifted to
       the left of index 0 are dropped; when a chunk is shifted to the right
       past the current end, the result is extended with zeros so the chunk
       still fits.

       Times are converted to sample indexes by multiplying by 100 and
       rounding.

       shifts: tuples (startx, endx, shift) where
        - startx is a float, starting time in seconds
        - endx is a float, ending time in seconds (exclusive)
        - shift is a float, shift amount (to the right) in seconds

       Returns the new array; src itself is not modified.
    """
    out = np.zeros(src.size)
    for start_s, end_s, shift_s in shifts:
        first = round(100 * start_s)
        # A chunk may not run past the end of the source.
        last = min(round(100 * end_s), src.size)
        offset = round(100 * shift_s)

        needed = last + offset
        if needed > out.size:
            out = np.concatenate((out, np.zeros(needed - out.size)))

        shift_chunk(out, src, first, last, offset)
    return out

In [None]:
def parse_name(song, vtype):
    """Map a voice file name to an index 0, 1 or 2, or None.

    The index is the position of the first of the tags "AHDS1M", "AHDS2M",
    "AHDS3M" that occurs in vtype. For songs whose name contains "GVM009"
    the index is mirrored (0 <-> 2). None is returned when vtype contains
    none of the tags.
    """
    tags = ("AHDS1M", "AHDS2M", "AHDS3M")
    index = next((i for i, tag in enumerate(tags) if tag in vtype), None)

    mirrored = "GVM009" in song
    if mirrored and index is not None:
        return 2 - index
    return index

In [None]:
def shift_songs(data_dir, shift_dir, collection, findtext, match=None):
    """Shift the timestamps in the pitch estimate and/or statistics files.

    For every song directory under ``data_dir/collection`` the matching
    ``shift_dir/collection/<song>/shifts.txt`` is opened. One line of that
    file is consumed per processed voice file and parsed into
    (start, end, shift) triples, which are passed to shift_array() together
    with the second column of the voice file. The result is written next to
    the input as ``<stem>_shifted.txt``.

    Reads the notebook-level global ``working_song``: when it is not the
    empty string, only the song with exactly that name is processed.

    Args:
        data_dir: root directory containing ``<collection>/<song>/<voice file>``.
        shift_dir: root directory containing ``<collection>/<song>/shifts.txt``.
        collection: name of the collection sub-directory.
        findtext: substring of the voice file name; the part of the name
            before its first occurrence becomes the stem of the output file.
        match: optional substring; when given, only voice files whose name
            contains it are processed.

    Raises:
        ValueError: if shifts.txt has no (or only a blank) line left for a
            voice file, if a value on the line is not a number, or if
            ``findtext`` does not occur in a voice file name.
    """
    for song in os.listdir(os.path.join(data_dir, collection)):
        if working_song != "" and song != working_song:
            continue
        if "checkpoints" in song:
            continue
        song_dir = os.path.join(data_dir, collection, song)
        if not os.path.isdir(song_dir):
            # Stray files (e.g. .DS_Store) are not songs; listing them below
            # would fail.
            continue
        print('\t\t', song)

        # For this collection the shifts folder is named by the first six
        # characters of the song name.
        shiftsong = song[:6] if collection == "Scherbaum Mshavanadze" else song
        shifts_path = os.path.join(shift_dir, collection, shiftsong, "shifts.txt")

        with open(shifts_path) as shift_file:
            # NOTE(review): lines of shifts.txt are consumed in the order in
            # which os.listdir() yields the voice files, and that order is
            # not guaranteed -- confirm it matches the line order of the file.
            for voice_type in os.listdir(song_dir):
                if any(mark in voice_type for mark in ("shifts", "shifted", "~", "#")):
                    continue
                if parse_name(song, voice_type) is None:
                    continue
                if match is not None and match not in voice_type:
                    continue
                print(voice_type)

                # split() without an argument tolerates repeated spaces, tabs
                # and the trailing newline.
                fields = shift_file.readline().split()
                if not fields:
                    raise ValueError(
                        f"No shift amounts left in {shifts_path} for {voice_type}"
                    )
                values = iter([float(field) for field in fields])
                # Group the flat list into (start, end, shift) triples; an
                # incomplete trailing group is ignored.
                shifts = list(zip(values, values, values))

                cdata = np.loadtxt(os.path.join(song_dir, voice_type))
                y1 = cdata[:, 1]
                y2 = shift_array(y1, shifts)
                # y2 may be longer than y1, so the first column is rebuilt.
                # NOTE(review): it holds the sample index (0, 1, 2, ...), not
                # the values of the input's first column -- confirm that
                # readers of the *_shifted.txt files expect this.
                x2 = np.arange(0, y2.size, 1.0)
                stem = voice_type[:voice_type.index(findtext)]
                outfn = os.path.join(song_dir, stem + "_shifted.txt")
                np.savetxt(outfn, np.vstack((x2, y2)).T, fmt='%.2f')
    

In [None]:
def chunkIndex(c, shifts):
    """Return the index of the first chunk that contains both c[0] and c[2].

    shifts is a sequence of (start, end, shift) triples; a value lies in a
    chunk when start <= value < end. Returns None when no single chunk
    contains both values.
    """
    return next(
        (
            position
            for position, (lo, hi, _amount) in enumerate(shifts)
            if lo <= c[0] < hi and lo <= c[2] < hi
        ),
        None,
    )
    
def shift_overrides(data_dir, shift_dir, collection, findtext):
    """Shift the timestamps in the manual override files.

    For every song directory under ``data_dir/collection`` the matching
    ``shift_dir/collection/<song>/shifts.txt`` is opened and one line of it
    is consumed per processed voice file. Each non-blank line of a voice
    file is parsed as space-separated numbers; the chunk containing both its
    first and third value is looked up with chunkIndex() and that chunk's
    shift is added to those two values (presumably the start and end time of
    the override -- confirm against the override file format). The result is
    written next to the input as ``<stem>_shifted.txt``.

    Reads the notebook-level global ``working_song``: when it is not the
    empty string, only the song with exactly that name is processed.

    Args:
        data_dir: root directory containing ``<collection>/<song>/<voice file>``.
        shift_dir: root directory containing ``<collection>/<song>/shifts.txt``.
        collection: name of the collection sub-directory.
        findtext: substring of the voice file name; the part of the name
            before its first occurrence becomes the stem of the output file.

    Raises:
        ValueError: if shifts.txt has no (or only a blank) line left for a
            voice file, if a value cannot be parsed as a number, if
            ``findtext`` does not occur in a voice file name, or if an
            override does not lie inside a single chunk.
    """
    for song in os.listdir(os.path.join(data_dir, collection)):
        if working_song != "" and song != working_song:
            continue
        if ".DS_Store" in song or "checkpoints" in song:
            continue
        song_dir = os.path.join(data_dir, collection, song)
        if not os.path.isdir(song_dir):
            # Only directories are songs; stray files cannot be listed below.
            continue
        print('\t\t', song)

        # For this collection the shifts folder is named by the first six
        # characters of the song name.
        shiftsong = song[:6] if collection == "Scherbaum Mshavanadze" else song
        shifts_path = os.path.join(shift_dir, collection, shiftsong, "shifts.txt")

        with open(shifts_path) as shift_file:
            # NOTE(review): lines of shifts.txt are consumed in the order in
            # which os.listdir() yields the voice files, and that order is
            # not guaranteed -- confirm it matches the line order of the file.
            for voice_type in os.listdir(song_dir):
                if any(mark in voice_type for mark in ("shifts", "shifted", "~", "#")):
                    continue
                if parse_name(song, voice_type) is None:
                    continue

                # split() without an argument tolerates repeated spaces, tabs
                # and the trailing newline.
                fields = shift_file.readline().split()
                if not fields:
                    raise ValueError(
                        f"No shift amounts left in {shifts_path} for {voice_type}"
                    )
                values = iter([float(field) for field in fields])
                # Group the flat list into (start, end, shift) triples; an
                # incomplete trailing group is ignored.
                shifts = list(zip(values, values, values))

                src_path = os.path.join(song_dir, voice_type)
                stem = voice_type[:voice_type.index(findtext)]
                out_path = os.path.join(song_dir, stem + "_shifted.txt")
                with open(src_path) as original, open(out_path, "w") as new_shifted:
                    for line in original:
                        if not line.strip():
                            # Blank lines carry no override.
                            continue
                        c = [float(value) for value in line.split()]
                        idx = chunkIndex(c, shifts)
                        if idx is None:
                            raise ValueError(f"Override {c} not in chunks {shifts}")
                        amount = shifts[idx][2]
                        c[0] += amount
                        c[2] += amount
                        new_shifted.write(" ".join(map(str, c)) + '\n')


### Scherbaum Mshavanadze, Teach Yourself Gurian Songs, Teach Yourself Megrelian Songs

In [None]:
# restrict what to shift: only the data sets flagged True are processed
toshift = {
    'ground_estimates': False,
    'algorithms-pre-vuv': False,
    'algorithms': False,
    'targets': False,
    'notes': False,
    'mad': True,
    'overrides': False,
}
# collections = ['Scherbaum Mshavanadze', 'Teach Yourself Megrelian Songs', 'Teach Yourself Gurian Songs']
collections = ['Teach Yourself Gurian Songs', 'Teach Yourself Megrelian Songs']
# Only sub-directories with these names are treated as algorithm output.
algos = ['boersma', 'crepe', 'hermes', 'noll', 'yin']
# Name of the single song to process; "" means every song in the collection.
# Read as a global by shift_songs() and shift_overrides().
working_song = ""

# In every call below the second argument (shift_dir) is data_dir: shifts.txt
# is always read from the ground-estimate tree.
for collection in collections:
    if toshift['ground_estimates']:
        print(f"Ground estimates for {collection}")
        shift_songs(data_dir, data_dir, collection, '.')

    if toshift['algorithms-pre-vuv']:
        print(f"Pre-vuv Algorithms for {collection}")
        for algo in os.listdir(data2_dir):
            if algo not in algos:
                continue
            print("\t", algo)
            shift_songs(os.path.join(data2_dir, algo), data_dir, collection, '.')

    if toshift['algorithms']:
        print(f"Post-vuv Algorithms for {collection}")
        # List the directory that is actually processed (data2a_dir), so an
        # algorithm missing from the post-vuv tree is skipped, not opened.
        for algo in os.listdir(data2a_dir):
            if algo not in algos:
                continue
            print("\t", algo)
            shift_songs(os.path.join(data2a_dir, algo), data_dir, collection, '.')

    if toshift['targets']:
        print(f"Targets for {collection}")
        shift_songs(data3_dir, data_dir, collection, '.txt', match='M.txt')

    if toshift['notes']:
        print(f"Notes for {collection}")
        shift_songs(data3_dir, data_dir, collection, '.txt', match='notes.txt')

    if toshift['mad']:
        print(f"MAD for {collection}")
        shift_songs(data5_dir, data_dir, collection, '.txt')

    if toshift['overrides']:
        print(f"Overrides for {collection}")
        shift_overrides(data4_dir, data_dir, collection, '.')
    print('')


## Sandbox

In [None]:
# Scratch cell: loads one ground-estimate file, realigns it and writes the
# result next to it as AHDS2M_shifted.txt.
# NOTE(review): `align` is not defined in the visible part of this notebook;
# unless it is defined elsewhere this cell raises NameError -- confirm or remove.
# Despite its name, ndata_dir is the path of a single file, not a directory.
ndata_dir = "/Akamai/voice/data/ground-estimate/Teach Yourself Gurian Songs/Adila-Alipasha/AHDS2M.txt"
cdata = np.loadtxt(ndata_dir)
# First column -> x1, second column -> y1.
x1, y1 = cdata[:, 0], cdata[:, 1]
x2, y2 = align(x1, y1, 65, gurian=True)
np.savetxt("/Akamai/voice/data/ground-estimate/Teach Yourself Gurian Songs/Adila-Alipasha/AHDS2M_shifted.txt", np.vstack((x2, y2)).T, fmt='%.2f')

In [None]:
# Scratch cell: for every entry of the post-vuv tree, removes a directory named
# "Scherbaum Mshavanadze" found inside its "Teach Yourself Megrelian Songs"
# folder. os.rmdir() only removes empty directories.
# NOTE(review): this rebinds the global data_dir, which the driver cell passes
# as shift_dir; re-run the configuration cell before shifting again.
data_dir = '/Akamai/voice/data/pitches-vuv-crepe-assisted/'
for algo in os.listdir(data_dir):
    for e in os.listdir(os.path.join(data_dir, algo, "Teach Yourself Megrelian Songs")):
        if e == "Scherbaum Mshavanadze":
            os.rmdir(os.path.join(data_dir, algo, "Teach Yourself Megrelian Songs", e))

In [None]:
# Scratch cell: hand-run of the paste step used in shift_chunk() -- a chunk is
# merged into y at index start + shift, keeping the element-wise maximum.
y = np.array([0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
start = 6
# `end` is assigned but not used below.
end = 10
chunk = np.array([0, 0, 9, 10, 11, 12])
shift = 3
# Element-wise maximum of the target window y[9:15] and the chunk.
ychunk = np.maximum(y[start+shift:chunk.size+start+shift], chunk)

y[start+shift:chunk.size+start+shift] = ychunk
# Last expression of the cell: the notebook displays the merged array.
y


In [None]:
# Scratch cell: shows that the slice a[0:3] excludes the end index.
a = np.array([1, 2, 3, 4, 5, 6])
print(a)
print(a[0:3])