### Align the three parts of all songs

1. Align the ends of the final notes in the raw recordings.

In [None]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
from scipy.signal import butter
from scipy.fft import fft, ifft, rfft, irfft
import random
from scipy.signal import argrelmin, argrelmax, argrelextrema
import os

In [None]:
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import pandas as pd
import plotly.io as pio
from plotly.offline import init_notebook_mode, iplot, plot
from IPython.display import display, clear_output
from ipywidgets import widgets, Button, HBox, VBox
from plotly.colors import DEFAULT_PLOTLY_COLORS
from plotly.callbacks import LassoSelector, BoxSelector


In [None]:
# Root directory of the raw recordings; songs are read from
# "{raw_dir}{collection directory}/{song}/".
raw_dir = "/Akamai/voice/data/"
# Root directory that receives the computed shifts; one "shifts.txt" is
# written per song under "{estimate_dir}{collection directory}/{song dir}/".
estimate_dir = "/Akamai/voice/data/ground-estimate/"
# Short collection key -> name of the collection's directory under both roots.
collections = {"sm":"Scherbaum Mshavanadze",
               "guria":"Teach Yourself Gurian Songs",
               "megrelia":"Teach Yourself Megrelian Songs"}

# Collection key (same keys as `collections`) -> list of song directory names
# to process. Each name is used verbatim as a path component under the
# collection directory, so spelling, apostrophes and underscores must match
# the directory names on disk exactly.
collection_directories = {"sm":
                          ["GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919",
                           "GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713",
                           "GVM019_DaleKojas_DidgoriVillage_Didgori_20160707",
                           "GVM031_EliaLrde_LakhushdiVillage_MuradGigoGivi_20160819",
                           "GVM097_KristeAghsdga_LakhushdiVillage_MuradGigoGivi_20160819"],
                          "guria":
                          ["Adila-Alipasha",
                           "Alaverdi",
                           "Beri Ak'vans Epareba", 
                           "Brevalo",             
                           "Chven-Mshvidoba",    
                           'Didi Khnidan',     
                           "Gakhsovs, T'urpa",
                           "Indi-Mindi",
                           "K'alos Khelkhvavi",
                           "Khasanbegura",     
                           "Lat'aris Simghera",    
                           "Manana",         
                           "Maq'ruli",               
                           "Masp'indzelsa Mkhiarulsa", 
                           "Me-Rustveli",        
                           'Mival Guriashi (1)' ,
                           'Mival Guriashi (2)' , 
                           "Mok'le Mravalzhamieri",
                           "Mts'vanesa Da Ukudosa", 
                           'Nanina (1)',      
                           'Nanina (2)',          
                           "Orira",
                           "P'at'ara Saq'varelo",                
                           'Pikris Simghera',
                           "Sabodisho",
                           'Sadats Vshobilvar',
                           "Shermanduli",
                           "Shvidk'atsa",
                           'Supris Khelkhvavi',
                           "Ts'amok'ruli"],
                         "megrelia":
                          ["Vojanudi Chkim Jargvals",
                           "Ak'a Si Rekisho",
                           "Gepshvat Ghvini",
                           "Io _ Chkin Kiana",
                           "Mesishi Vardi",
                           "Meureme",
                           "Mi Re Sotsodali_",
                           "Mole Chit'i Gilakhe",
                           "O Da"]}


In [None]:
def sgn (x):
    """Return the sign of the scalar ``x``: 0 for zero, 1 if positive, else -1."""
    if x == 0:
        return 0
    # Anything that is neither zero nor greater than zero maps to -1.
    return 1 if x > 0 else -1

def load_file (songfile):
    """Load ``songfile`` with librosa and return ``(samples, sample_rate)``.

    ``sr=None`` is passed to ``librosa.load`` so the file's own sample rate
    is kept instead of librosa's default resampling rate.
    """
    samples, rate = librosa.load(songfile, sr=None)
    return samples, rate

def pressure(y, sr, window):
    """Return an RMS level curve of ``y`` with one value every ``sr // 100`` samples.

    Frame ``i`` starts at sample ``i * (sr // 100)`` and spans ``window``
    samples. Frames that run past the end of the signal contain fewer than
    ``window`` samples but are still divided by ``window``, so their values
    are scaled down accordingly.

    Args:
        y: Array of audio samples (must support ``y**2`` and slicing).
        sr: Sample rate; must be at least 100 so that the hop
            ``sr // 100`` is a positive number of samples.
        window: Number of samples in each RMS frame; must be positive.

    Returns:
        A float array of length ``len(y) // (sr // 100)``.

    Raises:
        ValueError: If ``sr // 100`` is not positive or ``window`` is not
            positive.
    """
    hop = sr // 100
    if hop <= 0:
        raise ValueError(f"sr must be at least 100, got {sr}")
    if window <= 0:
        raise ValueError(f"window must be positive, got {window}")

    ysq = y**2
    prs = np.zeros(len(y) // hop)
    # NOTE: weighting the RMS by a zero-crossing count was tried here and
    # did not seem to help, so plain RMS is used.
    for i in range(len(prs)):
        start = i * hop
        prs[i] = np.sqrt(np.sum(ysq[start:start + window]) / window)
    return prs

In [None]:
traceIds = {}
def pressure_graph(song):
    """Display the pressure curve of every part of ``song`` in one figure.

    Reads the module-level ``prs[song]`` mapping (part name -> pressure
    array) and draws one marker-only ``Scattergl`` trace per part. The x
    value of sample ``k`` is ``k / 100``.

    Side effects: stores ``part -> trace index`` in the module-level
    ``traceIds`` dict and displays the figure widget. A part whose trace
    cannot be built is reported with ``print`` and skipped.

    Args:
        song: Key into ``prs``.
    """
    global traceIds
    traces = []
    for part, data in prs[song].items():
        try:
            trace = go.Scattergl(
                # Scale an integer range instead of stepping by 0.01: a
                # float step may not produce exactly len(data) points.
                x=np.arange(len(data)) / 100,
                y=data,
                name=part,
                mode="markers",
                visible=True,
            )
        except (TypeError, ValueError) as err:
            print(f"{song}: {part} not available ({err})")
            continue

        # The trace index is its position in the list handed to the figure.
        traceIds[part] = len(traces)
        traces.append(trace)

    layout = go.Layout(title='Pressure Over Time')
    figure = go.Figure(data=traces, layout=layout)
    fig = go.FigureWidget(figure)
    fig.update_traces(marker=dict(size=3))
    display(widgets.VBox([fig]))
   

In [None]:
traceIds = {}
def low_pressure_graph(song):
    """Display the masked low-pressure curve of every part of ``song``.

    Reads the module-level ``lowprs[song]`` mapping (part name -> array)
    and draws one marker-only ``Scattergl`` trace per part. The x value of
    sample ``k`` is ``k / 100``.

    Side effects: stores ``part -> trace index`` in the module-level
    ``traceIds`` dict and displays the figure widget. A part whose trace
    cannot be built is reported with ``print`` and skipped.

    Args:
        song: Key into ``lowprs``.
    """
    global traceIds
    traces = []
    for part, data in lowprs[song].items():
        try:
            trace = go.Scattergl(
                # Scale an integer range instead of stepping by 0.01: a
                # float step may not produce exactly len(data) points.
                x=np.arange(len(data)) / 100,
                y=data,
                name=part,
                mode="markers",
                visible=True,
            )
        except (TypeError, ValueError) as err:
            print(f"{song}: {part} not available ({err})")
            continue

        # The trace index is its position in the list handed to the figure.
        traceIds[part] = len(traces)
        traces.append(trace)

    layout = go.Layout(title='Pressure Over Time')
    figure = go.Figure(data=traces, layout=layout)
    fig = go.FigureWidget(figure)
    fig.update_traces(marker=dict(size=3))
    display(widgets.VBox([fig]))
   

In [None]:
traceIds = {}
def shifted_low_pressure_graph(song):
    """Display the low-pressure curves of ``song`` after applying the shifts.

    Reads the module-level ``lowprs[song]`` mapping (part name -> array).
    Every part except ``'mix'`` is rotated by ``imax[song][part]`` positions
    with ``np.roll`` (values pushed past one end wrap around to the other)
    before being drawn as a marker-only ``Scattergl`` trace. The x value of
    sample ``k`` is ``k / 100``.

    Side effects: stores ``part -> trace index`` in the module-level
    ``traceIds`` dict and displays the figure widget. A part whose trace
    cannot be built is reported with ``print`` and skipped.

    Args:
        song: Key into ``lowprs`` and ``imax``.
    """
    global traceIds
    traces = []
    for part, data in lowprs[song].items():
        if part != 'mix':
            data = np.roll(data, imax[song][part])
        try:
            trace = go.Scattergl(
                # Scale an integer range instead of stepping by 0.01: a
                # float step may not produce exactly len(data) points.
                x=np.arange(len(data)) / 100,
                y=data,
                name=part,
                mode="markers",
                visible=True,
            )
        except (TypeError, ValueError) as err:
            print(f"{song}: {part} not available ({err})")
            continue

        # The trace index is its position in the list handed to the figure.
        traceIds[part] = len(traces)
        traces.append(trace)

    layout = go.Layout(title='Pressure Over Time')
    figure = go.Figure(data=traces, layout=layout)
    fig = go.FigureWidget(figure)
    fig.update_traces(marker=dict(size=3))
    display(widgets.VBox([fig]))
   

In [None]:
traceIds = {}
def pressure_histogram(song):
    """Display a 100-bin histogram of the pressure values of each part of ``song``.

    Reads the module-level ``prs[song]`` mapping (part name -> pressure
    array). Each part becomes one marker-only ``Scattergl`` trace whose x
    values are the left edges of the bins and whose y values are the bin
    counts.

    Side effects: stores ``part -> trace index`` in the module-level
    ``traceIds`` dict and displays the figure widget. A part whose
    histogram or trace cannot be built is reported with ``print`` and
    skipped.

    Args:
        song: Key into ``prs``.
    """
    global traceIds
    traces = []
    for part, data in prs[song].items():
        try:
            hist, bins = np.histogram(data, bins=100)
            trace = go.Scattergl(
                # np.histogram returns one more edge than counts; drop the
                # last edge so x and y have equal length.
                x=bins[:-1],
                y=hist,
                name=part,
                mode="markers",
                visible=True,
            )
        except (TypeError, ValueError) as err:
            print(f"{song}: {part} not available ({err})")
            continue

        # The trace index is its position in the list handed to the figure.
        traceIds[part] = len(traces)
        traces.append(trace)

    layout = go.Layout(title='Pressure Histogram')
    figure = go.Figure(data=traces, layout=layout)
    fig = go.FigureWidget(figure)
    fig.update_traces(marker=dict(size=3))
    display(widgets.VBox([fig]))
   

In [None]:
traceIds = {}
def sorted_pressure_graph(song):
    """Display the sorted pressure values of each part of ``song``.

    Reads the module-level ``prs[song]`` mapping (part name -> pressure
    array). For each part the values are sorted ascending and plotted on
    the x axis against ``rank / len(data)`` on the y axis, as a marker-only
    ``Scattergl`` trace.

    Side effects: stores ``part -> trace index`` in the module-level
    ``traceIds`` dict and displays the figure widget. A part whose trace
    cannot be built is reported with ``print`` and skipped.

    Args:
        song: Key into ``prs``.
    """
    global traceIds
    traces = []
    for part, data in prs[song].items():
        try:
            count = len(data)
            trace = go.Scattergl(
                x=np.sort(data),
                # Fraction of values that rank below each sorted value.
                y=np.arange(count) / float(count),
                name=part,
                mode="markers",
                visible=True,
            )
        except (TypeError, ValueError) as err:
            print(f"{song}: {part} not available ({err})")
            continue

        # The trace index is its position in the list handed to the figure.
        traceIds[part] = len(traces)
        traces.append(trace)

    layout = go.Layout(title='Sorted Pressure Graph')
    figure = go.Figure(data=traces, layout=layout)
    fig = go.FigureWidget(figure)
    fig.update_traces(marker=dict(size=3))
    display(widgets.VBox([fig]))
   

In [None]:
from string import ascii_lowercase
# prs[song][part]    -> pressure curve of one recording of the song
# lowprs[song][part] -> copy of that curve with every value above a low
#                       threshold set to zero
prs = {}
lowprs = {}
for coll, collDir in collections.items():
    for song in collection_directories[coll]:
        prs[song] = {}
        lowprs[song] = {}
        print(song)
        song_path = f"{raw_dir}{collDir}/{song}/"
        for part in os.listdir(song_path):
            if not part.endswith('.wav'):
                continue
            y, sr = load_file(f"{song_path}{part}")
            has_tag = len(part) >= 10
            if has_tag and part[-10:-6] == 'AHDS':
                # "...AHDS<c>?.wav": the character right after the tag is
                # used as the part key; the window is sr//2 samples.
                print("  ", part)
                prs[song][part[-6]] = pressure(y, sr, sr//2)
            elif (has_tag and part[-10:-6] == 'AOLS') or (part[-5] in ascii_lowercase+")"):
                # An "AOLS" file, or any file whose name ends in a lowercase
                # letter or ")" before ".wav", is stored as the mix; its
                # window is 100 samples.
                print("  ", part)
                prs[song]['mix'] = pressure(y, sr, 100)

        for part, levels in prs[song].items():
            # Work on a copy so the full curve in prs stays untouched.
            low = lowprs[song][part] = np.array(levels)
            sorted_lowprs = np.sort(low)
            # Threshold: the value one twentieth of the way up the sorted curve.
            pctile = sorted_lowprs[len(sorted_lowprs)//20]
            low[low > pctile] *= 0


In [None]:
# imax[song][part] -> shift (in pressure-curve samples) chosen for parts
# "1", "2" and "3" of each song.
#
# For every song, parts 1 and 2 are slid against part 3 over offsets
# -150..149, and the pair of offsets giving the largest sum of the
# element-wise product of the three transformed curves is kept.
imax = dict()
for coll, collDir in collections.items():
    for song in collection_directories[coll]:
        if song in lowprs:
            # Pad 150 zeros on each side so every offset in -150..149 can be
            # sliced without running off either end of the array.
            p1 = np.pad(lowprs[song]["1"], (150,150))
            p2 = np.pad(lowprs[song]["2"], (150,150))
            p3 = np.pad(lowprs[song]["3"], (150,150))
            
            # adjustments for optimizing "3-way correlation"
            # Each positive value v becomes 1 - v (negate, then add 1 to the
            # now-negative entries); zeros, including the padding, stay 0.
            # NOTE(review): assumes the curves hold no negative values - confirm.
            p1[p1>0] *= -1
            p1[p1<0] += 1
            p2[p2>0] *= -1
            p2[p2<0] += 1
            p3[p3>0] *= -1
            p3[p3<0] += 1
            
            maxCorr = 0
            imax[song] = dict()
            imax[song]["1"] = 0
            imax[song]["2"] = 0
            imax[song]["3"] = 0
            # Compared window length: the shortest padded curve minus the
            # 300 samples of padding.
            length = np.min((len(p1), len(p2), len(p3))) - 300
            # i offsets part 1 and j offsets part 2; part 3 stays at offset 0.
            for i in range(-150,150):
                for j in range(-150,150):
                    corr = np.sum((p1[150+i:150+i+length])*(p2[150+j:150+j+length])*(p3[150:150+length]))
                    # Strict '>' keeps the first offset pair found on ties.
                    if corr > maxCorr:
                        maxCorr = corr
                        imax[song]["1"] = i
                        imax[song]["2"] = j

            # shift right instead of left (and change sign)
            # Each offset is replaced by (largest offset - offset), so the
            # stored shifts are all >= 0 and at least one of them is 0.
            maxShift = np.max((imax[song]["1"], imax[song]["2"], imax[song]["3"]))
            for i in ["1", "2", "3"]:
                imax[song][i] = maxShift - imax[song][i]
            print(song, imax[song]["1"], imax[song]["2"], imax[song]["3"], maxCorr)
            #print(np.nonzero((p1[150+66:150+66+length])*(p2[150+22:150+22+length])*(p3[150:150+length])))

In [None]:
# Write the three shifts of every processed song to
# "{estimate_dir}{collection directory}/{song directory}/shifts.txt"
# as a single row of integers.
for coll, collDir in collections.items():
    for song in collection_directories[coll]:
        if song not in lowprs:
            continue
        # Songs whose name starts with "GVM" use only the text before the
        # first underscore as their directory name.
        songDir = song.split('_')[0] if song[:3] == 'GVM' else song
        dest = f"{estimate_dir}{collDir}/{songDir}/shifts.txt"
        shifts = imax[song]
        np.savetxt(dest, np.c_[shifts["1"], shifts["2"], shifts["3"]], fmt="%d")
        print(song)

In [None]:
# Song inspected by this and the following plotting cells.
this_song="Nanina (1)"
# Full pressure curves of every part of the song.
pressure_graph(this_song)

In [None]:
# Masked low-pressure curves of the same song, before any shift is applied.
low_pressure_graph(this_song)

In [None]:
# Same curves with the shifts from imax applied, to check the alignment by eye.
shifted_low_pressure_graph(this_song)

In [None]:
# Distribution of the pressure values of each part.
pressure_histogram(this_song)

In [None]:
# Sorted pressure values of each part against their rank fraction.
sorted_pressure_graph(this_song)

In [None]:
# Scratch cell: small array used to check np.pad in the next cell.
x = np.array((1,2,3))
x

In [None]:
# Scratch cell: pad five elements on each side with np.pad's default mode.
np.pad(x, (5, 5))