### Main steps
For several song samples:

- load a song stft from data/stft/fft-window-size_fft-hop-length/Album-name/song-name-dir/startoffset-endoffset.stft
- Use the method in [Noll's paper](https://drive.google.com/file/d/104KZa5Zdww6NLlG7VzhKRKxS-sbp0Ej8/view?usp=sharing) to detect the one, two, or three pitches (fundamental frequencies) of the voices in the audio
    - Equation 14 in Noll gives the Cepstrum as an integral. Translated into Librosa functions this is
    - stft(log|stft(signal)|)
    
The idea is that if you take the Fourier transform of a voice you get peaks at the fundamental and its harmonics. Plotted against frequency, this spectrum looks like a wave whose period equals the fundamental frequency, so you can take the Fourier transform of *it* to find that period.

    
    

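It's not quite that simple. If you take the Fourier transform of the Fourier transform directly, you essentially get the original signal back (time-reversed), so an extra step is needed in between: taking the log of the magnitude, as explained below.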

The vocal cords buzz at a certain frequency. The shape of the space in the throat, mouth, and nose makes it resonate at this frequency and its harmonics. Depending on the shape, it resonates more or less at different frequencies. Fig. 4 of [Noll's paper](https://drive.google.com/file/d/104KZa5Zdww6NLlG7VzhKRKxS-sbp0Ej8/view) shows the sound of the vowel "A": it has a peak at about 500Hz and another peak at about 1800Hz. The small bumps in the spectrum show the pitch. There are 8-9 bumps per 1000Hz, so the pitch is about 120Hz.

To get Fig. 4 you need to take the Fourier transform of the signal, then take the log of the absolute value of that. In the Fourier transform the contributions of the vowel and the pitch are multiplied together. After taking the log they are added together, which lets us see the pitch and vowel separately in the graph. 

When we transform this new "signal" again using the Fourier transform it picks up the frequency of the bumps. That's the fundamental frequency.

In [None]:
import librosa
import os
from scipy import signal
from scipy.fftpack import fft, fftshift, ifft
from scipy.optimize import brent, fminbound
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
import numpy as np
import math

# Prefix prepended to a file name when audio (and the 'minmax' file) is read;
# reassigned by the top-level driver loops before each batch.
data_dir = ""
# Prefix prepended to a file name when the pitch text file is written.
pitch_dir = ""
# Default pitch limits (presumably Hz, given frequency = sr / samples);
# create_and_store zeroes every estimate outside [minimum_pitch, maximum_pitch].
maximum_pitch = 500
minimum_pitch = 60

def load_file(name):
    """Load the audio file ``data_dir + name`` with librosa.

    ``sr=None`` is passed to ``librosa.load`` (per librosa's documentation
    this keeps the file's native sampling rate instead of resampling).

    Returns:
        A ``(samples, sampling_rate)`` tuple.
    """
    path = data_dir + name
    samples, sample_rate = librosa.load(path, sr=None)
    return samples, sample_rate

In [1]:
def local_maxes(a):
    """Return the values of the interior local maxima of a 1-D array.

    An element counts as a local maximum when it is strictly greater than its
    left neighbour and greater than or equal to its right neighbour, so only
    the first sample of a flat-topped peak is reported. The first and last
    elements are never reported because they lack a neighbour on one side.

    Args:
        a: 1-D array-like of comparable values.

    Returns:
        Array holding the peak values in their original order; empty when the
        input has fewer than three elements.
    """
    a = np.asarray(a)
    # Fewer than three samples means there is no interior element at all.
    # (Building the boolean mask for such inputs used to raise IndexError.)
    if a.size < 3:
        return a[:0]

    interior = a[1:-1]
    is_peak = (interior > a[:-2]) & (interior >= a[2:])
    return interior[is_peak]

# Interpolator of the *negated* signal, built by cubic_interpolation_init.
# Stays None until that function has been called.
tck = None


def cubic_interpolation_init(signal):
    """Build the module-level cubic interpolator for ``signal``.

    The signal is negated before interpolation so that a minimiser
    (``fminbound``) can be used to locate its maxima.

    Args:
        signal: 1-D NumPy array; sample ``i`` is placed at x = ``i``.
            (The parameter name shadows the ``scipy.signal`` module inside
            this function only.)
    """
    global tck
    x_points = np.arange(signal.size)
    tck = interp1d(x_points, -signal, kind="cubic")


def get_cubic_interpolation(x):
    """Evaluate the negated, interpolated signal at ``x``.

    Returns 0 when ``x`` lies outside the interpolated range (``interp1d``
    raises ``ValueError`` there). Only that error is absorbed; anything else,
    including calling this before ``cubic_interpolation_init``, propagates
    instead of being silently turned into 0.
    """
    try:
        return tck(x)
    except ValueError:
        return 0


def find_max_cubic_interpolation(l, r):
    """Find the maximum of the interpolated signal on the interval [l, r].

    Args:
        l: Lower bound of the search interval (in sample positions).
        r: Upper bound of the search interval.

    Returns:
        ``[x, value]``: the position of the maximum and the (un-negated)
        interpolated value there.
    """
    result = fminbound(get_cubic_interpolation, l, r, full_output=True)
    position, negated_value = result[0], result[1]
    # The interpolator holds -signal, so flip the sign back.
    return [position, -negated_value]

def find_fundamental_frequency(y, sr, start_time, graph=False):
    """Estimate the fundamental frequency of a short segment via its cepstrum.

    A segment of ``ceil(0.030 * sr)`` samples starting at ``start_time`` is
    Hann-windowed and transformed: FFT -> log magnitude -> window -> inverse
    FFT -> squared real part. The largest value at or beyond quefrency
    ``0.003 * sr`` samples is located, refined with cubic interpolation, and
    converted to a frequency as ``sr / quefrency``.

    Because only the first half of the result is kept, the searchable
    quefrencies run from about ``0.003 * sr`` to ``0.015 * sr`` samples.

    Args:
        y: 1-D array of audio samples.
        sr: Sampling rate of ``y`` (samples per second).
        start_time: Segment start, in seconds.
        graph: When true, plot the cepstrum with the search bounds (purple)
            and the detected peak (green).

    Returns:
        The estimated frequency, ``sr`` divided by the refined peak quefrency
        in samples.
    """
    # Generate the cepstrum.
    # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
    window_size = int(np.ceil(0.030 * sr))
    first = int(start_time * sr)
    last = int(start_time * sr + window_size)
    segment = y[first:last]

    # The segment is shorter than the window near the end of the audio, so the
    # window is truncated to match. signal.windows.hann replaces signal.hann,
    # which SciPy 1.13 removed.
    window = signal.windows.hann(window_size)[:segment.size]
    spectrum = fft(np.multiply(segment, window))
    # NOTE(review): an all-zero segment yields log(0) = -inf here.
    response = np.log(np.abs(fftshift(spectrum)))
    final = np.real(ifft(np.multiply(response, window))) ** 2
    final = final[:final.size // 2]

    # Suppress the values near zero quefrency.
    final[0:int(0.0005 * sr)] = 0

    # Restrict the peak search to quefrencies of at least 0.003 * sr samples.
    search_offset = int(0.003 * sr)
    analysis_range = final[search_offset:]

    # Initialize cubic interpolation over the search range.
    cubic_interpolation_init(analysis_range)

    # Index of the first occurrence of the largest positive value; falls back
    # to 0 when nothing is positive. NaNs are ignored, exactly as a
    # `value > best` scan starting from 0 would ignore them.
    mx = int(np.argmax(np.where(np.isnan(analysis_range), 0, analysis_range)))

    # Refine the integer peak position to sub-sample precision.
    nx = find_max_cubic_interpolation(mx - 1, mx + 1)[0]

    if graph:
        # The range between the purple lines is what is considered for frequency.
        plt.figure(figsize=(15, 3))
        # The x axis is the sample index, so label it in samples rather than ms.
        plt.xlabel("Quefrency (samples)")
        plt.ylabel("Amplitude")
        plt.plot(final[:1000])

        # The max peak is found inside the range set by these lines.
        plt.axvline(x=0.003 * sr, color="purple")
        plt.axvline(x=0.015 * sr, color="purple")
        plt.axvline(x=(mx + search_offset), color='green')

    frequency = float(sr) / (nx + search_offset)
    return frequency

In [None]:
def initialize_min_and_max(song):
    """Set the global pitch limits for ``song``.

    The sixth character from the end of the file name selects the voice. When
    it is '1', '2' or '3', the file ``data_dir + 'minmax'`` is read and every
    line whose first character equals that voice supplies the limits as
    ``<voice> <minimum> <maximum>`` (the last matching line wins).

    The defaults (60 and 500) are used when the name carries no voice
    character, when the minmax file is missing, or when no line matches, so
    limits from a previously processed song never carry over.

    Args:
        song: File name of the song, e.g. as returned by ``os.listdir``.
    """
    global minimum_pitch, maximum_pitch

    # Always start from the defaults; only a matching minmax line overrides.
    minimum_pitch = 60
    maximum_pitch = 500

    # Slicing (rather than indexing) yields '' for names shorter than six
    # characters instead of raising IndexError.
    voice = song[-6:-5]
    if voice not in ('1', '2', '3'):
        return

    try:
        with open(data_dir + 'minmax') as minmax_file:
            pitch_data = minmax_file.readlines()
    except FileNotFoundError:
        print('Please create a minmax file. Using default values for now')
        return

    for line in pitch_data:
        if line[:1] == voice:
            fields = line.split()
            minimum_pitch = int(fields[1])
            maximum_pitch = int(fields[2])

In [2]:
def create_and_store(names):
    """Estimate a pitch track for each file and write it to a text file.

    For every name: set the pitch limits with ``initialize_min_and_max``, load
    the audio with ``load_file``, estimate the fundamental frequency every
    0.01 s with ``find_fundamental_frequency``, zero every estimate outside
    ``[minimum_pitch, maximum_pitch]``, and write one ``"<time> <frequency>"``
    line per frame to ``pitch_dir + name`` with its last four characters
    replaced by ``.txt``.

    Args:
        names: Iterable of audio file names relative to ``data_dir``.
    """
    for done, name in enumerate(names, start=1):
        initialize_min_and_max(name)
        y, sr = load_file(name)

        # Frame start times on a 0.01 s grid; the final grid point is dropped.
        time_listing = np.arange(0, y.size / sr, 0.01)[:-1]

        f = np.array(
            [find_fundamental_frequency(y, sr, t) for t in time_listing],
            dtype=float,
        )

        # Estimates outside the allowed range are stored as 0.
        out_of_range = (f > maximum_pitch) | (f < minimum_pitch)
        f[out_of_range] = 0

        lines = [
            str(t) + ' ' + str(freq)
            for t, freq in zip(time_listing, f.tolist())
        ]

        # Replace the 4-character extension (e.g. ".wav") with ".txt".
        out_path = (pitch_dir + name)[:-4] + ".txt"

        # Create the output directory if needed so the write cannot fail on a
        # missing folder after all the estimation work has been done.
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        with open(out_path, "w") as fout:
            fout.write("\n".join(lines))

        print(f"{done} done")

In [None]:
# Scherbaum Mshavanadze
# NOTE(review): this output path uses 'boersma' while the other collections
# in this file use 'noll' -- confirm which directory is intended.
parent_data_dir = '/Akamai/voice/data/Scherbaum Mshavanadze/'
parent_pitch_dir = '/Akamai/voice/data/pitches-raw/boersma/Scherbaum Mshavanadze/'

for collection in os.listdir(parent_data_dir):
    collection_path = f"{parent_data_dir}{collection}"
    if not os.path.isdir(collection_path):
        continue

    # Every entry whose name ends in 'wav' is one part to analyse.
    parts = [part for part in os.listdir(collection_path) if part.endswith('wav')]

    # create_and_store and its helpers read these module-level directories.
    data_dir = parent_data_dir + collection + '/'
    pitch_dir = parent_pitch_dir + collection + '/'
    create_and_store(parts)
        
# Teach Yourself Megrelian Songs
parent_data_dir = '/Akamai/voice/data/Teach Yourself Megrelian Songs/'
parent_pitch_dir = '/Akamai/voice/data/pitches-raw/noll/Teach Yourself Megrelian Songs/'

for collection in os.listdir(parent_data_dir):
    collection_path = f"{parent_data_dir}{collection}"
    # Only sub-directories other than 'mp3' are processed.
    if collection == 'mp3' or not os.path.isdir(collection_path):
        continue

    # Every entry whose name ends in 'wav' is one part to analyse.
    parts = [part for part in os.listdir(collection_path) if part.endswith('wav')]

    # create_and_store and its helpers read these module-level directories.
    data_dir = parent_data_dir + collection + '/'
    pitch_dir = parent_pitch_dir + collection + '/'
    create_and_store(parts)
            
# Teach Yourself Gurian Songs
# (This title line previously lacked the leading '#', which made the cell a
# SyntaxError.)
parent_data_dir = '/Akamai/voice/data/Teach Yourself Gurian Songs/'
parent_pitch_dir = '/Akamai/voice/data/pitches-raw/noll/Teach Yourself Gurian Songs/'

for collection in os.listdir(parent_data_dir):
    collection_path = f"{parent_data_dir}{collection}"
    # Only sub-directories other than 'mp3' are processed.
    if collection == 'mp3' or not os.path.isdir(collection_path):
        continue

    # Every entry whose name ends in 'wav' is one part to analyse.
    parts = [part for part in os.listdir(collection_path) if part.endswith('wav')]

    # create_and_store and its helpers read these module-level directories.
    data_dir = parent_data_dir + collection + '/'
    pitch_dir = parent_pitch_dir + collection + '/'
    create_and_store(parts)
