### Main steps
For several song samples:

- Load a song STFT from data/stft/fft-window-size_fft-hop-length/Album-name/song-name-dir/startoffset-endoffset.stft
- Use the method in [Hermes's paper](https://drive.google.com/file/d/1yYo9DB9Vh0tHoE63swfu78u2_p_fcAD5/view?usp=sharing) (see Fig. 1) to detect the one, two, or three pitches (fundamental frequencies) of the voices in the audio

In [6]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import os
import scipy.signal
import scipy.fft
import scipy
import librosa.display
import math
from scipy.signal import butter
from scipy.fft import fft, ifft, rfft, irfft


# Root directory that load_file and initialize_min_and_max prepend to the
# file names they open.
data_dir = '/Akamai/voice/data/'
# Only assigned by the commented-out driver code at the end of this notebook;
# presumably the output directory for pitch estimates -- TODO confirm against
# create_and_store, which is not shown here.
pitch_dir = ''
# Interpolant built by init_cubic_spline_interpolation and evaluated by
# calculate_P; None until the former has been called.
tck = None
# Upper and lower bounds of the candidate pitch range scanned by hermes.
# initialize_min_and_max may overwrite both.
max_pitch = 500
min_pitch = 60

In [3]:
def load_file (file):
    """Load an audio file that lives under the global ``data_dir``.

    ``file`` is appended to ``data_dir`` by plain string concatenation and
    the resulting path is passed to ``librosa.load`` with ``sr=None``.

    Returns:
        A two-element list ``[samples, sample_rate]`` exactly as produced
        by ``librosa.load``.
    """
    path = data_dir + file
    samples, sample_rate = librosa.load(path, sr=None)
    return [samples, sample_rate]

def smooth(x,window_len=11,window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is first extended on both sides with reversed slices of its
    own ends so that the 'valid' convolution does not shrink it, and the
    result is then trimmed back to the length of the input.

    Args:
        x: 1-D array of samples.
        window_len: Number of taps in the window. Values below 3 disable
            smoothing and ``x`` is returned unchanged.
        window: ``'flat'`` for a moving average, otherwise the name of a
            numpy window function that takes the length as its only
            argument (e.g. ``'hanning'``, ``'hamming'``, ``'bartlett'``,
            ``'blackman'``).

    Returns:
        The smoothed signal, with as many samples as ``x``.
    """
    if window_len < 3:
        return x

    padded = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]

    if window == 'flat':  # moving average
        weights = np.ones(window_len, 'd')
    else:
        # Look the window function up by name instead of eval()-ing a
        # string, so arbitrary expressions can never be executed.
        weights = getattr(np, window)(window_len)

    y = np.convolve(weights / weights.sum(), padded, mode='valid')

    # `y` has window_len - 1 more samples than `x`. For an odd window that
    # surplus splits evenly; for an even window one sample fewer must be cut
    # from the right, otherwise the output is one sample short.
    half = window_len // 2
    trim_right = half if window_len % 2 else half - 1
    return y[half:-trim_right]
    
def init_cubic_spline_interpolation (x_points, y_points):
    """Build a cubic interpolant through the samples and store it globally.

    The interpolant is created with ``scipy.interpolate.interp1d`` using
    ``kind="cubic"`` and ``fill_value='extrapolate'``, so it can also be
    evaluated outside the range of ``x_points``. It is published as the
    module-level ``tck``; nothing is returned.

    Args:
        x_points: Sample positions.
        y_points: Sample values, one per entry of ``x_points``.
    """
    # Original author's note: chose 168 different values since 48 are
    # recommended for every octave.
    global tck
    interpolant = scipy.interpolate.interp1d(
        x_points,
        y_points,
        kind="cubic",
        fill_value='extrapolate',
    )
    tck = interpolant
    
    
def calculate_P (x):
    """Evaluate the global interpolant ``tck`` at ``x``, scaled by ``arctan(x)``.

    ``tck`` must already have been set (see
    ``init_cubic_spline_interpolation``). ``x`` may be a scalar or a numpy
    array; the product is computed element-wise.
    """
    interpolated = tck(x)
    weighting = np.arctan(x)
    return interpolated * weighting

def calculate_h_n (n):
    """Return the weight ``0.84 ** (n - 1)`` for index ``n``.

    The weight is 1 for ``n == 1`` and shrinks geometrically as ``n`` grows.
    ``n`` may be a scalar or a numpy array (evaluated element-wise).
    """
    decay = 0.84
    exponent = n - 1
    return decay ** exponent

def estimate_pitch (vals):
    """Return the index of the largest entry of ``vals``.

    When the maximum occurs more than once, the first occurrence wins.
    """
    scores = np.asarray(vals)
    return scores.argmax()

def preprocessing (sig, sr, threshold):
    """Low-pass filter ``sig`` by zeroing every FFT bin above ``threshold``.

    Args:
        sig: 1-D real-valued numpy array of samples.
        sr: Sampling rate of ``sig`` in samples per second.
        threshold: Cut-off frequency in Hz. Bins whose frequency is strictly
            greater than this value are removed; bins at or below it are
            left untouched.

    Returns:
        The filtered signal as a numpy array with as many samples as
        ``sig``.
    """
    n_samples = sig.size
    spectrum = rfft(sig)

    # rfft yields n_samples // 2 + 1 bins. rfftfreq returns the exact centre
    # frequency of each of them, including the final (Nyquist) bin, so the
    # mask lines up one-to-one with the spectrum.
    bin_freqs = np.fft.rfftfreq(n_samples, d=1.0 / sr)
    spectrum[bin_freqs > threshold] = 0

    # Pass the length explicitly: without it irfft assumes an even length and
    # returns odd-length inputs one sample short.
    return irfft(spectrum, n=n_samples)

In [4]:
def initialize_min_and_max (song):
    """Set the global pitch range ``min_pitch``/``max_pitch`` for ``song``.

    The sixth character from the end of ``song`` selects the range. When it
    is ``'1'``, ``'2'`` or ``'3'``, the file ``data_dir + 'minmax'`` is read;
    every line whose first character equals that digit supplies the range as
    its second and third whitespace-separated fields (if several lines
    match, the last one wins). If no line matches, both globals keep their
    previous values. For any other character, for names shorter than six
    characters, or when the file is missing, the defaults 60 and 500 are
    used.

    Args:
        song: File name of the recording.

    Raises:
        ValueError: If a matching line holds non-integer bounds.
        IndexError: If a matching line has fewer than three fields.
    """
    global data_dir, min_pitch, max_pitch

    default_min, default_max = 60, 500

    # Slice rather than index so that a name shorter than six characters
    # yields '' and falls through to the defaults instead of raising.
    voice = song[-6:-5]
    if voice not in ('1', '2', '3'):
        min_pitch, max_pitch = default_min, default_max
        return

    try:
        # The context manager closes the file even if reading fails.
        with open(data_dir + 'minmax') as minmax_file:
            pitch_data = minmax_file.readlines()
    except FileNotFoundError:
        print('Please create a minmax file. Using default values for now')
        min_pitch, max_pitch = default_min, default_max
        return

    for each in pitch_data:
        if each[0] == voice:
            # split() without an argument tolerates tabs and repeated spaces.
            fields = each.split()
            min_pitch = int(fields[1])
            max_pitch = int(fields[2])

In [5]:
from scipy.signal import argrelmax
from scipy.optimize import fminbound

def hermes (audio, sr, graph=False):
    """Estimate the pitch of ``audio`` by weighted harmonic summation.

    Follows the approach of the Hermes paper referenced at the top of this
    notebook:

    1. Apply a Hamming window, zero-pad by 5000 samples and take the
       magnitude spectrum.
    2. Keep only the bins up to ``(N + 1) * max_pitch``, smooth them and fit
       a cubic interpolant (stored in the global ``tck``).
    3. For every candidate pitch ``f`` from ``min_pitch`` to ``max_pitch``
       in 1 Hz steps, sum ``calculate_h_n(n) * calculate_P(n * f)`` over the
       harmonics ``n = 1 .. N``.
    4. Smooth that curve twice, locate its maximum and refine it with a
       bounded scalar minimisation within +/- 1 Hz of the peak.

    Args:
        audio: 1-D numpy array of samples.
        sr: Sampling rate of ``audio`` in Hz.
        graph: When true, plot the truncated spectrum and the negated
            summation curve with the estimate marked.

    Returns:
        The estimated pitch in Hz.
    """
    N = 16       # number of harmonics summed per candidate pitch
    pad = 5000   # zero-padding appended before the FFT
    step = 1     # spacing of the candidate pitches, in Hz

    # scipy.signal.hamming was removed in SciPy 1.13; the window functions
    # live in scipy.signal.windows.
    window = scipy.signal.windows.hamming(audio.shape[0])
    padded = np.pad(np.multiply(audio, window), (0, pad), 'constant')

    # Use the exact centre frequency of every rfft bin rather than a
    # linspace approximation of the axis.
    spectrum = np.abs(scipy.fft.rfft(padded))
    freqs = scipy.fft.rfftfreq(padded.size, d=1.0 / sr)

    # Bins above the highest harmonic of the highest candidate are not needed.
    keep = freqs <= (N + 1) * max_pitch
    freqs = freqs[keep]
    spectrum = spectrum[keep]

    if graph:
        plt.figure(figsize=(20, 5))
        plt.plot(spectrum[:500])

    # NOTE: an optional step that zeroed the bins between neighbouring local
    # maxima of the spectrum was tried here previously and is disabled.

    init_cubic_spline_interpolation(freqs, smooth(spectrum))

    harmonics = np.arange(1, N + 1)
    candidates = np.arange(min_pitch, max_pitch + 1, step)
    # harmonic_freqs[i, j] is the (j + 1)-th harmonic of candidate i.
    harmonic_freqs = np.outer(candidates, harmonics)
    weights = calculate_h_n(harmonics)

    contributions = np.apply_along_axis(calculate_P, 1, harmonic_freqs)
    values = smooth(smooth(np.sum(contributions * weights, axis=1)))

    # fminbound minimises, so search the negated curve around the coarse peak.
    negated_curve = scipy.interpolate.interp1d(candidates, -values, kind="cubic", fill_value='extrapolate')
    peak = candidates[estimate_pitch(values)]
    pitch = fminbound(negated_curve, peak - 1, peak + 1)

    if graph:
        plt.figure(figsize=(20, 5))
        plt.plot(candidates, -values)
        plt.axvline(pitch)

    return pitch

In [None]:
# Scherbaum Mshavanadze
# parent_data_dir = '/Akamai/voice/data/Scherbaum Mshavanadze/'
# parent_pitch_dir = '/Akamai/voice/data/pitches/hermes/Scherbaum Mshavanadze/'

# for collection in os.listdir(parent_data_dir):
#     if os.path.isdir(f"{parent_data_dir}{collection}"):
#         parts = []
#         for part in os.listdir(f"{parent_data_dir}{collection}"):
#             if part[-3:] == 'wav':
#                 parts.append(part)
                
#         data_dir = parent_data_dir + collection + '/'
#         pitch_dir = parent_pitch_dir + collection + '/'
#         create_and_store(parts)
        
# Teach Yourself Megrelian Songs
# parent_data_dir = '/Akamai/voice/data/Teach Yourself Megrelian Songs/'
# parent_pitch_dir = '/Akamai/voice/data/pitches/hermes/Teach Yourself Megrelian Songs/'

# for collection in os.listdir(parent_data_dir):
#     if os.path.isdir(f"{parent_data_dir}{collection}"):
#         if collection != 'mp3':
#             parts = []
#             for part in os.listdir(f"{parent_data_dir}{collection}"):
#                 if part[-3:] == 'wav':
#                     parts.append(part)
                
#             data_dir = parent_data_dir + collection + '/'
#             pitch_dir = parent_pitch_dir + collection + '/'
#             create_and_store(parts)
            
# Teach Yourself Gurian Songs
# parent_data_dir = '/Akamai/voice/data/Teach Yourself Gurian Songs/'
# parent_pitch_dir = '/Akamai/voice/data/pitches/hermes/Teach Yourself Gurian Songs/'

# check = True

# for collection in os.listdir(parent_data_dir):
#     if os.path.isdir(f"{parent_data_dir}{collection}"):
#         if collection == "Masp'indzelsa Mkhiarulsa":
#             check = False
            
#         if not check:
#             parts = []
#             for part in os.listdir(f"{parent_data_dir}{collection}"):
#                 if part[-3:] == 'wav':
#                     parts.append(part)
            
#             print(parts)
#             data_dir = parent_data_dir + collection + '/'
#             pitch_dir = parent_pitch_dir + collection + '/'
#             create_and_store(parts)
            