In [1]:
import numpy as np
import librosa
import matplotlib.pyplot as plt

# Root folder of the recordings. NOTE: absolute, machine-specific path.
data_dir = "/Akamai/voice/data/"


def load_file(name):
    """Load one audio file stored below ``data_dir``.

    ``name`` is appended to ``data_dir`` by plain string concatenation and the
    resulting path is handed to ``librosa.load`` with its default arguments.

    Returns the two-element tuple produced by ``librosa.load`` (bound to
    ``y, sr`` by the callers in this notebook).
    """
    path = data_dir + name
    samples, rate = librosa.load(path)
    return samples, rate

In [2]:
import numpy as np
# Recording analysed by every cell below; the path is relative to data_dir.
y, sr = load_file(
    "Scherbaum Mshavanadze/"
    "GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919/"
    "GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919_AHDS1M.mp3"
)



In [31]:
# Analysis frames start every 220 samples. The start times (in seconds) are
# derived from the sample indices so that both arrays always have the same
# length and stay correct for any sample rate; the former hard-coded step
# 0.00997732426 s is 220 / 22050 and a float-step arange may round to a
# different number of elements than the integer-step one.
frames_in_samples = np.arange(0, y.size, 220)
frames = frames_in_samples / sr

## Periodicity Measure

In [3]:
from scipy.fftpack import dct

def find_periodicity_max (n_o, graph=False, samples=None, rate=None):
    """Return the peak of the periodicity measure for one analysis frame.

    The frame starts at sample ``n_o``. For every lag ``0 .. N-1``
    (``N = 1000``) an autocorrelation value is divided by a normalised
    magnitude-difference value and weighted by an "enhanced" term; the
    resulting curve is then summed over the multiples of every candidate
    period between ``rate/500`` and ``rate/50`` samples. The largest of those
    sums is returned.

    Parameters
    ----------
    n_o : int
        Index of the first sample of the frame.
    graph : bool, optional
        When true, plot the score of every candidate period.
    samples : array-like, optional
        One-dimensional signal to analyse. Defaults to the notebook-level
        ``y`` so existing calls keep working.
    rate : number, optional
        Sample rate of ``samples``. Defaults to the notebook-level ``sr``.

    Returns
    -------
    float
        Maximum score over the candidate periods.

    Raises
    ------
    ValueError
        If ``n_o`` is negative or fewer than ``2 * (N - 1)`` samples follow
        ``n_o`` (the longest lag needs that many).
    """
    N = 1000

    # Fall back to the notebook globals only when no explicit input is given.
    processed = np.asarray(y if samples is None else samples)
    if rate is None:
        rate = sr

    if n_o < 0 or n_o + 2 * (N - 1) > processed.size:
        raise ValueError(
            "frame starting at sample %d needs %d samples, but the signal has only %d"
            % (n_o, 2 * (N - 1), processed.size)
        )

    # np.int was an alias of the builtin int and no longer exists in NumPy >= 1.24.
    min_period = int(rate/500)
    max_period = int(rate/50)

    def autocorrelation (l):
        # Product of the frame with itself shifted by l samples, scaled by 1/N.
        f = processed[n_o:N-abs(l)+n_o]
        s = processed[abs(l)+n_o:N+n_o]
        return (1/N) * np.sum(np.multiply(f, s))

    def vlamdf (l):
        # Absolute difference between two adjacent l-sample segments ...
        f = processed[n_o+l:n_o+2*l]
        s = processed[n_o:l+n_o]
        numerator = np.sum(np.abs(np.subtract(f, s)))
        # ... normalised by half the absolute sum over the first 2*l-1 samples.
        f = processed[n_o:2*l-1+n_o]
        denominator = 0.5 * np.sum(np.abs(f))
        return numerator/denominator

    lags = np.arange(N)
    correlation = np.vectorize(autocorrelation)(lags)

    # Lag 0 always evaluates 0/0 (both slices are empty); silence the expected
    # floating-point warnings and let nan_to_num turn NaN into 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        p_l = np.nan_to_num(np.vectorize(vlamdf)(lags))

    epsilon = 1  # keeps the division below finite where p_l is 0
    n_l = np.divide(correlation, p_l + epsilon)

    # "Enhanced" term: the maximum of p_l, scaled by (N-1)/(N-argmax), minus p_l.
    peak = np.max(p_l)
    peak_lag = np.argmax(p_l)
    k_l = peak * ((N-1)/(N-peak_lag)) - p_l
    j_l = np.multiply(n_l, k_l)

    def sphf (tau):
        # Sum j_l at the multiples tau, 2*tau, ... that lie below N.
        multiples = np.arange(1, int(N/tau)) * tau
        return np.sum(j_l[multiples]) * tau

    periods = np.arange(min_period, max_period+1)
    p_t = np.vectorize(sphf)(periods)
    cy = np.max(p_t)

    if graph:
        plt.figure()
        # Plot against the candidate period itself (the axis used to be a bare
        # index mislabelled as "sample rate").
        plt.plot(periods, p_t)
        plt.xlabel('candidate period (samples)')
        plt.ylabel('amplitude')
        plt.title('periodicity analysis')

    return cy

# Frame starts are given in samples: second 1 is unvoiced, second 20 is voiced.
for start_second in (1, 20):
    print(find_periodicity_max(start_second * sr))

0.017663507518839584
15.460250478124356




## Zero-crossing rate-weighted RMS energy

In [4]:
def sgn (x):
    """Return the sign of a scalar: 1 if positive, 0 if equal to zero, else -1.

    Anything that is neither positive nor equal to zero falls through to -1.
    """
    if x > 0:
        return 1
    return 0 if x == 0 else -1

def find_weighted_zero_crossing (n_o, samples=None):
    """Return the RMS of one frame divided by its sign-change count.

    The RMS is taken over the ``N = 1000`` samples that follow ``n_o``; the
    sign-change count is the sum of absolute differences between the signs of
    consecutive samples in ``[n_o, n_o + N)``, so a flip between positive and
    negative contributes 2.

    Parameters
    ----------
    n_o : int
        Index of the first sample of the frame.
    samples : array-like, optional
        One-dimensional signal to analyse. Defaults to the notebook-level
        ``y`` so existing calls keep working.

    Returns
    -------
    float
        ``rms / zcr``. A frame without any sign change divides by zero, which
        NumPy reports as ``inf`` (or ``nan`` for an all-zero frame).

    Raises
    ------
    ValueError
        If ``n_o`` is negative or fewer than ``N + 1`` samples are available
        from ``n_o`` on (the frame would be silently truncated otherwise).
    """
    N = 1000
    processed = np.asarray(y if samples is None else samples)

    if n_o < 0 or n_o + N + 1 > processed.size:
        raise ValueError(
            "frame starting at sample %d needs %d samples, but the signal has only %d"
            % (n_o, N + 1, processed.size)
        )

    rms = np.sqrt(np.sum(processed[n_o+1:n_o+N+1] ** 2) / N)

    # np.sign works on the whole frame at once; the integer cast keeps the
    # count (and therefore the dtype of the quotient) the same as when the
    # signs were produced one element at a time.
    signs = np.sign(processed[n_o:n_o+N]).astype(int)
    zcr = np.sum(np.abs(np.diff(signs)))

    return rms/zcr
        
# Frame starts are given in samples: second 1 is unvoiced, second 20 is voiced.
for start_second in (1, 20):
    print(find_weighted_zero_crossing(start_second * sr))

7.571771595403471e-05
0.0012426604337535834


## Kaiser-Teager frame energy

In [5]:
def kaiser_teager (f, s):
    """Return ``f**2 - f*s``, evaluated element-wise for array inputs."""
    squared = f**2
    cross_term = np.multiply(f, s)
    return squared - cross_term

def find_kaiser_teager_frame_energy (n_o, samples=None):
    """Return the square root of the summed ``kaiser_teager`` values of a frame.

    ``N + 1 = 1001`` consecutive samples starting at ``n_o`` are read; each
    sample from the second one on is paired with its predecessor and passed
    to ``kaiser_teager``.

    Parameters
    ----------
    n_o : int
        Index of the first sample of the frame.
    samples : array-like, optional
        One-dimensional signal to analyse. Defaults to the notebook-level
        ``y`` so existing calls keep working.

    Returns
    -------
    float
        ``sqrt(sum(kaiser_teager(current, previous)))``. NOTE(review): the sum
        is not guaranteed to be non-negative, in which case the square root
        is ``nan``.

    Raises
    ------
    IndexError
        If the frame reaches past the end of the signal.
    """
    N = 1000
    processed = np.asarray(y if samples is None else samples)

    # Index-array lookup (rather than a slice) so an out-of-range frame fails
    # loudly instead of being truncated.
    window = processed[np.arange(n_o, n_o + N+1)]

    k_t = kaiser_teager(window[1:], window[:-1])
    return np.sqrt(np.sum(k_t))

# Frame starts are given in samples: second 1 is unvoiced, second 20 is voiced.
for start_second in (1, 20):
    print(find_kaiser_teager_frame_energy(start_second * sr))

0.029672684
0.86064273


## Normalized low-frequency energy ratio

In [124]:
from scipy import signal
from scipy.fft import fftshift

# Filled in by generate_spectogram(); stays None until that has been called.
spectogram = None


def generate_spectogram():
    """Compute the spectrogram of the global signal ``y`` and cache it.

    ``signal.spectrogram`` is run on ``y`` at sample rate ``sr`` with
    1000-sample segments that overlap by 780 samples. Only the third returned
    value (the spectrogram array) is kept, in the module-level ``spectogram``;
    its shape is printed.
    """
    global spectogram
    _, _, spectogram = signal.spectrogram(y, sr, nperseg=1000, noverlap=780)
    print(spectogram.shape)
    
def find_low_frequency_energy_ratio (n_o):
    """Unfinished: intended to return a low-frequency energy ratio for a frame.

    As written the function cannot return a value (see the FIXME below).
    ``n_o`` is accepted but not used anywhere in the body yet.
    """
    # Bound but never read afterwards.
    processed = y
    N = 1000
    # Sum of every value in the module-level ``spectogram`` divided by N.
    # ``spectogram`` is None until generate_spectogram() has been run.
    total_sum = np.sum(spectogram) / N
    
    # FIXME(review): np.sum() is called without its required array argument,
    # so this line raises TypeError. The numerator of the ratio (and the use
    # of total_sum) still has to be written.
    return np.sum()
