In [None]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
from scipy.signal import butter
from scipy.fft import fft, ifft, rfft, irfft
import random
from scipy.signal import argrelmin, argrelmax, argrelextrema
import os

In [None]:
# Path prefix that load_file prepends to a song's file name. It is joined by
# plain string concatenation, so it must end with a path separator (or be empty).
data_dir = ''
# Path prefix that create_and_store prepends to a song's file name to build the
# output path; also concatenated, so the same trailing-separator rule applies.
pitch_dir = ''

In [None]:
def load_file (song):
    """Load one audio file from the current data directory.

    The path is the module-level `data_dir` prefix concatenated with `song`.
    `sr=None` is passed to `librosa.load`, so no target sample rate is imposed.

    Returns
    -------
    tuple
        `(samples, sample_rate)` exactly as produced by `librosa.load`.
    """
    path = data_dir + song
    samples, sample_rate = librosa.load(path, sr=None)
    return samples, sample_rate

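# smooth out the signal
# window options: flat (moving average), hamming, hanning, bartlett, blackman
# (kaiser is not usable here: np.kaiser also needs a beta argument, which smooth does not pass)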

def smooth(x,window_len=11,window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with mirrored samples before the
    convolution, so that the 'valid' part of the result has the same length
    as `x` (this holds when `x` has more than `window_len // 2` samples).

    Parameters
    ----------
    x : numpy.ndarray
        1-D signal to smooth.
    window_len : int
        Length of the smoothing window. Values below 3 return `x` unchanged.
    window : str
        'flat' for a moving average, or the name of a single-argument NumPy
        window function: 'hanning', 'hamming', 'bartlett' or 'blackman'.

    Returns
    -------
    numpy.ndarray
        The smoothed signal (or `x` itself when `window_len < 3`).

    Raises
    ------
    ValueError
        If `window` is not one of the supported names.
    """
    if window_len<3:
        return x

    # Mirror-pad both ends so the 'valid' convolution does not shorten the signal.
    s=np.r_[x[window_len//2:0:-1],x,x[-1:-window_len//2:-1]]
    if window == 'flat': #moving average
        w=np.ones(window_len,'d')
    elif window in ('hanning', 'hamming', 'bartlett', 'blackman'):
        # Look the window function up by name instead of eval()-ing a string.
        w=getattr(np, window)(window_len)
    else:
        raise ValueError(
            "window must be one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'; got "
            + repr(window)
        )

    # Dividing by the window sum keeps the overall signal level unchanged.
    y=np.convolve(w/w.sum(),s,mode='valid')
    return y

def preprocessing (sig, threshold, sample_rate=None):
    """Low-pass `sig` by zeroing every real-FFT bin whose frequency exceeds `threshold`.

    Parameters
    ----------
    sig : numpy.ndarray
        1-D real signal.
    threshold : float
        Cut-off frequency in Hz; bins strictly above it are set to zero.
    sample_rate : float, optional
        Sampling rate of `sig` in Hz. When omitted, the module-level `sr`
        is used, which keeps existing two-argument calls working.

    Returns
    -------
    numpy.ndarray
        The filtered signal, with exactly `sig.size` samples.
    """
    if sample_rate is None:
        sample_rate = sr

    N = sig.size
    yf = rfft(sig)
    # rfft returns N//2 + 1 bins; rfftfreq yields the matching frequency for
    # each of them, including the Nyquist bin of an even-length signal.
    xf = np.fft.rfftfreq(N, d=1.0/sample_rate)
    yf[xf > threshold] = 0
    # Passing n=N restores the original length; without it an odd-length
    # input would come back one sample short.
    pp_sig = irfft(yf, n=N)
    return pp_sig

In [None]:
def sgn (x):
    """Return the sign of a scalar: 0 for zero, 1 for positive, -1 for anything else."""
    if x == 0:
        return 0
    # Everything that is neither zero nor positive (including NaN) maps to -1.
    return 1 if x > 0 else -1

def weighted_zero_crossing (a, n_o, N):
    """Return the RMS of a frame divided by its zero-crossing count.

    The crossing count is the sum of absolute differences between the signs
    of consecutive samples in `a[n_o:n_o+N]`, so a change between a positive
    and a negative sample contributes 2. The RMS is taken over
    `a[n_o+1:n_o+N+1]` and always divided by `N`.

    Parameters
    ----------
    a : array-like
        1-D signal.
    n_o : int
        Index of the first sample of the frame.
    N : int
        Frame length in samples.

    Returns
    -------
    float or int
        `rms / crossings`, or 0 when the frame holds fewer than `N` samples,
        when `N < 2`, or when there are no crossings.
    """
    a = np.asarray(a)
    frame = a[n_o:n_o+N]
    # A frame that runs past the end of the signal has no complete set of
    # sample pairs; report 0 explicitly instead of relying on an exception.
    if N < 2 or frame.size < N:
        return 0

    # Vectorised sign with the same convention as sgn(): 0 -> 0, positive -> 1,
    # everything else -> -1.
    signs = np.where(frame == 0, 0, np.where(frame > 0, 1, -1))
    zcr = np.sum(np.abs(np.diff(signs)))
    if zcr == 0:
        return 0

    # The RMS window is shifted by one sample relative to the sign window and
    # may be one sample short at the very end of the signal; N stays the divisor.
    rms = np.sqrt(np.sum(a[n_o+1:n_o+N+1] ** 2) / N)
    return rms/zcr

In [None]:
def detect_threshold (histogram, minima, maxima, graph=False):
    """Pick the histogram valley that precedes the steepest rise towards the main peak.

    Parameters
    ----------
    histogram : numpy.ndarray
        1-D histogram values (assumed non-negative).
    minima, maxima : numpy.ndarray of int
        Indices of the local minima and local maxima of `histogram`.
    graph : bool
        When True, plot the histogram with a vertical line at the result.

    Returns
    -------
    int
        Index into `histogram` of the selected valley, or 0 when no rise
        between consecutive extrema was found.
    """
    highest_peak = 0
    highest_peak_index = 0

    # Locate the tallest peak; a maximum at index 0 is never considered.
    for peak in maxima:
        if peak != 0 and histogram[peak] > highest_peak:
            highest_peak = histogram[peak]
            highest_peak_index = peak

    # Only extrema at or to the left of the tallest peak are candidates.
    maxima = maxima[maxima <= highest_peak_index]
    minima = minima[minima <= highest_peak_index]

    p_and_k = np.sort(np.concatenate((maxima, minima)))
    max_difference = 0
    best_valley = None

    for left, right in zip(p_and_k[:-1], p_and_k[1:]):
        rise = histogram[right] - histogram[left]
        if rise > max_difference:
            # Reject a candidate that sits more than 1.5x higher than the
            # valley already chosen. `is not None` keeps a valley at index 0
            # in play, and the product form avoids dividing by an empty bin.
            if best_valley is not None and histogram[left] > 1.5 * histogram[best_valley]:
                continue
            max_difference = rise
            best_valley = left

    if best_valley is None:
        best_valley = 0

    if graph:
        plt.figure(figsize=(20, 5))
        plt.plot(histogram)
        plt.axvline(best_valley)
    return best_valley

In [None]:
# Running time offset in seconds: every call to voiced_unvoiced_detection adds the
# start time of its last frame, and the plots use it for titles and x-axis shifts.
h = 0
def voiced_unvoiced_detection (a, bins, signal=False, histogram=False, wcz_v_rms=False, threshold=False, detection=False):
    """Flag the 10 ms frames of `a` whose weighted zero-crossing measure exceeds a histogram-derived threshold.

    The signal is first passed through `preprocessing(a, 500)`; then
    `weighted_zero_crossing` is evaluated on a 15 ms window at every 10 ms
    step. A smoothed histogram of those values is handed to
    `detect_threshold`, and frames above the resulting threshold are marked
    True after `erase_short_signal` has cleared short True runs.

    Relies on the module-level `sr` (sample rate) and reads and updates the
    module-level offset `h`.

    Parameters
    ----------
    a : numpy.ndarray
        Signal to analyse (whatever `preprocessing` accepts).
    bins : int
        Number of histogram bins for the measure.
    signal : bool
        Plot the measure for every frame.
    histogram : bool
        Plot the smoothed histogram with the threshold and call `plt.show()`.
    wcz_v_rms : bool
        Meant to plot RMS against the measure.
        NOTE(review): this branch reads `r` and `rms_thresh`, neither of which
        is assigned in this function; unless they exist as globals, enabling
        the flag raises NameError.
    threshold : bool
        Forwarded to `detect_threshold` as its `graph` argument.
    detection : bool
        Plot the per-frame decision against the threshold.

    Returns
    -------
    numpy.ndarray of bool
        One entry per frame; True where the measure is above the threshold
        after the short-run clean-up. Presumably True means "voiced" (the
        plotting comments call frames at or below the threshold unvoiced)
        - TODO confirm.
    """
    global h

    # Constant offset added to the threshold; currently zero.
    epsilon = 0
    a = preprocessing(a, 500)
    # Frame start times every 10 ms, converted to sample indices.
    t = np.arange(0, a.size/sr, 0.01)
    frames = (t * sr).astype('int')

    # Analysis window of 15 ms, i.e. 1.5 frame steps.
    window_size = int(sr * 0.01 * 1.50)

    wcz = np.array([weighted_zero_crossing(a, frame, window_size) for frame in frames])
    # Largest finite measure value; used as the upper edge of the histogram.
    rg = np.nanmax(wcz[wcz != np.inf])
    wcz_hist = smooth(np.histogram(wcz, bins=bins, range=(0,rg))[0])

    wcz_min = argrelmin(wcz_hist)[0]
    wcz_max = argrelmax(wcz_hist)[0]
    # detect_threshold returns a bin index; multiplying by the bin width rg/bins
    # converts it to the units of the measure.
    wcz_thresh = detect_threshold(wcz_hist, wcz_min, wcz_max, graph=threshold) * (rg/bins) + epsilon # valley of WCZ

    if signal:
        # graph of weighted zero crossing
        plt.figure(figsize=(20, 5))
        plt.plot(wcz)

    if wcz_v_rms:
        # NOTE(review): `r` and `rms_thresh` below are not defined in this
        # function; this branch cannot run unless they are provided as globals.
        # graphs RMS vs WZC
        # generates lines of slope: 6, 8, 10, 12, 14, 16, 18 ...
        plt.figure(figsize=(26, 5))
        plt.plot(wcz, r, '.', markersize=3)

        # divides the graph based upon
        # line is to the left of the valley of histogram
        plt.figure(figsize=(20, 5))
        plt.plot(wcz[wcz <= wcz_thresh], r[wcz <= wcz_thresh], '.', markersize=3)
        plt.plot(wcz[wcz > wcz_thresh], r[wcz > wcz_thresh], '.', markersize=3)

        # zoom-in on the RMS vs WCZ for the frames declared unvoiced
        plt.figure(figsize=(20, 5))
        plt.plot(wcz[(wcz <= wcz_thresh) & (r <= rms_thresh)], r[(wcz <= wcz_thresh) & (r <= rms_thresh)], '.', markersize=3)
        plt.plot(wcz[(wcz <= wcz_thresh) & (r > rms_thresh)], r[(wcz <= wcz_thresh) & (r > rms_thresh)], '.', markersize=3)

    if histogram:
        # histogram of WCZ (smoothed out with hanning)
        plt.figure(figsize=(20, 5))
        plt.plot(np.arange(0, rg, rg/bins)[:wcz_hist.size], wcz_hist)
        plt.axvline(wcz_thresh)
        plt.xlabel("Amplitude")
        plt.ylabel("Frequency")
        # The title spans from the running offset h to h plus the last frame time.
        plt.title("Histogram from " + str(int(round(h))) + " : " + str(int(round(h+t[t.size-1]))))
        plt.show()

    # corrections to increase performance
    def erase_short_signal(signal, orig, max_signal_len, context):
        """Return a copy of `signal` with short, low-valued True runs cleared.

        A run of True entries that ends at a False entry is set to False when
        it is shorter than `max_signal_len` and the mean of `orig` over the
        run, widened by `context // 2` entries on each side, is below
        1.2 * wcz_thresh. A run that extends to the end of the array is never
        examined, because the check only fires on a False entry.
        """
        n_sig = np.copy(signal)
        s = 0  # index where the current run of True entries starts
        i = 0

        while (i != signal.size):
            if not signal[i]:
                if i - s < max_signal_len and i - s != 0 and np.average(orig[max(s-context//2, 0):i+context//2]) < 1.2*wcz_thresh:
                    n_sig[s:i] = False
                s = i + 1

            i += 1
        return n_sig

    # ic: frames above the threshold after clean-up; c: its complement.
    ic = erase_short_signal(wcz > wcz_thresh, wcz, 30, 200)
    c = ~ic

    if detection:
        plt.figure(figsize=(20, 5))
        # Frame indices are shifted by h * 100 because there are 100 frames per second.
        plt.plot(c.nonzero()[0] + h * 100, wcz[c], '.', markersize=3)
        plt.plot(ic.nonzero()[0] + h * 100, wcz[ic], '.', markersize=3)
        plt.axhline(wcz_thresh)
        plt.axhline(wcz_thresh * 1.5, color='red')
        # Fixed reference line at 0.005; its meaning is not evident from this file.
        plt.axhline(0.005, color='purple')


    # Advance the running offset by the start time of the last frame.
    h += t[t.size-1]
    return ic

In [None]:
# Integer frame flags of the most recently processed song (kept for inspection).
uv = None
# Number of songs processed so far across all calls to create_and_store.
done = 0

def create_and_store (song_names):
    """Run the detector on each song and write one "<time> <flag>" line per frame.

    For every name in `song_names` the audio is loaded with `load_file`,
    passed to `voiced_unvoiced_detection` with 100 histogram bins and
    threshold plotting enabled, and the integer flags are written next to
    their frame start times (10 ms apart) into a text file. The output path
    is the module-level `pitch_dir` prefix plus the song name with its
    extension replaced by ".txt".

    Updates the module-level `y`, `sr`, `uv` and `done`. A PermissionError
    while writing is printed and does not stop the loop.

    Parameters
    ----------
    song_names : iterable of str
        File names relative to the current `data_dir`.
    """
    global y, sr, dp, uv, done
    for song_name in song_names:
        y, sr = load_file(song_name)
        duration = y.size/sr
        frames = np.arange(0, duration, 0.01)
        a = voiced_unvoiced_detection(y, 100, threshold=True).astype('int')
        uv = a

        # The frame grid here and the one inside the detector are computed
        # separately; zip stops at the shorter of the two instead of raising
        # an IndexError when they differ.
        lines = [str(frame_time) + ' ' + str(flag) for frame_time, flag in zip(frames, a)]

        # splitext removes whatever extension is present, not just three-letter ones.
        song_dir = os.path.splitext(pitch_dir + song_name)[0] + ".txt"

        done += 1
        print(f"{done} done")

        try:
            # The context manager closes the file even if the write fails.
            with open(song_dir, "w+") as fout:
                fout.write("\n".join(lines))
        except PermissionError as error:
            print(error)

In [None]:
# Scherbaum Mshavanadze
# parent_data_dir = '/Akamai/voice/data/Scherbaum Mshavanadze/'
# parent_pitch_dir = '/Akamai/voice/data/pitches-raw/v-uv/Scherbaum Mshavanadze/'

# for collection in os.listdir(parent_data_dir):
#     if os.path.isdir(f"{parent_data_dir}{collection}"):
#         parts = []
#         for part in os.listdir(f"{parent_data_dir}{collection}"):
#             if part[-3:] == 'wav':
#                 parts.append(part)
                
#         data_dir = parent_data_dir + collection + '/'
#         pitch_dir = parent_pitch_dir + collection + '/'
#         create_and_store(parts)
        
# Teach Yourself Megrelian Songs
parent_data_dir = '/Akamai/voice/data/Teach Yourself Megrelian Songs/'
parent_pitch_dir = '/Akamai/voice/data/pitches-raw/v-uv/Teach Yourself Megrelian Songs/'

# Directory names that are skipped when walking the corpus.
skipped_collections = ('mp3', '.ipynb_checkpoints')

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore the printed progress) reproducible between runs.
for collection in sorted(os.listdir(parent_data_dir)):
    collection_path = os.path.join(parent_data_dir, collection)
    if not os.path.isdir(collection_path) or collection in skipped_collections:
        continue

    print(collection)
    # Keep only files with a real ".wav" extension.
    parts = sorted(part for part in os.listdir(collection_path) if part.endswith('.wav'))

    # load_file and create_and_store concatenate these prefixes with the file
    # name, so both must keep their trailing separator.
    data_dir = parent_data_dir + collection + '/'
    pitch_dir = parent_pitch_dir + collection + '/'
    create_and_store(parts)
            
# Teach Yourself Gurian Songs
# parent_data_dir = '/Akamai/voice/data/Teach Yourself Gurian Songs/'
# parent_pitch_dir = '/Akamai/voice/data/pitches-raw/v-uv/Teach Yourself Gurian Songs/'

# for collection in os.listdir(parent_data_dir):
#     if os.path.isdir(f"{parent_data_dir}{collection}"):
#         parts = []
#         for part in os.listdir(f"{parent_data_dir}{collection}"):
#             if part[-3:] == 'wav':
#                 parts.append(part)

#         print(parts)
#         data_dir = parent_data_dir + collection + '/'
#         pitch_dir = parent_pitch_dir + collection + '/'
#         create_and_store(parts)