In [22]:
import os
import wave
import matplotlib.pyplot as plt

from numpy import argmax, mean, diff, log, percentile, frombuffer, polyfit, arange
from matplotlib.mlab import find
from scipy.signal import blackmanharris, fftconvolve, kaiser, decimate
from numpy.fft import rfft
from __future__ import division

%matplotlib inline

def parabolic(f, x):
    """Quadratic interpolation for estimating the true position of an
    inter-sample maximum when nearby samples are known.
   
    f is a vector and x is an index for that vector.
   
    Returns (vx, vy), the coordinates of the vertex of a parabola that goes
    through point x and its two neighbors.
   
    Example:
    Defining a vector f with a local maximum at index 3 (= 6), find local
    maximum if points 2, 3, and 4 actually defined a parabola.
   
    In [3]: f = [2, 3, 1, 6, 4, 2, 3, 1]
   
    In [4]: parabolic(f, argmax(f))
    Out[4]: (3.2142857142857144, 6.1607142857142856)
   
    """
    try:
        xv = 1/2. * (f[x-1] - f[x+1]) / (f[x-1] - 2 * f[x] + f[x+1]) + x
        yv = f[x] - 1/4. * (f[x-1] - f[x+1]) * (xv - x)
    except:
        return (0,0)
    return (xv, yv)

# from: https://github.com/endolith/waveform-analyzer
def freq_from_autocorr(sig, fs):
#     Estimate frequency using autocorrelation

    # Calculate autocorrelation (same thing as convolution, but with
    # one input reversed in time), and throw away the negative lags
    corr = fftconvolve(sig, sig[::-1], mode='full')
    corr = corr[len(corr)//2:]

    # Find the first low point
    d = diff(corr)
    greaterThanZero = (find(d > 0))
    
    if len(greaterThanZero) == 0:
        return 1000
    else :
#         print(len(greaterThanZero))
#         print(greaterThanZero)

        start = greaterThanZero[0]

        # Find the next peak after the low point (other than 0 lag).  This bit is
        # not reliable for long signals, due to the desired peak occurring between
        # samples, and other peaks appearing higher.
        # Should use a weighting function to de-emphasize the peaks at longer lags.
        peak = argmax(corr[start:]) + start
        px, py = parabolic(corr, peak)

        return fs / px


def voice_from_signal(signal, srate, step = 512, width = 4096, low = 50, high = 280):
    voice = []
    start = 0
    end = len(signal)
    while start < end - width:
        hertz_freq = freq_from_autocorr(signal[start:(start + width)], srate)
        abs_sig = []

        if hertz_freq > low and hertz_freq < high:
            voice = voice + [hertz_freq]
        start += step
    return voice

def format_sig(data, channels, method = 'first'):
    new_sig = []
    try:
        sig = frombuffer(data, dtype='int16').reshape(-1, channels)
        for val in sig[:,0]:
            new_val = val / 2**15
            if abs(new_val) >= 0.0:
                new_sig.append(new_val)
    except:
        new_sig = []
    return new_sig

def check_voice(sig, framerate, low = 35, high = 280, width = 16384, step = 8192):
    tab = voice_from_signal(sig, framerate, low = low, high = high, width = width, step = step)
    avr = mean(tab)
    q25 = percentile(tab, 25)
    q75 = percentile(tab, 75)
    iqr = q75 - q25
    if avr >= 145:
        gender = 'K'
    elif iqr >= 135:
        gender = 'K'
    else:
        gender = 'M'
    #print(gender)
    return(avr, iqr) 

errors = [ '059_K.wav', '028_K.wav', '075_M.wav', '019_M.wav', ]

if __name__ == '__main__':
    good = 0
    for wav_file in os.listdir('train'):#[:40]:
    #for wav_file in errors:#[:40]:
        print(wav_file)
        w = wave.open('train/' + wav_file)
        framerate = w.getframerate()
        frames = w.getnframes()
        channels = w.getnchannels()
        width = w.getsampwidth()       
        data = w.readframes(frames)
        sig = format_sig(data, channels)
        #plt.plot(sig)
        # best arguments: (sig, framerate, low = 35, high = 280, width = 16384, step = 8192)
        try:
            avr,iqr = check_voice(sig, framerate, low = 35, high = 280, width = 16384, step = 8192)            
        except:
            try:
                avr,iqr = check_voice(sig, framerate, low = 35, high = 280, width = 2048, step = 2048)
            except:
                avr = 200
                print("ERROR")
            #pass
        if avr >= 170:
            gender = 'K'
        elif iqr >= 135:
            gender = 'K'
        else:
            gender = 'M'
        print(gender)
        if gender == wav_file[4]:
            good += 1
        else:
            print(avr, iqr) 
        print("-----")
    print(good)

021_M.wav
M
-----
011_M.wav
M
-----
043_M.wav
M
-----
091_M.wav
M
-----
052_M.wav
M
-----
086_K.wav
K
-----
037_K.wav
K
-----
010_M.wav
M
-----
031_K.wav
K
-----
081_K.wav
K
-----
023_M.wav
M
-----
029_K.wav
K
-----
008_K.wav
K
-----
084_M.wav
M
-----
078_M.wav
M
-----
079_K.wav
K
-----
036_K.wav
K
-----
071_M.wav
M
-----
058_M.wav
M
-----
041_K.wav
K
-----
089_M.wav
M
-----
049_M.wav
M
-----
012_K.wav
K
-----
048_K.wav
K
-----
014_K.wav
K
-----
085_K.wav
K
-----
090_M.wav
M
-----
063_M.wav
M
-----
050_K.wav
K
-----
072_K.wav
K
-----
074_K.wav
K
-----
087_M.wav
M
-----
066_K.wav
K
-----
044_K.wav
K
-----
018_K.wav


  ret = ret.dtype.type(ret / rcount)


K
-----
035_M.wav
M
-----
083_K.wav
M
146.844675662 131.304238967
-----
040_K.wav
K
-----
006_K.wav
K
-----
004_M.wav
M
-----
064_M.wav
M
-----
051_K.wav
M
168.683994631 40.8301047448
-----
070_M.wav
M
-----
002_M.wav
M
-----
080_M.wav
M
-----
047_K.wav
K
-----
034_K.wav
K
-----
017_M.wav
M
-----
039_M.wav
M
-----
028_K.wav
K
-----
027_M.wav
M
-----
056_M.wav
M
-----
057_K.wav
K
-----
003_K.wav
K
-----
001_K.wav
K
-----
059_K.wav
ERROR
K
-----
009_K.wav
K
-----
005_M.wav
M
-----
053_M.wav
M
-----
025_K.wav
K
-----
045_M.wav
K
197.74919809 0.0167323493911
-----
054_K.wav
K
-----
082_M.wav
M
-----
007_M.wav
M
-----
016_K.wav
K
-----
033_M.wav
M
-----
042_M.wav
M
-----
046_K.wav
K
-----
060_K.wav
K
-----
032_M.wav
M
-----
065_M.wav
M
-----
022_K.wav
K
-----
026_M.wav
M
-----
015_K.wav
M
71.2631278099 0.236686907743
-----
019_M.wav
M
-----
061_M.wav
M
-----
030_M.wav
M
-----
073_K.wav
K
-----
075_M.wav
M
-----
088_K.wav
K
-----
069_K.wav
K
-----
062_K.wav
K
-----
013_M.wav
M
-----
024_M.wa