In [57]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import scipy as sp
import pandas as pd


import librosa #as _librosa
#from presets import Preset
# librosa = Preset(_librosa)
# librosa['sr'] = 44100
# librosa['n_fft'] = 4096
# librosa['hop_length'] = 1024

import librosa.display
import IPython.display as ipd
import os

%matplotlib notebook

# Function Definitions

In [62]:
def cutter(x, sr=None):
    '''
    cutter(x) takes wave amplitude array (x) and trims the edges, to avoid vocal onsets/stops at the beginning/end.  
    '''
    if sr == None:
        n1 = int(0.25*len(x))
        n2 = int(0.75*len(x))
    
        return x[n1:n2] 
    
    else:
        mid_idx = int(len(x)//2) # middle index
        hlf_wdw = int(0.5*(sr)/2.0) # half of 30 sec window in # of elements 
        return x[mid_idx-hlf_wdw:mid_idx+hlf_wdw] 

def formant_signal(x,order):
    '''
    formant_signal(x,order) takes wave amplitude array (x) and returns the LPC approximation 
    (s_formant) based on a linear autoregressive model to a given specified order (order).
    
    s_formant can be understood as the (sourceless) amplitude due to the vocal formants of the signal x.
    '''
    
    a = librosa.lpc(x, order) # (x,16)
    b = np.hstack([[0], -1 * a[1:]])
    s_formant = sp.signal.lfilter(b, [1], x)
    
    return s_formant

def residual(x,order,s=None):
    '''
    residual(x,order,s) subtracts the formants from the input signal x to yield the source wave (vocal cord buzz).
    '''
    if s is None: 
        s_formant = formant_signal(x,order)
        return (x-s_formant)
    else:
        return (x-s)

def audio_processor(x,sr=44100,order=16):
    '''
    Take input file and return ready for CNN 
    '''
    #x, sr = librosa.load(path_raw+file_test,sr=44100) #
    R_ = librosa.stft(residual(cutter(x,sr),order),n_fft=2048) # complex matrix 
    Rabs_ = np.abs(R_)
    RdB_ = librosa.amplitude_to_db(Rabs_, ref=np.max)
    RdB_expanded = np.expand_dims(RdB_, axis=2)
    RdB_ready = np.array( [RdB_expanded,] )
    #predict_2 = model.predict(   )
    
    return RdB_ready

In [None]:
 
# get images into array of shape (num_images, im_f, im_t)
def get_image_array(path_mat,im_f,im_t):
    
    filelist = [x for x in os.listdir(path_mat) if not x.startswith('.')]

    Rabs_array = np.zeros((len(filelist),im_f,im_t))

    for index,file in enumerate(filelist[:]):

        Rabs_array[index,:,:] = np.loadtxt(path_mat+'R'+str(index)+'.txt')

    print(Rabs_array.shape)
    
    return Rabs_array

# Reduce label array to 3 classes
def get_y3(y):
    
    y3 = np.copy(y)
    
    for index,elem in enumerate(y):
        
        if int(elem) == 2:
            
            y3[index] = 1
            
        if int(elem) == 3:
            
            y3[index] = 2
            
    return y3


In [78]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citiation
    ---------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    accuracy = np.trace(cm) / np.sum(cm).astype('float')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.figure(figsize=(4, 3))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]


    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if normalize:
            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        else:
            plt.text(j, i, "{:,}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")


    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()

# Path Management

In [56]:
# Specify Data Paths
path_raw =  os.path.abspath(os.pardir+"/Data/Data_Raw_Labeled/") + "/"
path_mat =  os.path.abspath(os.pardir+"/Data/Data_Mat/") + "/"
path_lab =  os.path.abspath(os.pardir+"/Data/Data_Lab/") + "/"
path_input =  os.path.abspath(os.pardir+"/Data/Data_Input/") + "/"

path_raw

'/Users/tamiro/Desktop/Insight/Code/SafeSing/SafeSing_Project/Data/Data_Raw_Labeled/'

In [41]:
# Specify the Order of the Formant Approximation
order = 16

# Process Audio to Images

In [48]:
# Process waves from Data_Raw_Labeled into abs(amplitude) images, dump into path_mat
# labels B: Breathy , N: Neutral, F: Flow, P: Pressed

save = True # set to True to save output

# counters
cb = 0 # count breathy
cn = 0 # count neutral
cf = 0 # count flow
cp = 0 # count pressed

# get all files in the path_raw folder
filelist = [x for x in os.listdir(path_raw) if not x.startswith('.')]

# initialize labels (y) array - one label for every input file.
y = np.zeros(len(filelist),dtype=int)

for index, file in enumerate(filelist[:]):
    
    print('index=',index)
    
    if 'breathy' in file:
        label = 0 
        cb += 1
    if 'neutral' in file:
        label = 1 
        cn += 1
    if 'flow' in file:
        label = 2 
        cf += 1
    if 'pressed' in file:
        label = 3 
        cp += 1
    try:
        x, sr = librosa.load(path_raw+file,sr=44100) #
        if len(x) < 22050: # if less than half a second
            print('WARNING, INPUT LESS THAN 30 SECS!')
        #print(x.shape)
    except:
        print(file+'has an issue')
    
    R = librosa.stft(residual(cutter(x,sr),order),n_fft=2048) # complex matrix 
    Rabs = np.abs(R) # square root of the spectrogram matrix
    #print(R.shape)
    
    y[index] = label
    
    if save==True:
        np.savetxt(path_mat+'R'+str(index)+'.txt',Rabs)

if save==True:
    np.savetxt(path_lab+'y'+'.txt',y)
    
print('counts:',cb,cn,cf,cp)
print('len_file_list:'+str(len(filelist))+' =? '+str(cb+cn+cf+cp))

index= 0
index= 1
index= 2
index= 3
index= 4
index= 5
index= 6
index= 7
index= 8
index= 9
index= 10
index= 11
index= 12
index= 13
index= 14
index= 15
index= 16
index= 17
index= 18
index= 19
index= 20
index= 21
index= 22
index= 23
index= 24
index= 25
index= 26
index= 27
index= 28
index= 29
index= 30
index= 31
index= 32
index= 33
index= 34
index= 35
index= 36
index= 37
index= 38
index= 39
index= 40
index= 41
index= 42
index= 43
index= 44
index= 45
index= 46
index= 47
index= 48
index= 49
index= 50
index= 51
index= 52
index= 53
index= 54
index= 55
index= 56
index= 57
index= 58
index= 59
index= 60
index= 61
index= 62
index= 63
index= 64
index= 65
index= 66
index= 67
index= 68
index= 69
index= 70
index= 71
index= 72
index= 73
index= 74
index= 75
index= 76
index= 77
index= 78
index= 79
index= 80
index= 81
index= 82
index= 83
index= 84
index= 85
index= 86
index= 87
index= 88
index= 89
index= 90
index= 91
index= 92
index= 93
index= 94
index= 95
index= 96
index= 97
index= 98
index= 99
index= 100

index= 755
index= 756
index= 757
index= 758
index= 759
index= 760
index= 761
index= 762
index= 763
index= 764
index= 765
index= 766
index= 767
index= 768
index= 769
index= 770
index= 771
index= 772
index= 773
index= 774
index= 775
index= 776
index= 777
index= 778
index= 779
index= 780
index= 781
index= 782
index= 783
index= 784
index= 785
index= 786
index= 787
index= 788
index= 789
index= 790
index= 791
index= 792
index= 793
index= 794
index= 795
index= 796
index= 797
index= 798
index= 799
index= 800
index= 801
index= 802
index= 803
index= 804
index= 805
index= 806
index= 807
index= 808
index= 809
index= 810
index= 811
index= 812
index= 813
index= 814
index= 815
index= 816
index= 817
index= 818
index= 819
index= 820
index= 821
index= 822
index= 823
index= 824
index= 825
index= 826
index= 827
index= 828
index= 829
index= 830
index= 831
index= 832
index= 833
index= 834
index= 835
index= 836
index= 837
index= 838
index= 839
index= 840
index= 841
index= 842
index= 843
index= 844
index= 845

# Model 1

In [69]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [71]:
# static parameters by design (for 0.5 sec recording, sr=44100, n_fft=2048)
im_f= 1025
im_t= 44

n_samples = 909

In [54]:
# Sample image
Rabs = np.loadtxt(path_mat+'R'+str(0)+'.txt')

print(Rabs.shape)
plt.figure(figsize=(2,2))
librosa.display.specshow(Rabs, x_axis='time', y_axis='log')

(1025, 44)


<IPython.core.display.Javascript object>

<matplotlib.collections.QuadMesh at 0x123a7d7c0>

## Model Input Prep

In [63]:
# Pre-process data for CNN

# Rabs_array = get_image_array(path_mat,im_f,im_t)

# y = np.loadtxt(path_lab+'y'+'.txt')
# y3 = get_y3(y)

# np.savetxt(path_input+'R_array'+'.txt',Rabs_array.flatten())
# np.savetxt(path_input+'Y3'+'.txt',y3)

(909, 1025, 44)


In [66]:
# Load Data for CNN
Rabs_array = np.loadtxt(path_input+'R_array'+'.txt').reshape((n_samples,im_f,im_t))
y3 = np.loadtxt(path_input+'Y3'+'.txt')

print(Rabs_array.shape)
print(y3.shape)

In [68]:
# Log the abs(amplitude) to dBs.
RdB = librosa.amplitude_to_db(Rabs_array, ref=np.max)
RdB.shape

(909, 1025, 44)

# Train the Model

In [75]:
# Train_Test_Split
train_images, test_images, train_labels, test_labels = train_test_split(RdB, y3, test_size=0.3, random_state=40)

# Reshape the images.
train_images = np.expand_dims(train_images, axis=3)
test_images = np.expand_dims(test_images, axis=3)

num_filters = 8
filter_size = 3
pool_size = 2

# Build the model.
model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(im_f, im_t, 1)),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(3, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

In [76]:
# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=30,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x161a9a610>

In [77]:
model.save_weights('ssing_cnn_v1.h5')

# Predictions & Confusion Matrices

In [79]:
predictions = model.predict(test_images[:])
cm =confusion_matrix(test_labels, np.argmax(predictions, axis=1))

In [80]:
plot_confusion_matrix(cm,['breathy','balanced','pressed'],
                          title='Confusion matrix',
                          cmap=None,
                          normalize=False)

<IPython.core.display.Javascript object>

In [111]:
# Testing particular wave files
filelist = [x for x in os.listdir(path_raw) if not x.startswith('.')]
err_indices = []
for index in range(0,n_samples):
    file_test = filelist[index]
    
    RdB_1 = np.expand_dims(RdB[index], axis=2)
    predict_1 = model.predict( np.array( [RdB_1,] )  )
    
    x, sr = librosa.load(path_raw+file_test,sr=44100) #
    R = librosa.stft(residual(cutter(x,sr),order),n_fft=2048) # complex matrix 
    Rabs = np.abs(R)
    RdB_2 = np.expand_dims(librosa.amplitude_to_db(Rabs, ref=np.max), axis=2)
    predict_2 = model.predict( np.array( [RdB_2,] )  )
    
    if not (np.argmax(predict_1, axis=1)[0]==np.argmax(predict_2, axis=1)[0]):
        add_str = ', Error at index =' +str(index)
        err_indices.append(index)
    else:
        add_str = ''
        
    print(str(np.argmax(predict_1, axis=1)[0])+' =? '+str(np.argmax(predict_2, axis=1)[0])+add_str)
    
print(err_indices)

1 =? 0, Error at index =0
2 =? 1, Error at index =1
0 =? 0
1 =? 1
1 =? 1
2 =? 2
1 =? 1
1 =? 1
0 =? 0
0 =? 0
2 =? 2
2 =? 2
1 =? 1
1 =? 1
1 =? 1
2 =? 2
2 =? 2
2 =? 2
0 =? 0
0 =? 0
1 =? 0, Error at index =20
0 =? 0
0 =? 0
0 =? 0
0 =? 0
1 =? 1
2 =? 1, Error at index =26
1 =? 1
2 =? 2
1 =? 1
1 =? 1
0 =? 0
1 =? 1
0 =? 0
0 =? 0
1 =? 1
0 =? 0
1 =? 1
0 =? 0
0 =? 0
0 =? 0
1 =? 1
2 =? 1, Error at index =42
0 =? 0
2 =? 2
1 =? 1
0 =? 0
1 =? 1
1 =? 1
1 =? 1
1 =? 1
2 =? 1, Error at index =51
1 =? 1
2 =? 2
0 =? 0
0 =? 0
0 =? 0
2 =? 2
2 =? 2
2 =? 2
1 =? 1
1 =? 1
2 =? 2
1 =? 1
1 =? 1
1 =? 1
0 =? 0
1 =? 1
1 =? 1
1 =? 1
2 =? 1, Error at index =70
2 =? 1, Error at index =71
2 =? 2
0 =? 0
0 =? 0
2 =? 2
0 =? 0
0 =? 0
1 =? 1
0 =? 0
1 =? 1
0 =? 0
1 =? 1
2 =? 2
1 =? 1
2 =? 1, Error at index =85
1 =? 1
2 =? 2
2 =? 2
1 =? 1
2 =? 2
2 =? 1, Error at index =91
1 =? 1
0 =? 0
1 =? 1
0 =? 0
0 =? 0
2 =? 2
0 =? 0
2 =? 2
2 =? 2
2 =? 0, Error at index =101
2 =? 2
1 =? 1
1 =? 1
1 =? 1
2 =? 0, Error at index =106
2 =? 2
2 =?

In [108]:
err_indices_1 = err_indices

In [112]:
err_indices_2 = err_indices

In [113]:
# 86 errors - 10% ! WHY?
print('num_errs_1=',len(err_indices_1))
print('num_errs_2=',len(err_indices_2))

num_errs_1= 86
num_errs_2= 86


In [114]:
err_indices_2==err_indices_1
# Errors are consistent.

True