In [1]:
import numpy as np
import wave
from scipy.io import wavfile
from scipy import signal
from keras.utils.np_utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Reshape, Input
from tensorflow.keras.layers import Convolution2D, BatchNormalization
from librosa.core import stft
import matplotlib.pyplot as plot
import matplotlib.cm as cm
import tensorflow.keras.backend as K
from tensorflow.keras import optimizers

Using TensorFlow backend.


In [2]:
def _wav2array(nchannels, sampwidth, data):
    """data must be the string containing the bytes from the wav file."""
    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
    if remainder > 0:
        raise ValueError('The length of data is not a multiple of '
                         'sampwidth * num_channels.')
    if sampwidth > 4:
        raise ValueError("sampwidth must not be greater than 4.")

    if sampwidth == 3:
        a = np.empty((num_samples, nchannels, 4), dtype=np.uint8)
        raw_bytes = np.fromstring(data, dtype=np.uint8)
        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
        result = a.view('<i4').reshape(a.shape[:-1])
    else:
        # 8 bit samples are stored as unsigned ints; others as signed ints.
        dt_char = 'u' if sampwidth == 1 else 'i'
        a = np.fromstring(data, dtype='<%s%d' % (dt_char, sampwidth))
        result = a.reshape(-1, nchannels)
    return result


def readwav(file):
    """
    Read a wav file.
    Returns the frame rate, sample width (in bytes) and a numpy array
    containing the data.
    This function does not read compressed wav files.
    """
    wav = wave.open(file)
    rate = wav.getframerate()
    nchannels = wav.getnchannels()
    sampwidth = wav.getsampwidth()
    nframes = wav.getnframes()
    data = wav.readframes(nframes)
    wav.close()
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array

In [None]:
X_data = np.zeros(shape=(184214,45056))
print(X_data.shape)

for r in range(0, 184214):
    audio = readwav('/Users/josephcleveland/LongerDataset/samp' + str(r) + '.wav')[2]
    audio = audio.T[0]
    if(audio.shape != 45056):
        padded = np.zeros(shape=(45056))
        padded[:np.shape(audio)[0]] = audio
        audio = padded
    X_data[r] = audio
    


(184214, 45056)




In [None]:
spect = stft(X_data[15],n_fft=1408)
print(spect.shape)
plot.imshow(spect.real[0:64], aspect='auto', cmap=cm.gist_rainbow)
plot.show()

In [None]:
Y_data = np.zeros(shape=(184214, 23))
mat = np.genfromtxt('/Users/josephcleveland/FullData/mat.txt', delimiter=',')
Y_data = mat[0:184214]
Y_train = Y_data
#for i in range(0, 10000):
#    onehot = to_categorical(mat[i], 16)
#    Y_data[i] = onehot.ravel()

In [None]:
#X_data = signal.resample(X_data, 16384)
X_train = np.zeros((184214, 64, 64))
for i in range(0, 184214):
    X_train[i] = stft(X_data[i],n_fft=1408)[0:64, 0:64]
Y_train = Y_data
X_train = np.expand_dims(X_train, axis=3)

print(X_train.shape)

In [458]:
#model = Sequential()
#model.add(BatchNormalization())
#model.add(Convolution2D(96, kernel_size=(64, 1), strides=(4, 1), padding="valid", activation='relu', data_format="channels_last", input_shape=(16384, 1, 1)))
#model.add(Convolution2D(96, kernel_size=(32, 1), strides=(4, 1), padding="valid", activation='relu'))
#model.add(Convolution2D(128, kernel_size=(16, 1), strides=(4, 1), padding="valid", activation='relu'))
#model.add(Convolution2D(257, kernel_size=(8, 1), strides=(4, 1), padding="valid", activation='relu'))
#model.add(Reshape((173, 257, 1)))
#model.add(Reshape((61, 257, 1)))
#model.add(Convolution2D(32, kernel_size=(3, 3), strides=(2, 2), padding="same", activation='relu'))
#model.add(Convolution2D(71, kernel_size=(3, 3), strides=(2, 2), padding="same", activation='relu'))
#model.add(Convolution2D(128, kernel_size=(4, 3), strides=(3, 2), padding="same", activation='relu'))
#model.add(Convolution2D(128, kernel_size=(3, 3), strides=(2, 2), padding="same", activation='relu'))
#model.add(Convolution2D(128, kernel_size=(3, 3), strides=(2, 2), padding="same", activation='relu'))
#model.add(Convolution2D(128, kernel_size=(3, 3), strides=(2, 1), padding="same", activation='relu'))
#model.add(Flatten())
#model.add(Dense(512, activation='relu'))
#model.add(Dense(368, activation='relu'))

#model.compile(loss='categorical_crossentropy',
#              optimizer='adam',
#              metrics=['accuracy'])

In [None]:
inputs = Input(shape=(64,64,1))

# a layer instance is callable on a tensor, and returns a tensor
x = BatchNormalization()(inputs)
x = Convolution2D(32, kernel_size=(4, 5), strides=(3, 4), padding="same", activation='relu')(x)
x = Convolution2D(98, kernel_size=(4, 6), strides=(3, 5), padding="same", activation='relu')(x)
x = Convolution2D(128, kernel_size=(4, 6), strides=(3, 5), padding="same", activation='relu')(x)
#x = Convolution2D(128, kernel_size=(3, 3), strides=(2, 2), padding="same", activation='relu')(x)
#x = Convolution2D(128, kernel_size=(3, 3), strides=(2, 2), padding="same", activation='relu')(x)
#x = Convolution2D(128, kernel_size=(3, 3), strides=(2, 1), padding="same", activation='relu')(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dense(23)(x)
# This creates a model that includes
# the Input layer and three Dense layers
adam = optimizers.Adam(lr=0.0001)
model = Model(inputs=inputs, outputs=x)
model.compile(adam,
              loss='mean_squared_error',
              metrics=['accuracy'])

In [None]:

model.fit(X_train, Y_train, 
          batch_size=16, epochs=100, verbose=1, validation_split=0.1)

In [17]:
model.build(input_shape=(45056, 1, 1))
print(model.summary())
print()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 64, 64, 1)]       0         
_________________________________________________________________
batch_normalization_v2_1 (Ba (None, 64, 64, 1)         4         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 22, 16, 32)        672       
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 8, 4, 98)          75362     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 1, 128)         301184    
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 2, 1, 128)         147584    
_________________________________________________________________
flatten_1 (Flatten)          (None, 256)               0   

In [25]:
o = model.predict(X_train)

In [10]:
print(o[1])
print(mat[1])
print(np.shape(o))

[ 4.077089   4.661457   7.319432   7.1032705 10.003686   4.011772
 11.534853   7.038624  14.273697  12.439068  11.698097  13.491874
  9.351921   2.8655946 11.80694    4.845967   9.220253   4.9304028
  7.0712137  5.054822   2.8614168  2.1464577  2.2779913]
[ 2.  9.  4. 12. 15.  4.  7.  3. 15. 12. 12. 14.  9.  0. 12.  8.  8.  5.
  7.  8.  7.  1.  0.]
(82280, 23)


In [9]:
group = 0
classes = np.zeros((o.shape[0], 23))
samp = 1
for s in range(0, o.shape[0]):
    for i in range(0, 23):
        maxVal = o[s, i * 16]
        maxI = 0
        for j in range(0, 16):
            v = o[s, (i*16) + j]
            if v > maxVal:
                maxVal = o[s, (i*16) + j]
                maxI = j
        classes[s, i] = maxI
print(classes)

IndexError: index 23 is out of bounds for axis 1 with size 23

In [29]:
print(o[10])
print(mat[10])

[11.702169   5.7339787  6.2017517  9.970756   9.639844   3.9807825
  6.853554   5.038217  12.988592   9.821326  10.291265   5.4956775
 12.666799   5.187009   3.8794165  8.454811   6.2328587 15.02384
  8.572933   5.468956  12.684594   7.267308   7.9432487]
[13.  8.  7. 11. 13.  4.  6.  6.  7. 10. 10.  8. 14.  1.  3.  4.  7. 15.
  8.  7. 11.  9. 13.]


In [26]:
average_dist = np.zeros(23)

for p in range(0, 23):
    dist_total = 0
    for i in range(0, 82280):
        dist_total += np.absolute(o[i, p] - mat[i, p])
    average_dist[p] = dist_total / 82280
    
print(average_dist)


[2.78356618 2.81261644 2.69903284 2.71368652 2.5583678  2.59226454
 2.31497561 2.48821694 2.33285342 2.48370028 2.11280012 2.1474281
 2.49421588 2.58072394 2.15158778 2.38844072 0.79193505 1.05900752
 2.9915637  2.96803418 1.70201075 1.40181092 2.97580169]


In [279]:
# Save the weights
model.save_weights('Regression1_weights.h5')

# Save the model architecture
with open('Regression1_architecture.json', 'w') as f:
    f.write(model.to_json())