<a href="https://colab.research.google.com/github/Ananas1Anass/Speech-Recognition/blob/main/training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!unzip -u "/content/drive/MyDrive/normalized1.zip" -d "/content/drive/MyDrive/dataset_port"

In [None]:
# List the extracted class folders (one directory per spoken word)
!ls /content/drive/MyDrive/dataset_port/normalized1


Ajuda	    Arma	 _background_noise_  Policia  Segurenca
Ambulancia  Assassinato  Faca		     Roubo


In [None]:
# Extract data_Anass.zip into dataset_eng on Drive
# (unzip -u only writes files that are new or newer than what is on disk).
!unzip -u "/content/drive/MyDrive/data_Anass.zip" -d "/content/drive/MyDrive/dataset_eng"

In [None]:
# List the extracted class folders (one directory per spoken word)
!ls /content/drive/MyDrive/dataset_eng/data_Anass/


_background_noise_  bird  down	   go	 off	stop	wow
backward	    cat   follow   left  right	up	yes
bed		    dog   forward  no	 seven	visual	zero


In [None]:
# Mount Google Drive at /content/drive; force_remount=True re-mounts even when
# a mount is already present.
# NOTE(review): the cells above this one read from /content/drive, so Drive
# must be mounted before they run on a fresh runtime.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [None]:
from os import listdir
from os.path import isdir, join
from tensorflow.keras import layers, models, metrics
import tensorflow as tf
import numpy as np

In [None]:
# Collect the class labels: one sub-directory of the dataset per target word.
# The background-noise folder is not a target word, so it is filtered out in
# the comprehension (unlike list.remove(), this does not raise when the folder
# is absent, and the cell stays safe to re-run).
# NOTE(review): listdir() returns entries in arbitrary order; the integer
# labels in the feature file presumably index into this same ordering —
# confirm against the feature-extraction step.
dataset_path = '/content/drive/MyDrive/dataset_eng/data_Anass/'
all_targets = [
    name
    for name in listdir(dataset_path)
    if isdir(join(dataset_path, name)) and name != '_background_noise_'
]

In [None]:
# Show what is stored next to the extracted dataset (feature files live here)
!ls /content/drive/MyDrive/dataset_eng/

data_Anass  targets_mfcc_32_eng.npz  targets_mfcc_32_port.npz


In [None]:
# Settings: every class is treated as a wake word, and the trained model is
# written to Drive under a name built from all of them.
wake_words = all_targets
joined_wake_words = '_'.join(wake_words)
model_filename = f'/content/drive/MyDrive/dataset_eng/wake_words_{joined_wake_words}_work.h5'

# Location of the pre-computed feature archive
# NOTE(review): the listing of dataset_eng on Drive shows this .npz there, while
# it is loaded from /content/ — confirm the file has been copied locally.
feature_sets_path = '/content/'
feature_sets_filename = 'targets_mfcc_32_eng.npz'

# Open the archive and show the names of the arrays it holds
feature_sets_file = join(feature_sets_path, feature_sets_filename)
feature_sets = np.load(feature_sets_file)
print(feature_sets.files)

['x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test']


In [None]:
# Pull the train / validation / test arrays out of the loaded archive
(x_train, y_train,
 x_val, y_val,
 x_test, y_test) = (
    feature_sets[array_name]
    for array_name in ('x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test')
)

In [None]:
# Tensor dimensions of each split, in train / validation / test order
for split_features in (x_train, x_val, x_test):
    print(split_features.shape)

# Peek at the raw (integer-coded) validation labels
print(y_val)

(26417, 32, 32)
(3292, 32, 32)
(3333, 32, 32)
[10. 10. 19. ...  1.  6. 15.]


In [None]:
# Convert the integer ground-truth labels into a multi-label indicator matrix:
# one column per wake word, 1.0 where the sample's label equals that word's
# index in all_targets and 0.0 everywhere else.
wake_words_indexes = [all_targets.index(wake_word) for wake_word in wake_words]


def labels_to_indicator(labels, class_indexes):
    """Return a float64 0/1 matrix of shape (len(labels), len(class_indexes)).

    Entry [i, j] is 1.0 when labels[i] == class_indexes[j], else 0.0.
    The comparison is broadcast in NumPy instead of looping over samples.
    """
    label_column = np.asarray(labels).reshape(-1, 1)
    return np.equal(label_column, np.asarray(class_indexes)).astype('float64')


ys_train = labels_to_indicator(y_train, wake_words_indexes)
ys_val = labels_to_indicator(y_val, wake_words_indexes)
ys_test = labels_to_indicator(y_test, wake_words_indexes)

In [None]:
# Fraction of validation samples that are positive for each wake-word column
print(tuple(sum(column) / len(column) for column in zip(*ys_val)))

(0.11755771567436209, 0.027946537059538274, 0.050729040097205344, 0.02703523693803159, 0.05133657351154314, 0.028250303766707167, 0.04495747266099635, 0.05224787363304982, 0.050729040097205344, 0.04374240583232078, 0.10814094775212636, 0.04981773997569866, 0.027946537059538274, 0.022174969623329283, 0.051944106925880924, 0.02399756986634265, 0.024301336573511544, 0.02551640340218712, 0.04981773997569866, 0.12181044957472661)


In [None]:
# The convolutional layers take (batch, height, width, channels) input, so
# every split gets a trailing "color" channel axis of size 1.
x_train = x_train.reshape(x_train.shape[:3] + (1,))
x_val = x_val.reshape(x_val.shape[:3] + (1,))
x_test = x_test.reshape(x_test.shape[:3] + (1,))
for reshaped in (x_train, x_val, x_test):
    print(reshaped.shape)

# Shape of a single sample (batch axis dropped) — used as the CNN input shape
sample_shape = x_test.shape[1:]
print(sample_shape)

(26417, 32, 32, 1)
(3292, 32, 32, 1)
(3333, 32, 32, 1)
(32, 32, 1)


In [None]:
# Build model
# Based on: https://www.geeksforgeeks.org/python-image-classification-using-keras/
#
# Feature extractor: five 2x2 'same'-padded convolutions with growing filter
# counts; each of the first four is followed by 2x2 max-pooling.
# Classifier: three 1024-unit dense layers, each followed by 40% dropout, then
# one sigmoid unit per wake word.
# Only the first layer declares input_shape; later layers infer their input
# from the layer before them.
# NOTE(review): each sample appears to carry exactly one label, in which case
# a softmax output with categorical cross-entropy may fit better — confirm
# before changing.
model = models.Sequential([
    layers.Conv2D(32, (2, 2), activation='relu', padding='same',
                  input_shape=sample_shape),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, (2, 2), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, (2, 2), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(256, (2, 2), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(1024, (2, 2), activation='relu', padding='same'),

    # Classifier
    layers.Flatten(),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.4),

    layers.Dense(len(wake_words), activation='sigmoid'),
])



In [None]:
# Display model
# Print the layer-by-layer output shapes and parameter counts
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_16 (Conv2D)          (None, 32, 32, 32)        160       
                                                                 
 max_pooling2d_14 (MaxPoolin  (None, 16, 16, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_17 (Conv2D)          (None, 16, 16, 64)        8256      
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 8, 8, 64)         0         
 g2D)                                                            
                                                                 
 conv2d_18 (Conv2D)          (None, 8, 8, 128)         32896     
                                                                 
 max_pooling2d_16 (MaxPoolin  (None, 4, 4, 128)       

In [None]:
# Add training parameters to model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              loss_weights=[1, 100],
              metrics=['acc', metrics.Precision(), metrics.Recall()])

In [None]:
# Train
# Fit on the training split; the validation split is passed along so its
# metrics are recorded in the returned history as well.
EPOCHS = 50
BATCH_SIZE = 200
history = model.fit(
    x_train,
    ys_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_val, ys_val),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Plot results
import matplotlib.pyplot as plt


def plot_history_metric(history, key, label, filename=None):
    """Plot one metric's training and validation curves on a new figure.

    Args:
        history: object returned by model.fit(); its .history dict must hold
            the entries `key` and 'val_' + `key`.
        key: name of the metric in history.history (e.g. 'loss').
        label: human-readable metric name used in the title and legend.
        filename: when given, the figure is also saved to this path.
    """
    train_values = history.history[key]
    val_values = history.history['val_' + key]
    # Epochs are numbered from 1, matching the training log
    epochs = range(1, len(train_values) + 1)

    plt.figure()
    plt.plot(epochs, train_values, 'bo', label=f'Training {label}')
    plt.plot(epochs, val_values, 'b', label=f'Validation {label}')
    plt.title(f'Training and validation {label}')
    plt.xlabel('Epoch')
    plt.legend()
    if filename is not None:
        # Save before show(): the figure may be cleared once it is displayed
        plt.savefig(filename)
    plt.show()


# The keys below are the metric names the model was compiled with
# ('acc', 'precision', 'recall') plus the always-present 'loss'.
plot_history_metric(history, 'acc', 'accuracy')
plot_history_metric(history, 'loss', 'loss', 'Training_and_validation_loss.png')
plot_history_metric(history, 'precision', 'precision', 'Training_and_validation_precision.png')
plot_history_metric(history, 'recall', 'recall', 'Training_and_validation_recall.png')

NameError: ignored

In [None]:
# Persist the trained network to the configured file on Drive
model.save(model_filename)

# To check a saved model instead of the in-memory one, reload it first:
#model = models.load_model(model_filename)

# Score the model on the held-out test split (loss followed by the compiled metrics)
model.evaluate(x=x_test, y=ys_test)

NameError: ignored

In [None]:
# Display the class names; list position is the index used to build the label columns
all_targets

NameError: ignored