In [1]:
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import AveragePooling2D, Dropout, Flatten, Dense, Input
from tensorflow.keras.applications import VGG16, MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.utils import to_categorical
import numpy as np
import os

Spectrogram loading...

In [3]:
mel_folder = r"/home/user/Sahil/Mel_spec"
labels = []
data = []
for filename in os.listdir(mel_folder):
    labels.append(filename[len(filename)-5])
    file = np.load(mel_folder + '/' + filename)
    data.append(file)


In [4]:
data = np.array(data)
labels = np.array(labels)

In [5]:
from sklearn.model_selection import train_test_split
train_data, test_data, train_labels , test_labels = train_test_split(data, labels, test_size=0.2,random_state = 1)
print (train_data.shape, train_labels.shape)
print (test_data.shape, test_labels.shape)

(7624, 128, 65) (7624,)
(1906, 128, 65) (1906,)


In [6]:
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)


In [7]:
train_data = np.reshape(train_data,(train_data.shape[0],train_data.shape[1],train_data.shape[2],1))
test_data = np.reshape(test_data,(test_data.shape[0],test_data.shape[1],test_data.shape[2],1))
print(train_data.shape)
print(test_data.shape)

(7624, 128, 65, 1)
(1906, 128, 65, 1)


68 features loading...

In [8]:
import os
import numpy as np
folder_path = r"/home/user/Sahil/features_68_numpy"
data = []
labels = []
for filename in os.listdir(folder_path):
    if filename.endswith('.npy'):
        file_path = os.path.join(folder_path,filename)
        file = np.load(file_path)
        #print(filename)
        #print(file.shape)
        data.append(file)
        labels.append(filename[len(filename)-5])

In [9]:
data = np.array(data)
labels = np.array(labels)

In [10]:
from sklearn.model_selection import train_test_split
Ftrain_data, Ftest_data, Ftrain_labels , Ftest_labels = train_test_split(data, labels, test_size=0.2,random_state = 1)
print (Ftrain_data.shape, Ftrain_labels.shape)
print (Ftest_data.shape, Ftest_labels.shape)

(7624, 68, 128) (7624,)
(1906, 68, 128) (1906,)


In [11]:
Ftrain_data = np.array(Ftrain_data)

In [12]:
Ftrain_data = np.reshape(Ftrain_data,(Ftrain_data.shape[0],Ftrain_data.shape[1]*Ftrain_data.shape[2]))
Ftest_data = np.reshape(Ftest_data,(Ftest_data.shape[0],Ftest_data.shape[1]*Ftest_data.shape[2]))


In [13]:
Ftrain_labels = to_categorical(Ftrain_labels)
Ftest_labels = to_categorical(Ftest_labels)


In [14]:
 Ftrain_labels == train_labels

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       ...,
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

Model loading...

In [15]:
orth_initializer = tf.keras.initializers.Orthogonal()

In [16]:
# VGG Model

baseModel = VGG16(weights=None, include_top=False, input_tensor=Input(shape=(128,65,1)))

# add layers for transfer learning
headModel = baseModel.output
#headModel = AveragePooling2D(pool_size=(4, 4),name = 'averagepool_last')(headModel)
headModel = Flatten(name="flatten")(headModel)


tensor_train = Input(shape=(Ftrain_data.shape[1],),name = "68_feature_input")
merged_output = tf.keras.layers.Concatenate(axis=1,name = "merge_inputs")([headModel, tensor_train])

#headModel = Dense(1024, activation="relu")(headModel)
headModel = Dense(512, activation="relu",name = 'dense_512')(merged_output)
headModel = Dense(128, activation="relu",name = 'dense_128')(headModel)   # we can experiment with more dense layers if output of average pooling layer is too big
headModel = Dropout(0.3,name = 'dropout_last')(headModel)
out_mlp = Dense(3, activation="softmax",name = 'output')(headModel)  # we have 5 classes

# place the head FC model on top of the base model (this will become
# the actual model we will train)
total_model = Model(inputs = [baseModel.input,tensor_train], outputs=out_mlp)
#total_model.summary()
    


In [17]:
for layer in total_model.layers:
    if layer.__class__.__name__=='BatchNormalization':
        #print('exist')
        layer.trainable = False

        
regularizer = tf.keras.regularizers.l2(.0001)

for layer in total_model.layers:
    if layer.trainable == True:
        for attr in ['kernel_regularizer']:
            if hasattr(layer, attr):
                setattr(layer, attr, regularizer)
                
for layer in total_model.layers:
    if layer.trainable == True:
        for attr in ['kernel_initializer']:
            if hasattr(layer, attr):
                setattr(layer, attr, orth_initializer)

In [18]:
opt=Adam(lr=1e-4)
total_model.compile(loss='categorical_crossentropy', optimizer=opt,	metrics=["accuracy"])
total_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 65, 1)] 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 128, 65, 64)  640         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 128, 65, 64)  36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 64, 32, 64)   0           block1_conv2[0][0]               
______________________________________________________________________________________________

In [19]:
import tensorflow as tf
#callback for best Val Accuracy
new_model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="/home/user/Sahil/dual_features.ckpt",
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

In [20]:
#total_model
total_model.fit([train_data,Ftrain_data], train_labels, batch_size=32,validation_data=([test_data,Ftest_data], test_labels),epochs=100, verbose=2, callbacks=[new_model_checkpoint_callback])

Epoch 1/100
239/239 - 137s - loss: 0.8083 - accuracy: 0.7155 - val_loss: 0.6047 - val_accuracy: 0.7539
Epoch 2/100
239/239 - 18s - loss: 0.5732 - accuracy: 0.7647 - val_loss: 0.5537 - val_accuracy: 0.7650
Epoch 3/100
239/239 - 18s - loss: 0.5376 - accuracy: 0.7710 - val_loss: 0.5452 - val_accuracy: 0.7665
Epoch 4/100
239/239 - 19s - loss: 0.5268 - accuracy: 0.7854 - val_loss: 0.5580 - val_accuracy: 0.7739
Epoch 5/100
239/239 - 19s - loss: 0.5041 - accuracy: 0.7916 - val_loss: 0.5357 - val_accuracy: 0.7791
Epoch 6/100
239/239 - 18s - loss: 0.4856 - accuracy: 0.7992 - val_loss: 0.6041 - val_accuracy: 0.7671
Epoch 7/100
239/239 - 18s - loss: 0.4709 - accuracy: 0.8051 - val_loss: 0.5435 - val_accuracy: 0.7686
Epoch 8/100
239/239 - 18s - loss: 0.4624 - accuracy: 0.8089 - val_loss: 0.5909 - val_accuracy: 0.7655
Epoch 9/100
239/239 - 18s - loss: 0.4518 - accuracy: 0.8183 - val_loss: 0.5633 - val_accuracy: 0.7744
Epoch 10/100
239/239 - 18s - loss: 0.4509 - accuracy: 0.8160 - val_loss: 0.5776 -

Epoch 81/100
239/239 - 18s - loss: 0.0711 - accuracy: 0.9723 - val_loss: 2.1842 - val_accuracy: 0.7613
Epoch 82/100
239/239 - 18s - loss: 0.0813 - accuracy: 0.9701 - val_loss: 1.9864 - val_accuracy: 0.7592
Epoch 83/100
239/239 - 18s - loss: 0.0695 - accuracy: 0.9731 - val_loss: 2.0463 - val_accuracy: 0.7492
Epoch 84/100
239/239 - 18s - loss: 0.0814 - accuracy: 0.9704 - val_loss: 1.9422 - val_accuracy: 0.6847
Epoch 85/100
239/239 - 18s - loss: 0.0866 - accuracy: 0.9692 - val_loss: 1.9224 - val_accuracy: 0.7560
Epoch 86/100
239/239 - 18s - loss: 0.0704 - accuracy: 0.9738 - val_loss: 2.2769 - val_accuracy: 0.7587
Epoch 87/100
239/239 - 18s - loss: 0.0684 - accuracy: 0.9747 - val_loss: 1.8113 - val_accuracy: 0.7555
Epoch 88/100
239/239 - 18s - loss: 0.0627 - accuracy: 0.9751 - val_loss: 2.1075 - val_accuracy: 0.7671
Epoch 89/100
239/239 - 18s - loss: 0.0612 - accuracy: 0.9753 - val_loss: 2.0352 - val_accuracy: 0.7298
Epoch 90/100
239/239 - 18s - loss: 0.0687 - accuracy: 0.9744 - val_loss: 

<tensorflow.python.keras.callbacks.History at 0x7fa3b0072040>

In [24]:
total_model.load_weights("/home/user/Sahil/dual_features.ckpt")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fa3b0234d60>

In [27]:
total_model.save_weights("/home/user/Sahil/dual_features.h5")

In [25]:
loss, acc = total_model.evaluate([test_data,Ftest_data], test_labels, verbose=2)

60/60 - 1s - loss: 0.5357 - accuracy: 0.7791
