In [22]:
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_io as tfio
from tqdm import tqdm

from data_loader import data_loader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

## Yamnet imports 
import params as yamnet_params
import yamnet_modified  as yamnet_model
import features as features_lib

In [23]:
params = yamnet_params.Params()

classes = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
# classes = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad']

class_names = yamnet_model.class_names(
    './yamnet_class_map.csv'
)

# Data loading

In [24]:
dl = data_loader(
    Crema_path='../../Datasets/Crema/',
    Ravdess_path='../../Datasets/Ravdess/',
    Savee_path='../../Datasets/Savee/',
    Tess_path='../../Datasets/Tess/'
)

In [25]:
df = dl.get_savee_df()

In [26]:
train, val, test = dl.split_df(df)

In [27]:
train.shape, val.shape, test.shape

((336, 2), (72, 2), (72, 2))

In [28]:
train['Emotion'] = train['Emotion'].apply(lambda x: classes.index(x))
val['Emotion'] = val['Emotion'].apply(lambda x: classes.index(x))
test['Emotion'] = test['Emotion'].apply(lambda x: classes.index(x))

In [29]:
# encoder, train_label, val_label, test_label = dl.ohe_labels(
#     train['Emotion'].values, val['Emotion'].values, test['Emotion'].values)

In [30]:
train_label = train['Emotion'].values
val_label = val['Emotion'].values
test_label = test['Emotion'].values

train_label = tf.keras.utils.to_categorical(train_label, num_classes=len(classes))
val_label = tf.keras.utils.to_categorical(val_label, num_classes=len(classes))
test_label = tf.keras.utils.to_categorical(test_label, num_classes=len(classes))

In [31]:
train_ds = tf.data.Dataset.from_tensor_slices((train['File_Path'], train_label))
val_ds = tf.data.Dataset.from_tensor_slices((val['File_Path'], val_label))
test_ds = tf.data.Dataset.from_tensor_slices((test['File_Path'], test_label))

In [32]:
print(train_ds.element_spec)
print(val_ds.element_spec)
print(test_ds.element_spec)

(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))
(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))
(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))


In [33]:
#read the wav files
# @tf.function
def load_wav_16k_mono(filename):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(
          file_contents,
          desired_channels=1)
    
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    
    return wav
 
def load_wav_for_map(filename, label):
  return load_wav_16k_mono(filename), label


def yamnet_frames_model_transfer1(wav_data,label):
   
    waveform_padded = features_lib.pad_waveform(wav_data, params)
    log_mel_spectrogram, features = features_lib.waveform_to_log_mel_spectrogram_patches(
        waveform_padded, params)
    print(log_mel_spectrogram.shape)
    
    
    return log_mel_spectrogram, label

In [34]:
train_ds = train_ds.map(load_wav_for_map)
val_ds = val_ds.map(load_wav_for_map)
test_ds = test_ds.map(load_wav_for_map)



In [35]:
print(train_ds.element_spec)
print(val_ds.element_spec)
print(test_ds.element_spec)

(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))
(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))
(TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))


In [36]:
train_ds = train_ds.map(yamnet_frames_model_transfer1)
val_ds = val_ds.map(yamnet_frames_model_transfer1)
test_ds = test_ds.map(yamnet_frames_model_transfer1)

(None, 64)
(None, 64)
(None, 64)


In [37]:
print(train_ds.element_spec)
print(val_ds.element_spec)
print(test_ds.element_spec)

(TensorSpec(shape=(None, 64), dtype=tf.float32, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))
(TensorSpec(shape=(None, 64), dtype=tf.float32, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))
(TensorSpec(shape=(None, 64), dtype=tf.float32, name=None), TensorSpec(shape=(7,), dtype=tf.float32, name=None))


In [38]:
train_ds = train_ds.cache().shuffle(1000).batch(32).prefetch(tf.data.experimental.AUTOTUNE)
val_ds = val_ds.cache().batch(32).prefetch(tf.data.experimental.AUTOTUNE)

# Building Model

In [39]:
#load yamnet model. yamnet_frames_model_transfer1 is modified version of the
#yamnet_frames_model_transfer in yamnet.py file in order to be able the 
#train yamnet from scratch


yamnet = yamnet_model.yamnet_frames_model_transfer(params, len(classes))

preloaded_layers = yamnet.layers.copy()
preloaded_weights = []

for pre in preloaded_layers:
        preloaded_weights.append(pre.get_weights())    



In [40]:
#load the weights from pretrain model except for the last layer and
#check which layer used the pretrain weights
# store weights before loading pre-trained weights

chkp=True

if chkp==True:
# load pre-trained weights(fine tuning the model)
#load the weights from pretrain model except for the last layer
    yamnet.load_weights('./yamnet.h5',by_name=True)
 #   yamnet.load_weights('D:/bat_n/yamnet_2.h5',by_name=True)
    for layer, pre in zip(yamnet.layers, preloaded_weights):
        weights = layer.get_weights()
        if weights:
            if np.array_equal(weights, pre):
                print('not loaded', layer.name)
            # else:
            #     print('loaded', layer.name)


  a1, a2 = asarray(a1), asarray(a2)


In [41]:
# NAME='./yamnet.h5'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(NAME, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                            patience=10,
                                            restore_best_weights=True,
                                            verbose=1)

# tensorboard=tf.keras.callbacks.TensorBoard(
#     log_dir='D:/bat_n/logs')

yamnet.compile(optimizer='adam', 
               loss="categorical_crossentropy", 
               metrics=[
                        'accuracy',
                        tf.keras.metrics.Recall(),
                        tf.keras.metrics.Precision()
                ]
)

# yamnet.summary()

tf.keras.utils.plot_model(
    yamnet,
    to_file='yamnet.png',
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=True,
    dpi=96,
    layer_range=True,
    show_layer_activations=True
)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [42]:
yamnet.fit(
    train_ds,
    epochs=100, 
    validation_data= val_ds,
    callbacks=[
        # checkpoint,
        # tensorboard,
        callback
    ]
)

Epoch 1/100


InvalidArgumentError: Graph execution error:

Detected at node 'IteratorGetNext' defined at (most recent call last):
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\traitlets\config\application.py", line 976, in launch_instance
      app.start()
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 600, in run_forever
      self._run_once()
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 1896, in _run_once
      handle._run()
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\samin\AppData\Local\Temp\ipykernel_12024\85722099.py", line 1, in <cell line: 1>
      yamnet.fit(
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "c:\Users\samin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1145, in step_function
      data = next(iterator)
Node: 'IteratorGetNext'
Cannot batch tensors with different shapes in component 0. First element had shape [528,64] and element 8 had shape [192,64].
	 [[{{node IteratorGetNext}}]] [Op:__inference_train_function_15449]

In [None]:
#test the model
test_res = yamnet.evaluate(test_ds)

test_res

In [None]:

#dir_="D:/bat_n/df_test_b.csv"
#dir_="D:/bat_n/df_test_n.csv"
#dir_="D:/bat_n/df_test_uk.csv"
import os
dir_="D:/bat_n/norfolk_test_files.csv"
df_test_b=pd.read_csv(dir_)
base_data_path='D:/bat_n/wav/'
full_path = df_test_b['filename'].apply(lambda row: os.path.join(base_data_path, row))
df_test_b= df_test_b.assign(filename=full_path)

full_path = df_test_b['filename'].apply(lambda row: ( row+ '.wav'))

df_test_b= df_test_b.assign(filename=full_path)

filenames=df_test_b['filename']
targets=df_test_b['target']
df_test_b['fold']=1
folds=df_test_b['fold']


#the directory contained the .wav files

test_b = tf.data.Dataset.from_tensor_slices((filenames, targets,folds))
test_b= test_b.map(load_wav_for_map)
test_b = test_b.map(yamnet_frames_model_transfer1).unbatch()
remove_fold_column = lambda embedding, label, fold: (embedding, label)
test_b = test_b.map(remove_fold_column)
test_b = test_b.cache().batch(32).prefetch( tf.data.experimental.AUTOTUNE)

evaluate= yamnet.evaluate(test_b)







dir_="D:/bat_n/df_test_uk.csv"
dir_="D:/bat_n/uk_test_files1.csv"
df_test_b=pd.read_csv(dir_)


filenames=df_test_b['filename']
targets=df_test_b['target']
df_test_b['fold']=1
folds=df_test_b['fold']


#the directory contained the .wav files

test_b = tf.data.Dataset.from_tensor_slices((filenames, targets,folds))
test_b= test_b.map(load_wav_for_map)
test_b = test_b.map(yamnet_frames_model_transfer1).unbatch()
cached_ds = main_ds.cache()
test_b_train = cached_ds.filter(lambda embedding, label, fold: fold <2)
test_b_val = cached_ds.filter(lambda embedding, label, fold: fold ==3)
test_b_test = cached_ds.filter(lambda embedding, label, fold: fold == 4)

# remove the folds column now that it's not needed anymore
remove_fold_column = lambda embedding, label, fold: (embedding, label)

test_b_train= test_b_train.map(remove_fold_column)

test_b_val= test_b_val.map(remove_fold_column)

test_b_test= test_b_test.map(remove_fold_column)


#X_train = list(map(lambda x: x[0], train_ds))
#y_train = list(map(lambda x: x[1], train_ds))

#creat a batch of size 32 of frames with size (96,64)
#we have to suffle the train set to avoid the frames from the same audio on one batch
train_ds = test_b_train.cache().shuffle(1000).batch(32).prefetch( tf.data.experimental.AUTOTUNE)
val_ds = test_b_val.cache().batch(32).prefetch( tf.data.experimental.AUTOTUNE)
test_ds = test_b_test.cache().batch(32).prefetch( tf.data.experimental.AUTOTUNE)



#test n

'''
dir_="D:/bat_n/df_test_n.csv"
dir_="D:/bat_n/df_test_uk.csv"
df_test_b=pd.read_csv(dir_)

filenames=df_test_b['filename']
targets=df_test_b['target']
folds=df_test_b['fold']
l=[]
for j in range(1,5):
    print((j-1),'--',j*175)
    for i in range(0,175):
        
        l.append(j)
        
        
        
folds=l[:len(df_test_b)]        
        
    

test_b = tf.data.Dataset.from_tensor_slices((filenames, targets,folds))
test_b= test_b.map(load_wav_for_map)

test_b = test_b.map(yamnet_frames_model_transfer1)#.unbatch()


cached_ds = test_b.cache()
test_b_train = cached_ds.filter(lambda embedding, label, fold: fold <2)
test_b_val = cached_ds.filter(lambda embedding, label, fold: fold ==3)
test_b_test = cached_ds.filter(lambda embedding, label, fold: fold <= 2)

# remove the folds column now that it's not needed anymore
remove_fold_column = lambda embedding, label, fold: (embedding, label)

test_b_train= test_b_train.map(remove_fold_column)

test_b_val= test_b_val.map(remove_fold_column)

test_b_test= test_b_test.map(remove_fold_column)



#creat a batch of size 32 of frames with size (96,64)
#we have to suffle the train set to avoid the frames from the same audio on one batch
train_ds = test_b_train.cache().shuffle(1000).batch(32).prefetch( tf.data.experimental.AUTOTUNE)
val_ds = test_b_val.cache().batch(32).prefetch( tf.data.experimental.AUTOTUNE)
test_ds = test_b_test.cache().batch(32).prefetch( tf.data.experimental.AUTOTUNE)


yamnet=yamnet_model.yamnet_frames_model_transfer(params)

preloaded_layers = yamnet.layers.copy()
preloaded_weights = []
for pre in preloaded_layers:
        preloaded_weights.append(pre.get_weights())    


#load the weights from pretrain model except for the last layer and
#check which layer used the pretrain weights
# store weights before loading pre-trained weights
chkp==True
if chkp==True:
# load pre-trained weights(fine tuning the model)
#load the weights from pretrain model except for the last layer
    yamnet.load_weights('D:/bat_n/yamnet_2.h5',by_name=True)
    for layer, pre in zip(yamnet.layers, preloaded_weights):
        weights = layer.get_weights()
        if weights:
            if np.array_equal(weights, pre):
                print('not loaded', layer.name)
            else:
                print('loaded', layer.name)





yamnet.compile(optimizer='adam', 
               loss='sparse_categorical_crossentropy', 
               metrics=['accuracy'])
yamnet.fit(train_ds,epochs=20)






loss= yamnet.evaluate(test_ds)





from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
import numpy as np

SAMPLE_RATE = 16000
X=list(map(lambda x: x[0], test_b))
X=np.array(X)
#y_train = list(map(lambda x: x[1], train_ds))
augment = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
        ])
augmented_samples = augment(samples=X, sample_rate=16000)
    
    
'''