In [1]:
import numpy as np
import tensorflow as tf
import scipy.fftpack as scipy
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

In [59]:
class DatasetConverter:
    def __init__(self, dataset):
        self.dataset = dataset

    def convert(self, option):
        available_options = ['spettrogrammi', 'filterbanks', 'mfcc']
        
        if option == available_options[0]:
            return self.get_spectrogram_dataset()
        elif option == available_options[1]:
            return self.get_filterbanks_dataset()
        elif option == available_options[2]:
            return self.get_mfcc_dataset()
        else:
            raise ValueError(f"Opzione non disponibile: inserire una delle seguenti opzioni: {available_options}")
    
    # INIZIO SPETTROGRAMMI
    def squeeze(self, audio, labels):
        audio = tf.squeeze(audio, axis=-1)
        return audio, labels
    
    def get_spectrogram(self, waveform):
    # applichiamo la short-time Fourier transorm
        spectrogram = tf.signal.stft(waveform, frame_length=255, frame_step=128)
        spectrogram = tf.abs(spectrogram)
        
        return spectrogram[..., tf.newaxis]
    
    def get_spectrogram_dataset(self):
        # squeeze
        self.dataset = self.dataset.map(self.squeeze, tf.data.AUTOTUNE)
        self.dataset = self.dataset.map(lambda x, y: (self.get_spectrogram(x), y), num_parallel_calls=tf.data.AUTOTUNE)
        
        return self.dataset

    # FINE SPETTROGRAMMI
    
    # INIZIO FILTERBANKS
    def pre_emphasis(self, samples, pre_emphasis_rate=0.97):
        # Apply pre-emphasis
        samples = tf.concat([samples[0:1], samples[1:] - pre_emphasis_rate * samples[:-1]], axis=0)
        
        return samples

    def framing_phase(self, emphasized_audio, sample_rate=16000, frame_size=0.025, frame_stride=0.01, audio_length=16000):        
        frame_length, frame_step = int(frame_size * sample_rate), int(frame_stride * sample_rate)

        num_frames = int(np.ceil(float(np.abs(audio_length - frame_length)) / frame_step))

        pad_audio_length = num_frames * frame_step + frame_length
        z = tf.zeros([pad_audio_length - audio_length], dtype=tf.float32)
        z = tf.reshape(z, [-1, 1])
        
        pad_audio = tf.concat([emphasized_audio[:, tf.newaxis], z], axis=0)

        indices_frame = tf.tile(tf.range(0, frame_length), [num_frames])
        indices_step = tf.repeat(tf.range(0, num_frames * frame_step, frame_step), repeats=frame_length)
        indices = tf.reshape(indices_frame + indices_step, (num_frames, frame_length))

        frames = tf.gather(pad_audio, indices)

        return frames

    def makeHamming(self, M):
        R = (( M - 1 ) / 2 , M / 2)[M % 2 == 0]
        w = (np.hamming(M), np.hamming(M + 1))[M % 2 == 0]
        if M % 2 != 0:
            w[0] = w[0]/2
            w[M-1] = w[M-1]/2
        else:
            w = w[:M]
            
        return w

    def fourier_transform(self, frames, NFFT=512):
        frames_complex = tf.signal.rfft(frames)
        mag_frames = tf.abs(frames_complex)  # Magnitude of FFT
        pow_frames = (1.0 / NFFT) * (mag_frames ** 2)

        return pow_frames

    def computer_filterbanks(self, pow_frames, nfilt=40, NFFT=401, sample_rate=16000):
        low_freq_mel = 2595 * np.log10(1 + 0 / 700)
        high_freq_mel = 2595 * np.log10(1 + (sample_rate / 2) / 700)

        mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
        hz_points = 700 * (10**(mel_points / 2595) - 1)

        bin = np.floor((NFFT + 1) * hz_points / sample_rate)

        fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))

        for m in range(1, nfilt + 1):
            f_m_minus = int(bin[m - 1]) 
            f_m = int(bin[m]) 
            f_m_plus = int(bin[m + 1])  
        
            for k in range(f_m_minus, f_m):
                fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
            for k in range(f_m, f_m_plus):
                fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])

        filter_banks = tf.linalg.matmul(pow_frames, fbank.T)
        epsilon = tf.constant(tf.keras.backend.epsilon(), dtype=filter_banks.dtype)
        filter_banks = tf.where(tf.equal(filter_banks, 0), epsilon, filter_banks)

        filter_banks = 20 * tf.math.log(tf.abs(filter_banks))

        return filter_banks[..., tf.newaxis]
    
    def get_filterbanks_dataset(self): 
        self.dataset = self.dataset.unbatch()
        
        self.dataset = self.dataset.map(lambda x, y: (self.pre_emphasis(x), y),
                                        num_parallel_calls=tf.data.AUTOTUNE)

        self.dataset = self.dataset.map(self.squeeze, 
                                        num_parallel_calls=tf.data.AUTOTUNE)

        frames_dataset = self.dataset.map(lambda x, y: (self.framing_phase(x), y), 
                                          num_parallel_calls=tf.data.AUTOTUNE) 

        frames_dataset = frames_dataset.map(self.squeeze,
                                            num_parallel_calls=tf.data.AUTOTUNE)

        hamming_window = self.makeHamming(400)
        hw_frames_dataset = frames_dataset.map(lambda x, y: (x * hamming_window, y), 
                                               num_parallel_calls=tf.data.AUTOTUNE)

        ft_frames_dataset = hw_frames_dataset.map(lambda x, y: (self.fourier_transform(x), y), 
                                                  num_parallel_calls=tf.data.AUTOTUNE)

        fb_dataset = ft_frames_dataset.map(lambda x, y: (self.computer_filterbanks(x), y), num_parallel_calls=tf.data.AUTOTUNE)

        return fb_dataset
    # FINE FILTERBANKS

    # FINE MFCC

In [60]:
train_ds, validation_ds = tf.keras.utils.audio_dataset_from_directory(
    directory='../reduced_dataset/dataset/audio',
    validation_split=0.4, # stiamo mettendo da parte il 40% del dataset, che sarà suddiviso in validation set e test set
    shuffle=True,
    subset='both', # necessario se stiamo utilizzando validation_split (se no darebbe errore)
    seed=0 # necessario se stiamo utilizzando sia shuffle che validation_split (se no darebbe errore)
)

label_names = train_ds.class_names

Found 12933 files belonging to 30 classes.
Using 7760 files for training.
Using 5173 files for validation.


In [61]:
train_ds = DatasetConverter(train_ds)
train_ds = train_ds.convert('filterbanks')

In [62]:
val_ds = validation_ds.take(validation_ds.cardinality() // 2)
test_ds = validation_ds.skip(validation_ds.cardinality() // 2)

In [63]:
def get_basic_model(input_shape):
    # Dichiariamo lo shape dell'input, ridimensioniamo le immagini, e normalizziamo
    inputs = tf.keras.Input(shape=input_shape, name="inputs")
    x = tf.keras.layers.Normalization(name="normalizzazione")(inputs)

    # Blocco di apprendimento delle caratteristiche
    conv2D_1 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu", padding="same", name="conv2D_1")(x)
    MaxPooling2D_1 = tf.keras.layers.MaxPooling2D(pool_size=2, name="MaxPooling2D_1")(conv2D_1)
    conv2D_2 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding="same", name="conv2D_2")(MaxPooling2D_1)
    MaxPooling2D_2 = tf.keras.layers.MaxPooling2D(pool_size=2, name="MaxPooling2D_2")(conv2D_2)
    conv2D_3 = tf.keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding="same", name="conv2D_3")(MaxPooling2D_2)
    MaxPooling2D_3 = tf.keras.layers.MaxPooling2D(pool_size=2, name="MaxPooling2D_3")(conv2D_3)
    conv2D_4 = tf.keras.layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding="same", name="conv2D_4")(MaxPooling2D_3)
    MaxPooling2D_4 = tf.keras.layers.MaxPooling2D(pool_size=2, name="MaxPooling2D_4")(conv2D_4)
    conv2D_5 = tf.keras.layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding="same", name="conv2D_5")(MaxPooling2D_4)

    # Blocco di classificazione
    Flatten = tf.keras.layers.Flatten(name="Flatten")(conv2D_5)
    outputs = tf.keras.layers.Dense(30, activation="softmax", name="dense_output")(Flatten)

    # Modello
    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [64]:
train_ds = train_ds.batch(32)
val_ds = val_ds.batch(32)

In [65]:
for audio, label in train_ds:
    basic_model = get_basic_model(audio[0].shape)
    break
    
basic_model.compile(loss="sparse_categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

basic_model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inputs (InputLayer)         [(None, 98, 40, 1)]       0         
                                                                 
 normalizzazione (Normalizat  (None, 98, 40, 1)        3         
 ion)                                                            
                                                                 
 conv2D_1 (Conv2D)           (None, 98, 40, 32)        320       
                                                                 
 MaxPooling2D_1 (MaxPooling2  (None, 49, 20, 32)       0         
 D)                                                              
                                                                 
 conv2D_2 (Conv2D)           (None, 49, 20, 64)        18496     
                                                                 
 MaxPooling2D_2 (MaxPooling2  (None, 24, 10, 64)       0   

In [66]:
basic_model_callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath="bestmodels/rmsprop/basic_model_fb.keras", save_best_only=True, monitor="val_loss")]
basic_model_history = basic_model.fit(train_ds, epochs=30, batch_size=32, validation_data=val_ds, callbacks=basic_model_callbacks)

Epoch 1/30
    242/Unknown - 5s 17ms/step - loss: 3.9541 - accuracy: 0.0642

InvalidArgumentError: Graph execution error:

Detected at node 'Squeeze' defined at (most recent call last):
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\ipykernel\kernelapp.py", line 701, in start
      self.io_loop.start()
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\asyncio\windows_events.py", line 321, in run_forever
      super().run_forever()
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\ipykernel\kernelbase.py", line 534, in dispatch_queue
      await self.process_one()
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\ipykernel\kernelbase.py", line 523, in process_one
      await dispatch(*args)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\ipykernel\kernelbase.py", line 429, in dispatch_shell
      await result
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
      reply_content = await reply_content
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\ipykernel\ipkernel.py", line 429, in do_execute
      res = shell.run_cell(
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell
      result = self._run_cell(
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell
      result = runner(coro)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Gabry\AppData\Local\Temp\ipykernel_25928\2285221270.py", line 2, in <module>
      basic_model_history = basic_model.fit(train_ds, epochs=30, batch_size=32, validation_data=val_ds, callbacks=basic_model_callbacks)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\engine\training.py", line 1606, in fit
      val_logs = self.evaluate(
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\engine\training.py", line 1947, in evaluate
      tmp_logs = self.test_function(iterator)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\engine\training.py", line 1727, in test_function
      return step_function(self, iterator)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\engine\training.py", line 1713, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\engine\training.py", line 1701, in run_step
      outputs = model.test_step(data)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\engine\training.py", line 1668, in test_step
      return self.compute_metrics(x, y, y_pred, sample_weight)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\engine\training.py", line 1092, in compute_metrics
      self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\engine\compile_utils.py", line 605, in update_state
      metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\utils\metrics_utils.py", line 77, in decorated
      update_op = update_state_fn(*args, **kwargs)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\metrics\base_metric.py", line 143, in update_state_fn
      return ag_update_state(*args, **kwargs)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\metrics\base_metric.py", line 700, in update_state
      matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\metrics\metrics.py", line 3669, in sparse_categorical_accuracy
      matches = metrics_utils.sparse_categorical_matches(y_true, y_pred)
    File "C:\Users\Gabry\anaconda3\envs\DL\lib\site-packages\keras\utils\metrics_utils.py", line 962, in sparse_categorical_matches
      y_true = tf.squeeze(y_true, [-1])
Node: 'Squeeze'
Can not squeeze dim[1], expected a dimension of 1, got 32
	 [[{{node Squeeze}}]] [Op:__inference_test_function_8271]