In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import os

In [2]:
# Load the audio clips and split them in a single call: 40% of the data is set
# aside here and is divided into a validation set and a test set further down.
DATASET_DIR = '../reduced_dataset/dataset/audio'

train_ds, validation_ds = tf.keras.utils.audio_dataset_from_directory(
    DATASET_DIR,
    shuffle=True,
    seed=0,  # required when shuffle and validation_split are used together (an error is raised otherwise)
    validation_split=0.4,
    subset='both',  # required whenever validation_split is used (an error is raised otherwise)
)

# Class names as exposed by the dataset returned from the loader.
label_names = train_ds.class_names

Found 12933 files belonging to 30 classes.
Using 7760 files for training.
Using 5173 files for validation.


In [3]:
def squeeze(audio, labels):
    """Remove the last axis (axis=-1) of `audio`; `labels` is returned unchanged.

    Source: function taken from
    https://www.geeksforgeeks.org/audio-recognition-in-tensorflow/
    """
    return tf.squeeze(audio, axis=-1), labels

In [4]:
# Drop the trailing axis from the audio tensors of both splits.
train_ds = train_ds.map(squeeze, num_parallel_calls=tf.data.AUTOTUNE)
validation_ds = validation_ds.map(squeeze, num_parallel_calls=tf.data.AUTOTUNE)

# Fetch one training batch and print the shape of its waveforms, then of its labels.
audio, label = next(iter(train_ds))
for batch_tensor in (audio, label):
    print(batch_tensor.shape)

(32, 16000)
(32,)


In [5]:
def get_spectrogram(waveform):
    """Return the magnitude of the short-time Fourier transform of `waveform`.

    The STFT is computed on frames of 255 samples taken every 128 samples, and
    a new trailing axis is appended to the magnitude before it is returned.

    Source: function taken from
    https://www.geeksforgeeks.org/audio-recognition-in-tensorflow/
    """
    magnitude = tf.abs(tf.signal.stft(waveform, frame_length=255, frame_step=128))
    return magnitude[..., tf.newaxis]

In [6]:
def get_spectrogram_dataset(dataset):
    """Return `dataset` with the first component of every element turned into a spectrogram.

    Each element is expected to be a pair; `get_spectrogram` is applied to the
    first item and the second item is passed through untouched.

    Source: function taken from
    https://www.geeksforgeeks.org/audio-recognition-in-tensorflow/
    """
    def to_spectrogram(waveform, label):
        return get_spectrogram(waveform), label

    return dataset.map(to_spectrogram, num_parallel_calls=tf.data.AUTOTUNE)

In [7]:
# Turn the waveforms of both splits into spectrograms.
train_ds = get_spectrogram_dataset(train_ds)
validation_ds = get_spectrogram_dataset(validation_ds)

# Divide the held-out data in two: the first half of its elements becomes the
# validation set, everything after that becomes the test set. The validation
# half gets its own name (val_ds) so that validation_ds is still intact when
# the test split is taken from it.
held_out_half = validation_ds.cardinality() // 2
val_ds = validation_ds.take(held_out_half)
test_ds = validation_ds.skip(held_out_half)

In [139]:
class VoxProfunda():
    """Builder for a deep 2D convolutional classifier that works on spectrograms.

    The network is a plain stack: resize -> normalization -> conv1 -> max-pool ->
    four stages of repeated 3x3 convolutions (64, 128, 256 and 512 filters) ->
    global average pooling -> two 512-unit dense layers -> softmax output.

    NOTE(review): the stages contain no skip connections and no activation
    between the convolutions stacked inside one repetition -- confirm that this
    is the intended architecture.
    """

    def __init__(self):
        # Index of the most recently created conv stage; only used to build
        # the default stage names ("conv1", "conv2_3", ...).
        self.counter = 0

    def _default_stage_name(self, repeat):
        """Return the automatic name of the current stage.

        A stage that is not repeated is called "conv<k>"; a repeated stage is
        called "conv<k>_<repeat>", where <k> is the current value of
        `self.counter`.
        """
        base = "conv" + str(self.counter)
        # The suffix depends only on the repeat count, never on the stage index.
        if repeat == 1:
            return base
        return base + "_" + str(repeat)

    def conv2D_x(self, filters, kernel_size, conv_n_layers, repeat, strides=1, padding="valid", name=False):
        """Create one convolutional stage as a `tf.keras.Sequential`.

        Every repetition stacks `conv_n_layers` Conv2D layers followed by one
        BatchNormalization layer and one ReLU activation.

        Args:
            filters: number of filters of every Conv2D layer in the stage.
            kernel_size: kernel size of every Conv2D layer in the stage.
            conv_n_layers: number of Conv2D layers per repetition.
            repeat: number of repetitions inside the stage.
            strides: strides of every Conv2D layer.
            padding: padding mode of every Conv2D layer.
            name: name of the stage; the default `False` requests an automatic
                name derived from the stage index and `repeat`.

        Returns:
            The (not yet called) `tf.keras.Sequential` holding the stage.
        """
        self.counter += 1

        if name is False:
            name = self._default_stage_name(repeat)

        stage = tf.keras.Sequential(name=name)

        for _ in range(repeat):
            for _ in range(conv_n_layers):
                stage.add(tf.keras.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding))
            stage.add(tf.keras.layers.BatchNormalization())
            stage.add(tf.keras.layers.Activation("relu"))

        return stage

    def get_model(self, num_classes=30, input_shape=(124, 129, 1)):
        """Assemble and return the (uncompiled) `tf.keras.Model`.

        Args:
            num_classes: number of units of the softmax output layer.
            input_shape: shape of one input spectrogram, without the batch axis.

        Returns:
            A functional `tf.keras.Model` mapping spectrograms to class
            probabilities.
        """
        # Restart the stage numbering so that calling get_model() more than
        # once on the same instance always yields the same layer names.
        self.counter = 0

        inputs = tf.keras.Input(shape=input_shape, name="input")
        x = tf.keras.layers.Resizing(120, 120, name="resize")(inputs)
        # NOTE(review): the layer is created without mean/variance, so it
        # presumably needs to be adapted by the caller (it can be retrieved
        # with model.get_layer("normalization")) before it normalizes anything.
        x = tf.keras.layers.Normalization(name="normalization")(x)

        # Feature-learning block
        # conv1
        x = self.conv2D_x(filters=64, kernel_size=(7, 7), conv_n_layers=1, repeat=1, padding="same")(x)

        # pool1
        x = tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=2, padding="same", name="pool1")(x)

        # conv2_x * 3, conv3_x * 4, conv4_x * 6, conv5_x * 3
        for filters, repeat in ((64, 3), (128, 4), (256, 6), (512, 3)):
            x = self.conv2D_x(filters=filters, kernel_size=(3, 3), conv_n_layers=2, repeat=repeat, padding="same")(x)

        # global average pooling
        x = tf.keras.layers.GlobalAveragePooling2D(name="global_average_pooling")(x)

        # Classification head: fc1, fc2 and the softmax output fc3
        x = tf.keras.layers.Dense(512, activation="relu", name="fc1")(x)
        x = tf.keras.layers.Dense(512, activation="relu", name="fc2")(x)

        outputs = tf.keras.layers.Dense(num_classes, activation="softmax", name="fc3")(x)
        return tf.keras.Model(inputs=inputs, outputs=outputs)


# Keep the builder under its own name so that `model` only ever refers to the
# Keras model, never to the VoxProfunda builder.
builder = VoxProfunda()
model = builder.get_model()

# The Normalization layer is created without mean/variance, so it only
# normalizes once it has been adapted. Its statistics are computed here on the
# training spectrograms, passed through the model's own resize layer so that
# they match what the normalization layer receives inside the network.
# adapt() is called before compile(), so the training function is built with
# the adapted statistics.
resize_layer = model.get_layer("resize")
model.get_layer("normalization").adapt(
    train_ds.map(
        lambda spectrogram, label: resize_layer(spectrogram),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
)

model.compile(loss="sparse_categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
model.summary()

Model: "model_66"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 124, 129, 1)]     0         
                                                                 
 resize (Resizing)           (None, 120, 120, 1)       0         
                                                                 
 normalization (Normalizatio  (None, 120, 120, 1)      3         
 n)                                                              
                                                                 
 conv1 (Sequential)          (None, 120, 120, 64)      3456      
                                                                 
 pool1 (MaxPooling2D)        (None, 60, 60, 64)        0         
                                                                 
 conv2_3 (Sequential)        (None, 60, 60, 64)        222336    
                                                          

In [141]:
# Save the model to disk only when the monitored validation loss improves.
best_model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="bestmodels/resnet34.keras",
    monitor="val_loss",
    save_best_only=True,
)
callback = [best_model_checkpoint]

# Train on the training spectrograms, evaluating on val_ds after every epoch.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    callbacks=callback,
)

Epoch 1/30
 25/243 [==>...........................] - ETA: 3:51 - loss: 3.4460 - accuracy: 0.0288

KeyboardInterrupt: 