### Preliminary cell to start the notebook

In [98]:
# libraries
import os
import sys
import platform
print(sys.version)

strong_pc = platform.system() == 'Linux'
in_colab = 'google.colab' in sys.modules
if in_colab:
    if not os.getcwd().split('/')[-1].split('_')[-1]=='2023':
        from google.colab import drive
        drive.mount('/content/drive')
        os.chdir(r'/content/drive/MyDrive/Human_Data_Analytics_Project_2023')

    #!pip install tensorflow==2.11.0
    #!pip install tensorflow_text==2.11.0
    if not 'tensorflow_io' in sys.modules:
        print('Installing tensorflow-IO')
        !pip install tensorflow-io
    if not 'keras' in sys.modules and False:
        print('Installing keras')
        !pip install keras==2.11.0
    if not 'scikeras' in sys.modules:
        print('Installing scikeras')
        !pip install scikeras[tensorflow]
    if not 'keras-tuner' in sys.modules:
        print('installing keras tuner')
        !pip install keras-tuner
        !pip install numba==0.57.0


if 'DEEPNOTE_ENV' in os.environ:
    os.chdir('/..')
    os.chdir('datasets')
    os.chdir('googledrivedeepnoteintegration')
    os.chdir('Human_Data_Analytics_Project_2023')
    if not 'librosa' in sys.modules:
        print('Installing Librosa')
        !pip install librosa
    if not 'scikeras' in sys.modules:
        print('Installing scikeras')
        !pip install scikeras[tensorflow]
    if not 'keras-tuner' in sys.modules:
        print('installing keras tuner')
        !pip install keras-tuner
        !pip install numba==0.57.0

main_dir = os.getcwd()
if main_dir not in sys.path:
    print('Adding the folder for the modules')
    sys.path.append(main_dir)

#BASE LIBRARIES
import numpy as np
import pandas as pd
import h5py
import shutil
import time
import random
import subprocess
import itertools
import warnings
import pickle
import json

#PLOT LIBRARIES
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display as ipd
#import plotly.express as px

#AUDIO LIBRARIES
import librosa
from scipy.io import wavfile
from scipy import signal
from scipy.fft import fft,ifft,fftfreq, fftshift
from scipy.signal import stft,spectrogram,periodogram
#from pydub import AudioSegment

#MACHINE LEARNING LIBRARIES
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, LeaveOneOut, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.utils import check_random_state
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import load_model

# GPU SETTINGS FOR LINUX and repressing warnings for windows. References for gpu: https://www.tensorflow.org/guide/gpu
show_gpu_activity = False
if sys.platform == 'linux' and not in_colab:
    if show_gpu_activity:
        tf.debugging.set_log_device_placement(True)

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
    # Restrict TensorFlow to only allocate a part of memory on the first GPU
        try:
            tf.config.set_logical_device_configuration(
                gpus[0],
                [tf.config.LogicalDeviceConfiguration(memory_limit=6800)])
            logical_gpus = tf.config.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            # Virtual devices must be set before GPUs have been initialized
            print(e)
else:
    warnings.filterwarnings("ignore", category=UserWarning)

from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.utils import plot_model as tf_plot
if in_colab:
    import tensorflow_io as tfio
print("TensorFlow version:", tf.__version__)
# show keras version
import keras
print(f'keras version = {keras.__version__}')
#import keras_tune as kt
from keras import layers
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras.regularizers import L1L2
# kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4) # we may use this in some layers...

#RANDOM SETTINGS
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
check_random_state(seed)

#EVALUATION LIBRAIRES
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve
from sklearn.metrics import make_scorer
from sklearn.metrics import RocCurveDisplay, precision_recall_curve, PrecisionRecallDisplay
from sklearn.metrics import precision_recall_fscore_support, auc

#OUR PERSONAL FUNCTIONS
import importlib
from Preprocessing.data_loader import download_dataset,load_metadata
from Preprocessing.exploration_plots import one_random_audio, plot_clip_overview, Spectral_Analysis
from Models.basic_ml import basic_ML_experiments, basic_ML_experiments_gridsearch, build_dataset, extract_flatten_MFCC
from Visualization.model_plot import confusion_matrix,listen_to_wrong_audio

importlib.reload(importlib.import_module('Preprocessing.data_loader'))
importlib.reload(importlib.import_module('Models.basic_ml'))
importlib.reload(importlib.import_module('Visualization.model_plot'))

from Preprocessing.data_loader import load_metadata
df_ESC10, df_ESC50 = load_metadata(main_dir,heads = False, ESC_US = False, statistics=False)

from Preprocessing.data_loader import load_metadata
from Models.basic_ml import basic_ML_experiments, basic_ML_experiments_gridsearch, build_dataset, extract_flatten_MFCC

importlib.reload(importlib.import_module('Models.ann_utils'))
importlib.reload(importlib.import_module('Visualization.model_plot'))

from Models.ann_utils import *
from Models.ann_utils import MFCCWithDeltaLayer,OutputCutterLayer
from Visualization.model_plot import plot_history, confusion_matrix, listen_to_wrong_audio, visualize_the_weights

ESC10_path = os.path.join(main_dir,'Data', 'ESC-10-depth')
samplerate = 22500

3.11.4 (main, Jul  5 2023, 08:54:11) [Clang 14.0.6 ]
TensorFlow version: 2.12.0
keras version = 2.12.0


In [99]:
main_dir = os.getcwd()
subfolder_path = os.path.join(main_dir, 'Data','balanced')

## Dataset management

Function to delete all the files which are too short or too long, we keep 10 sec files and we will sample half times

In [12]:
from pydub import AudioSegment

# Function to check audio duration
def check_audio_duration(audio_path, target_duration):
    try:
        audio = AudioSegment.from_file(audio_path)
        duration_seconds = len(audio) / 1000  # Convert milliseconds to seconds
        return duration_seconds == target_duration  # Check if duration is exactly equal to the target duration
    except Exception as e:
        return False


# Set the root folder path
root_folder = os.path.join(main_dir, 'Data', 'AudioSet')

# Target duration in seconds    
target_duration = 10
file_count = 0
remode_file = 0

# Iterate through all files in the directory and its subdirectories
for root, dirs, files in os.walk(root_folder):
    for file in files:
        # Check if the file has an audio file extension (e.g., .mp3, .wav, .ogg, etc.)
        if file.endswith(('.mp3', '.wav', '.ogg', '.flac', '.aac')):
            audio_path = os.path.join(root, file)
            file_count += 1
            if not check_audio_duration(audio_path, target_duration):
                # delete the file
                os.remove(audio_path)
                remode_file += 1


In [13]:
print(f'Number of files: {file_count}')
print(f'Number of removed files: {remode_file}') # around 3000 files removed


Number of files: 49625
Number of removed files: 2885


Renaming the folders from name = label, to name = number in order to use US_training (modified into AS_training)

In [17]:
#renaming the folders

import os
# Set the root folder path
path = os.path.join(main_dir, 'Data', 'AudioSet')
count = 1
for folder_name in sorted(os.listdir(path)):
    if os.path.isdir(os.path.join(path, folder_name)):
        # Create a new name with a three-digit number
        new_name = f"{count:03d}"
        
        # Construct the full paths to the old and new folders
        old_folder_path = os.path.join(path, folder_name)
        new_folder_path = os.path.join(path, new_name)
        
        # Rename the subfolder
        os.rename(old_folder_path, new_folder_path)
        
        count += 1

Converting wav files if needed (skipped)

In [38]:
# If we want to convert the audio files to OGG format, we can use the following code:
# But we do not do it because it takes too long, instead we use the original files modifying the function create_dataset

# Set the root folder path
root_folder = os.path.join(main_dir, 'Data', 'AudioSet')

# Iterate through all files in the directory and its subdirectories
for root, dirs, files in os.walk(root_folder):
    for file in files:
        # Check if the file has a WAV file extension
        if file.endswith('.wav'):
            # Construct the full path to the file
            file_path = os.path.join(root, file)
            
            # Specify the output OGG file path
            ogg_file_path = file_path.replace('.wav', '.ogg')
            
            # Use VLC to convert WAV to OGG format with a timeout of 60 seconds
            vlc_command = [
                '/Applications/VLC.app/Contents/MacOS/VLC',
                '--intf', 'dummy',  # Use the dummy interface (no GUI)
                '--no-sout-video',  # Disable video output
                '--sout', f'#transcode{{acodec=vorb,ab=128,channels=2,samplerate=44100}}:std{{access=file,mux=ogg,dst="{ogg_file_path}"}}',
                file_path  # Input WAV file
            ]
            
            # Run the VLC command with a timeout
            try: 
                completed_process = subprocess.run(vlc_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=0.5)
            except subprocess.TimeoutExpired:
                #check if there exists a file with the same name and its weight is not 0
                if os.path.exists(ogg_file_path) and os.path.getsize(ogg_file_path) > 0:
                    #print(f'Converted "{file_path}" to "{ogg_file_path}"')
                    continue
                else:
                    try:
                        completed_process = subprocess.run(vlc_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=1)
                    except subprocess.TimeoutExpired:
                        if os.path.exists(ogg_file_path) and os.path.getsize(ogg_file_path) > 0:
                            #print(f'Converted "{file_path}" to "{ogg_file_path}"')
                            continue
                        else: 
                            try:
                                completed_process = subprocess.run(vlc_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=10)
                            except subprocess.TimeoutExpired:
                                if os.path.exists(ogg_file_path) and os.path.getsize(ogg_file_path) > 0:
                                    #print(f'Converted "{file_path}" to "{ogg_file_path}"')
                                    continue
                                else:
                                    print(f'Failed to convert "{file_path}" to "{ogg_file_path}"')
                                    continue
                    continue

## Autoencoder training (using 3.3)

In [122]:
preprocessing = 'STFT'
AE_name = 'AE_Conv_prep_flatten_'+preprocessing + '_AudioSet'
train, val, test, INPUT_DIM = create_AS_dataset('001')

In [123]:
folder_path = 'Saved_Models'  # Replace this with the actual folder path
file_names = [ AE_name+'_count.txt']

for name in file_names:
    file_path = os.path.join(main_dir, folder_path, name)
    with open(file_path, 'w') as f:
        f.write('0')
    print(f"Created {name} with content '0' in folder {folder_path}")


Created AE_Conv_prep_flatten_STFT_AudioSet_count.txt with content '0' in folder Saved_Models


In [124]:
#General function to build an autoencoder
#CONVOLUTIONAL AUTOENCODER WITH VECTORIAL CODE
code_size = 32
n_layers = 2
n_units = 32

# the real build function for general autoencoder (keras code)
def build_autoencoder(img_shape = INPUT_DIM,
                      code_size = code_size,
                      activation = 'tanh',
                      padding = 'valid',
                      n_layers = n_layers, #max number of layers is 3
                      n_units = n_units,
                      kernel_size = (3,3),
                      strides = (2,2),
                      max_pooling = (2,2),
                      regularizer = 1e-4,
                      batch_norm = True,
                      drop_out = 0.0,
                      learning_rate = 1e-3,
                      loss = tf.keras.losses.MeanSquaredError(),
                      metrics = ['mse'],
                      AE_name = AE_name
 ):
    lr = learning_rate
    # encoder
    encoder = tf.keras.Sequential(name='Encoder')
    encoder.add(tf.keras.Input(img_shape))
    for i in range(n_layers):
        encoder.add(layers.Conv2D(n_units * (i+1), kernel_size,strides = strides, activation = activation, padding=padding))
        encoder.add(layers.MaxPool2D(max_pooling, padding='same'))
        if batch_norm:
            encoder.add(layers.BatchNormalization())
        if drop_out > 0:
            encoder.add(layers.Dropout(drop_out))

    # flatten layer to get the code
    my_shape = encoder.layers[-1].output_shape
    encoder.add(layers.Flatten())
    encoder.add(layers.Dense(code_size,activation = activation, activity_regularizer=keras.regularizers.l1(regularizer)))

    # decoder
    decoder = tf.keras.Sequential(name='Decoder')
    decoder.add(tf.keras.Input(code_size))
    decoder.add(layers.Dense(np.prod(my_shape[1:]), activation=activation))
    decoder.add(layers.Reshape(my_shape[1:]))

    # transpose convolutions
    for i in range(n_layers):
        filters = n_units * (n_layers-i) if i<n_layers-1 else 1
        decoder.add(layers.Conv2DTranspose(filters , kernel_size, strides=strides, activation=activation, padding=padding))
        decoder.add(layers.UpSampling2D(size=max_pooling))
        if batch_norm:
            decoder.add(layers.BatchNormalization())

    #final reshape
    decoder.add(tf.keras.layers.Resizing(height = INPUT_DIM[0], width = INPUT_DIM[1], interpolation="bilinear", crop_to_aspect_ratio=False))

    # build the autoencoder with keras.Model
    inp = tf.keras.Input(shape = INPUT_DIM)
    code = encoder(inp)
    reconstruction = decoder(code)
    autoencoder = tf.keras.Model(inputs=inp, outputs=reconstruction, name = AE_name)

    # compile the autoencoder
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=lr) if sys.platform == 'darwin' or in_colab else tf.keras.optimizers.Adam(learning_rate = lr)
    loss = loss
    metrics = metrics

    autoencoder.compile(optimizer=optimizer,
                loss=loss,
                metrics=metrics)

    #print the number of trainable parameters
    print(f'Model built with { sum(tf.keras.backend.count_params(p) for p in autoencoder.trainable_variables)} trainable params')

    return autoencoder


In [125]:
# Now, you have a TensorFlow dataset with spectrograms and labels.
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import librosa
from scipy.interpolate import RegularGridInterpolator

In [126]:
def create_AS_dataset(folder,val_split = 0.25): #folder = '001'
    # Create an empty list to store spectrograms and labels
    spectrograms = []

    # Define the target shape for spectrograms
    target_shape = (64, 128)

    list_audio_files = os.listdir(os.path.join(main_dir, 'Data', 'AudioSet', folder))

    for audio_file in list_audio_files:
        audio_path = os.path.join(main_dir, 'Data', 'AudioSet', folder , audio_file)
        audio, sr = librosa.load(audio_path, sr=22050, mono=True)
        stft = librosa.stft(audio)

        # Create a grid of indices for the original spectrogram
        x = np.linspace(0, stft.shape[1] - 1, stft.shape[1])
        y = np.linspace(0, stft.shape[0] - 1, stft.shape[0])

        # Create the interpolator function
        interpolator = RegularGridInterpolator((y, x), stft)

        # Create a grid of indices for the target shape
        target_x = np.linspace(0, stft.shape[1] - 1, target_shape[1])
        target_y = np.linspace(0, stft.shape[0] - 1, target_shape[0])

        # Create a meshgrid of target indices
        target_x, target_y = np.meshgrid(target_x, target_y)

        # Stack the indices as a 2D array
        target_indices = np.vstack((target_y.ravel(), target_x.ravel())).T

        # Interpolate to get the resized STFT
        stft = interpolator(target_indices).reshape(target_shape)

        # Change dim to (64, 128, 1)
        stft = stft.reshape(64, 128, 1)

        # Append the spectrogram and label to the lists (modify this part to include labels)
        spectrograms.append(stft)

    # Convert the lists to numpy arrays
    spectrograms = np.array(spectrograms)

    # Create a TensorFlow dataset
    dataset = tf.data.Dataset.from_tensor_slices((spectrograms))

    # Shuffle and batch the dataset (modify batch size and buffer size as needed)
    batch_size = 32
    buffer_size = 1000

    #divide into train, val, test
    dataset = dataset.shuffle(buffer_size)
    n = len(dataset)
    n_val = int(n*val_split)
    n_test = int(n*val_split)
    n_train = n - n_val - n_test
    train = dataset.take(n_train)
    val = dataset.skip(n_train).take(n_val)
    test = dataset.skip(n_train+n_val).take(n_test)
    
    #batch the dataset
    train = train.batch(batch_size)
    val = val.batch(batch_size)
    test = test.batch(batch_size)

    INPUT_DIM = spectrograms[0].shape

    return train, val, test, INPUT_DIM

In [127]:
def AS_training(AE_name,
                autoencoder,
                n_folders,
                epochs = 50,
                preprocessing = None,
                patience=10,
                verbose = 0,
                ndim = 3,
                metrics = ['mse'],
                ):

    #paramteres for the fit and callbacks
    callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_'+metrics[0],
                                                mode='max',
                                                verbose=verbose,
                                                restore_best_weights=True,
                                                patience=patience)]

    #read the file txt to know the folder to start 
    with open(os.path.join(main_dir,'Saved_Models',AE_name+'_count.txt'), 'r') as file:
        last_folder = int(file.read())
        print(f'Last folder trained: {last_folder}')

    if n_folders < last_folder:
        print('The number of folders is smaller than the last folder trained!')
        n_folders = last_folder
    
    for i in range(last_folder+1,n_folders+1):

        #load the model if i > 1
        if i>1:
            autoencoder = tf.keras.models.load_model(os.path.join(main_dir,'Saved_Models',AE_name))

        #create the dataset
        train, val, test, INPUT_DIM =  create_AS_dataset(folder_number=i,
                                                        preprocessing = preprocessing,
                                                        ndim = ndim,
                                                        main_dir = main_dir,
                                                        )
        
        #fit the autoencoder
        history = autoencoder.fit(train, validation_data= val, epochs=epochs, callbacks = callbacks, verbose=0)

        #save the model
        autoencoder.save(os.path.join(main_dir,'Saved_Models',AE_name), save_format  ='keras')

        #show the best epoch
        val_acc_per_epoch = history.history['val_'+metrics[0]]
        best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
        if verbose > 0:
            print('Best epoch: %d' % (best_epoch,))
        
        if verbose > 1:
            #plot the history of the training
            plot_history(history)

            #evaluate the model on the test set
            scores = autoencoder.evaluate(test, return_dict=False)
            display(scores)

        #update the number on the txt file overwritting the previoAS one
        with open(os.path.join(main_dir,'Saved_Models',AE_name+'_count.txt'), 'w') as file:
            file.write(str(i))

    # retrive the size of the model
    print(f"This model has a size of {get_model_size(autoencoder)} MB")
     
    return autoencoder

In [128]:
best_params = {
    'n_layers':1,
    'n_units':16,
    'kernel_size':3,
    'strides':3,
    'max_pooling':3,
    'regularizer':0.0001,
    'padding':'valid',
    'code_size':32,
    'activation':'elu',
    'drop_out':0.0,
    'batch_norm':False,
    'learning_rate':0.005,
}


# build an autoencoder with the best params
autoencoder = build_autoencoder(**best_params)

#autoencoder = tuner.get_best_models(num_models=1)[0] #to create the model with some already wuite good weights
autoencoder.summary()
verbose=0
if verbose>0:
    autoencoder.layers[1].summary()
    autoencoder.layers[2].summary()

epochs = 1 #50
n_folders = 2 #528
verbose = 0

AS_training(AE_name = AE_name, autoencoder = autoencoder, epochs = epochs , n_folders = n_folders , preprocessing = preprocessing, ndim=3, verbose=verbose)

Model built with 102257 trainable params
Model: "AE_Conv_prep_flatten_STFT_AudioSet"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 64, 128, 1)]      0         
                                                                 
 Encoder (Sequential)        (None, 32)                50368     
                                                                 
 Decoder (Sequential)        (None, 64, 128, 1)        51889     
                                                                 
Total params: 102,257
Trainable params: 102,257
Non-trainable params: 0
_________________________________________________________________
Last folder trained: 0


TypeError: create_AS_dataset() got an unexpected keyword argument 'folder_number'