In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import librosa

In [2]:
# Path to the UrbanSound8K metadata table, relative to the notebook.
metadata = "../UrbanSound8K/metadata/UrbanSound8K.csv"
df = pd.read_csv(filepath_or_buffer=metadata)

# Distinct values of the 'class' column, in order of first appearance.
labels = df.loc[:, 'class'].unique()
print(labels)

['dog_bark' 'children_playing' 'car_horn' 'air_conditioner' 'street_music'
 'gun_shot' 'siren' 'engine_idling' 'jackhammer' 'drilling']


### Loading and resampling an example

In [3]:
# Take the first audio file found in fold 1 as a worked example.
_wav_dir_ = "../UrbanSound8K/audio/fold1/"
files = librosa.util.find_files(directory=_wav_dir_)
f = files[0]

# sr=None keeps the file's native sampling rate instead of resampling on load.
signal, rate = librosa.load(path=f, sr=None)

In [4]:
# Target clip duration (seconds) and the sampling rate to resample to (Hz).
signal_time = 4
target_rate = 1000

# Resample the example clip from its native rate down to the target rate.
new_signal = librosa.resample(y=signal, orig_sr=rate, target_sr=target_rate)

# Number of samples in the resampled clip.
len(new_signal)

4000

### Loading and resampling all the data

Parameter definitions

In [6]:
signal_time = 4    # every clip is brought to this duration, in seconds
target_rate = 1000    # resampling frequency, in Hz
n_samples = signal_time*target_rate    # fixed clip length, in samples

# MFCC parameters
n_mfcc=40
hop_length=round(target_rate*0.0125)
win_length=round(target_rate*0.023)
# NOTE(review): n_fft (16384) is larger than both win_length and the clip
# length (n_samples = 4000) -- confirm that this is intended.
n_fft=2**14
# Number of MFCC frames for a clip of exactly n_samples samples
# (assumes librosa's default centred framing -- TODO confirm).
mfcc_time_size = n_samples//hop_length+1

# The arrays must hold the clips of ALL ten folds. Sizing them with
# len(files) would only reserve room for the single fold that `files`
# currently lists, and the loader would then run past the end of the arrays.
n_clips = sum(
    len(librosa.util.find_files("../UrbanSound8K/audio/fold" + str(fold) + '/'))
    for fold in range(1,11)
)

dataset=np.zeros(shape=[n_clips,n_samples])
dataset_mfcc=np.zeros(shape=[n_clips,n_mfcc,mfcc_time_size])

Obtaining all the resampled data

In [None]:
# Fixed length of every stored clip, in samples.
clip_len = signal_time*target_rate

# First pass: list the audio files of all ten folds, so the output arrays can
# be sized for the whole dataset rather than for a single fold.
all_files = []
for i in range(1,11):
    _wav_dir_="../UrbanSound8K/audio/fold" + str(i) + '/'
    files = librosa.util.find_files(_wav_dir_)
    all_files.extend(files)

dataset=np.zeros(shape=[len(all_files),clip_len])
dataset_mfcc=np.zeros(shape=[len(all_files),n_mfcc,mfcc_time_size])

# Second pass: load, resample, length-normalise and featurise every clip.
idx = 0
for f in all_files:
    signal, rate = librosa.load(f, sr=None)
    new_signal = librosa.resample(signal, orig_sr=rate, target_sr=target_rate)

    # Every row of `dataset` has exactly clip_len samples, so clips must be
    # brought to that length before being stored: longer clips are truncated,
    # shorter ones are zero-padded at the end. (Use mode='reflect' in np.pad
    # instead to mirror the signal rather than append silence.)
    new_signal = new_signal[:clip_len]
    missing = clip_len - len(new_signal)
    if missing > 0:
        new_signal = np.pad(new_signal, (0, missing), mode='constant')

    dataset[idx] = new_signal
    sig_mfcc =librosa.feature.mfcc(y=new_signal,sr=target_rate,n_fft=n_fft,hop_length=hop_length,win_length=win_length,n_mfcc=n_mfcc)
    dataset_mfcc[idx] = sig_mfcc
    idx += 1

## Zero-padding

### Alternative to zero-padding? Reflective padding

Reflective Padding:

    Reflecting the signal at its boundaries instead of zero-padding helps preserve the continuity of the signal, preventing the alterations of the signal's characteristics that zero-padding might introduce.
    It can also help reduce artifacts at the edges of the signal by providing a smooth transition from the original signal to the padded region.
    We believe this approach is effective for sound data, due to its symmetric nature.

In [5]:
def reflective_padding(signal, target_duration):
    """Extend a 1-D signal to a fixed length by mirroring it at both ends.

    The missing samples are split as evenly as possible between the two
    sides; when the amount is odd, the extra sample goes to the right.

    Parameters
    ----------
    signal : array-like
        One-dimensional sequence of samples.
    target_duration : int
        Desired length of the result, measured in samples (not seconds).

    Returns
    -------
    numpy.ndarray
        Array of length ``target_duration`` with ``signal`` in the middle and
        reflected copies of its samples on either side.

    Raises
    ------
    ValueError
        If ``signal`` is empty while padding is required, or if it is already
        longer than ``target_duration`` (this function never truncates).
    """
    current_duration = len(signal)

    # Number of samples that have to be added in total.
    padding_needed = target_duration - current_duration
    if padding_needed < 0:
        # np.pad would fail on the negative width with an unrelated message.
        raise ValueError(
            "signal has %d samples, which exceeds target_duration=%d"
            % (current_duration, target_duration)
        )
    if current_duration == 0 and padding_needed > 0:
        # There is nothing to mirror in an empty signal.
        raise ValueError("cannot reflect-pad an empty signal")

    left_padding = padding_needed // 2
    right_padding = padding_needed - left_padding

    # 'reflect' mirrors the samples around the first/last value without
    # repeating that edge value.
    padded_signal = np.pad(signal, (left_padding, right_padding), 'reflect')

    return padded_signal

# Demonstration on a toy signal: seven samples are extended to ten.
# Swap `original_signal` for a real clip; `target_duration` is a length in
# samples (compared against len(signal) inside reflective_padding).
target_duration = 10
original_signal = np.array([1, 2, 3, 4, 3, 2, 1])

padded_signal = reflective_padding(
    signal=original_signal,
    target_duration=target_duration,
)

print("Original Signal:", original_signal)
print("Padded Signal:", padded_signal)
print("Padded Signal Duration:", len(padded_signal))


Original Signal: [1 2 3 4 3 2 1]
Padded Signal: [2 1 2 3 4 3 2 1 2 3]
Padded Signal Duration: 10
