# soundNet: Classifying Vehicle Classes from Audio Files

Import Libraries/Modules

In [None]:
import pandas as pd
import numpy as np
import pickle
import torch
from torchaudio.transforms import AmplitudeToDB, MelSpectrogram

Config

In [None]:
# Notebook configuration
# ----------------------
# Every tunable setting used by the notebook is gathered in this cell.

# Dataset root folder and the annotation file stored inside it.
data_dir = '../data/'
annotations_path = data_dir + 'weak_labels.csv'

# Dataset description: the expected vehicle-class ids and the annotation
# columns (index 0 is the audio file path, index 1 the vehicle class).
classes = [1, 2, 5, 12, 15]
header = [
    'filepath_wav_30',
    'vehicle_class',
    'vehicle_class_combo',
    'vehicle_class_base',
    'speed',
]
# Seed for the random shuffle used when splitting the dataset.
_SEED = 42

# Settings for the mel-spectrogram feature extraction.
feats = dict(
    n_mels=128,
    n_filters=2048,
    hop_length=256,
    n_window=2048,
    sample_rate=16000,
    f_min=0,
    f_max=8000,
)

## Data Manipulation

### Loading audio files

In [None]:
# Read the tab-separated annotation file into a dataframe and preview the
# first rows.
labels = pd.read_csv(annotations_path, delimiter='\t')
labels.head()

Unnamed: 0,filepath_wav_30,vehicle_class,vehicle_class_combo,vehicle_class_base,speed
0,wav/20230109/rs_an0005_dt_20230109_tm_015511_t...,2,16,M,33
1,wav/20230109/rs_an0005_dt_20230109_tm_022315_t...,2,16,M,77
2,wav/20230109/rs_an0005_dt_20230109_tm_022947_t...,2,16,M,28
3,wav/20230109/rs_an0005_dt_20230109_tm_023948_t...,5,18,M,20
4,wav/20230109/rs_an0005_dt_20230109_tm_024134_t...,5,18,M,38


In [None]:
# Duplicate check: the file-path column should contain as many distinct
# values as the dataframe has rows.
unique = labels[header[0]].nunique()
length = len(labels)
if unique == length:
    print(f"There are {length} unique filenames in the dataset.")
else:
    print("There are duplicates in the dataset.")

There are 3358 unique filenames in the dataset.


In [None]:
# Sanity check: the sorted class ids found in the annotations must be exactly
# the ones listed in `classes`; abort with a ValueError otherwise.
unique_classes = np.sort(labels[header[1]].unique())
classes_match = np.array_equal(classes, unique_classes)
if not classes_match:
    mismatch_message = 'The classes are not the same as the ones in the annotations file.'
    print(mismatch_message)
    print('Classes in annotations file: {}'.format(unique_classes))
    print('Classes in classes variable: {}'.format(classes))
    raise ValueError(mismatch_message)

In [None]:
# Train / validation / test split (60 / 20 / 20)
# ----------------------------------------------
# train: fits the model; val: monitors it during training; test: evaluates it
# once training is finished. The split is a plain seeded shuffle of the file
# list followed by slicing, so class proportions are not explicitly balanced.

# Distinct audio files, shuffled in place with the configured seed.
audio_files = labels[header[0]].unique()
np.random.seed(_SEED)
np.random.shuffle(audio_files)

# Slice boundaries: the first 60% goes to train, the next 20% to validation
# and the remainder to test.
n_audio_files = len(audio_files)
train_end = int(0.6 * n_audio_files)
val_end = int(0.8 * n_audio_files)

train_files = audio_files[:train_end]
val_files = audio_files[train_end:val_end]
test_files = audio_files[val_end:]

In [None]:
# Verify the splits
# -----------------
# The three splits together must contain every unique audio file exactly once.
# The total reported here is the number of files that were actually split
# (len(audio_files)), so the printed equation is always the one being checked.
n_train, n_val, n_test = len(train_files), len(val_files), len(test_files)
n_files = len(audio_files)
if n_files == n_train + n_val + n_test:
    print(f"Number of unique audio files: {n_files} = {n_train} + {n_val} + {n_test}")
else:
    print('The splits are not correct.')
print('Number of train files: {}'.format(n_train))
print('Number of test files: {}'.format(n_test))
print('Number of validation files: {}'.format(n_val))

Number of unique audio files: 3358 = 2014 + 672 + 672
Number of train files: 2014
Number of test files: 672
Number of validation files: 672


In [None]:
# Create the partitions
# ---------------------
# `partitions` maps each split name ('train', 'val', 'test') to a list of
# (filename, vehicle class) tuples.

# Build the filename -> class lookup once instead of scanning the whole
# dataframe for every file. Keeping only the first row per filename mirrors
# taking the first match, and zipping the raw column values keeps the class
# values in their original numpy scalar type.
first_rows = labels.drop_duplicates(subset=header[0], keep='first')
class_by_file = dict(zip(first_rows[header[0]].values, first_rows[header[1]].values))

split_files = {'train': train_files, 'val': val_files, 'test': test_files}
partitions = {
    split_name: [(filename, class_by_file[filename]) for filename in files]
    for split_name, files in split_files.items()
}

# Verify the partitions: every unique audio file must appear in some split.
if sum(len(pairs) for pairs in partitions.values()) != len(audio_files):
    print('The partitions are not correct.')

In [None]:
# Persist the partitions
# ----------------------
# The partitions dictionary is serialized with pickle into the data directory.
partitions_path = data_dir + 'partitions.pkl'
with open(partitions_path, mode='wb') as partitions_file:
    pickle.dump(partitions, partitions_file)

In [None]:
# Build the mel-spectrogram transform from the settings in `feats` (the
# configuration cell defines `feats`; no `feat_params` name exists in this
# notebook). The transform is left as the cell's last expression so the
# notebook displays it.
# NOTE(review): feats['n_filters'] is not used here; both the FFT size and
# the window length come from feats['n_window'] - confirm this is intended.
MelSpectrogram(
    sample_rate=feats["sample_rate"],
    n_fft=feats["n_window"],
    win_length=feats["n_window"],
    hop_length=feats["hop_length"],
    f_min=feats["f_min"],
    f_max=feats["f_max"],
    n_mels=feats["n_mels"],
    # Hamming window created with periodic=False.
    window_fn=torch.hamming_window,
    wkwargs={"periodic": False},
    # Exponent applied to the spectrogram magnitude.
    power=1,
)