In [None]:
# importing necessary libraries
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# defining paths for 4 classes
# The dataset root defaults to the original location but can be overridden
# with the RENESAS_DATA_DIR environment variable, so the notebook can run on
# another machine without editing this cell.
DATA_DIR = os.environ.get(
    'RENESAS_DATA_DIR',
    '/Users/ehsanahmeddhrubo/Downloads/Renesas_challenge',
)
siren_path = os.path.join(DATA_DIR, 'siren')
engine_idling_path = os.path.join(DATA_DIR, 'engine_idling')
car_horn_path = os.path.join(DATA_DIR, 'car_horn')
air_conditioner_path = os.path.join(DATA_DIR, 'air_conditioner')

In [None]:
# listing all the .wav files for 4 classes
# os.listdir returns entries in arbitrary order and also returns non-audio
# entries (e.g. a macOS ".DS_Store"), so keep only .wav names and sort them;
# this makes the positional indexing used in later cells reproducible.
siren_dir_list = sorted(
    name for name in os.listdir(siren_path) if name.lower().endswith('.wav')
)
engine_idling_dir_list = sorted(
    name for name in os.listdir(engine_idling_path) if name.lower().endswith('.wav')
)
car_horn_dir_list = sorted(
    name for name in os.listdir(car_horn_path) if name.lower().endswith('.wav')
)
air_conditioner_dir_list = sorted(
    name for name in os.listdir(air_conditioner_path) if name.lower().endswith('.wav')
)

In [None]:
# one example clip per class: full path = class folder + "/" + file name
siren_file = f"{siren_path}/{siren_dir_list[0]}"
engine_idling_file = f"{engine_idling_path}/{engine_idling_dir_list[0]}"
# the car horn example is the entry at index 5 rather than the first one
car_horn_file = f"{car_horn_path}/{car_horn_dir_list[5]}"
air_conditioner_file = f"{air_conditioner_path}/{air_conditioner_dir_list[0]}"

In [None]:
# embed an audio player for the selected siren clip
ipd.Audio(siren_file)

In [None]:
# embed an audio player for the selected engine idling clip
ipd.Audio(engine_idling_file)

In [None]:
# embed an audio player for the selected car horn clip
ipd.Audio(car_horn_file)

In [None]:
# embed an audio player for the selected air conditioner clip
ipd.Audio(air_conditioner_file)

In [None]:
# loading the audio signals
# each call yields (samples, sampling rate) for one example clip
# NOTE(review): no sr= argument is passed, so librosa's default loading rate
# applies to every clip — confirm that this is the intended rate
siren_signal, siren_sr = librosa.load(siren_file)
engine_idling_signal, engine_idling_sr = librosa.load(engine_idling_file)
car_horn_signal, car_horn_sr = librosa.load(car_horn_file)
air_conditioner_signal, air_conditioner_sr = librosa.load(air_conditioner_file)

In [None]:
# the siren clip's sampling rate is reused as the common rate in the cells below
# NOTE(review): assumes the other three clips were loaded at the same rate — confirm
sr = siren_sr

In [None]:
# duration in seconds of 1 sample
sample_duration = 1 / sr
# ".6f" asks for six decimals explicitly; the previous "6f" spec only set a
# minimum field width and relied on the default precision
print(f"One sample lasts for {sample_duration:.6f} seconds")

In [None]:
# sample count of the siren example = length of its first (only) axis
tot_samples = siren_signal.shape[0]
tot_samples

In [None]:
# duration of siren audio in seconds
# divide directly: multiplying by the already-rounded reciprocal (1 / sr)
# adds a second rounding step that can show up in the printed value
duration = tot_samples / sr
print(f"The audio lasts for {duration} seconds")

In [None]:
# plotting waveforms of 4 classes
# one (title, samples, sampling rate) entry per subplot, top to bottom
waveform_panels = [
    ("Siren", siren_signal, siren_sr),
    ("Engine idling", engine_idling_signal, engine_idling_sr),
    ("Car horn", car_horn_signal, car_horn_sr),
    ("Air conditioner", air_conditioner_signal, air_conditioner_sr),
]

fig, axes = plt.subplots(len(waveform_panels), 1, figsize=(15, 17))
for panel_ax, (panel_title, panel_signal, panel_sr) in zip(axes, waveform_panels):
    # pass each clip's own sampling rate so the time axis does not silently
    # depend on waveshow's default rate
    librosa.display.waveshow(panel_signal, sr=panel_sr, alpha=0.5, ax=panel_ax)
    # same vertical range on every panel so amplitudes are comparable
    panel_ax.set_ylim(-0.6, 0.6)
    panel_ax.set_title(panel_title)

plt.show()

In [None]:
# framing parameters (in samples) shared by the STFT and the frame-based features
FRAME_SIZE = 1024  # frame length; passed as n_fft / frame_length / frame_size
HOP_LENGTH = 512   # step between the starts of consecutive frames

In [None]:
# short-time Fourier transform of each example clip, all with identical framing
stft_params = {"n_fft": FRAME_SIZE, "hop_length": HOP_LENGTH}
siren_s = librosa.stft(siren_signal, **stft_params)
engine_idling_s = librosa.stft(engine_idling_signal, **stft_params)
car_horn_s = librosa.stft(car_horn_signal, **stft_params)
air_conditioner_s = librosa.stft(air_conditioner_signal, **stft_params)

In [None]:
# power spectrogram per class: squared magnitude of the complex STFT
siren_y = np.square(np.abs(siren_s))
engine_idling_y = np.square(np.abs(engine_idling_s))
car_horn_y = np.square(np.abs(car_horn_s))
air_conditioner_y = np.square(np.abs(air_conditioner_s))

In [None]:
# convert each power spectrogram to a log scale with librosa.power_to_db
(
    siren_y_log_scale,
    engine_idling_y_log_scale,
    car_horn_y_log_scale,
    air_conditioner_y_log_scale,
) = [
    librosa.power_to_db(power_spectrogram)
    for power_spectrogram in (siren_y, engine_idling_y, car_horn_y, air_conditioner_y)
]

In [None]:
def plot_spectrogram(Y, sr, hop_length, y_axis="linear", title=None):
    """Draw a spectrogram with a time x-axis and a colorbar in a new figure.

    Parameters
    ----------
    Y : 2-D array
        Spectrogram values handed to ``librosa.display.specshow``
        (this notebook passes log-scaled power spectrograms).
    sr : number
        Sampling rate forwarded to ``specshow`` for axis labelling.
    hop_length : int
        Hop length, in samples, forwarded to ``specshow``.
    y_axis : str, optional
        Frequency-axis scaling forwarded to ``specshow``
        (this notebook uses "linear" and "log").
    title : str, optional
        Figure title. Nothing is drawn when ``None`` (the default), which
        keeps existing calls unchanged.
    """
    plt.figure(figsize=(25, 10))
    librosa.display.specshow(Y,
                             sr=sr,
                             hop_length=hop_length,
                             x_axis="time",
                             y_axis=y_axis)
    if title is not None:
        # set the title while the spectrogram axes are still the current axes
        plt.title(title)
    plt.colorbar(format="%+2.f")

In [None]:
# log-amplitude spectrograms on the default (linear) frequency axis,
# one figure per class in the order siren, engine idling, car horn, air conditioner
for log_spectrogram in (
    siren_y_log_scale,
    engine_idling_y_log_scale,
    car_horn_y_log_scale,
    air_conditioner_y_log_scale,
):
    plot_spectrogram(log_spectrogram, sr, HOP_LENGTH)

In [None]:
# the same spectrograms again, this time on a logarithmic frequency axis,
# one figure per class in the order siren, engine idling, car horn, air conditioner
for log_spectrogram in (
    siren_y_log_scale,
    engine_idling_y_log_scale,
    car_horn_y_log_scale,
    air_conditioner_y_log_scale,
):
    plot_spectrogram(log_spectrogram, sr, HOP_LENGTH, y_axis="log")

In [None]:
# Helper that forces an audio signal to a fixed number of samples
def pad_audio(audio_signal, target_length):
    """Return ``audio_signal`` with exactly ``target_length`` samples.

    A signal shorter than ``target_length`` gets zeros appended at its end;
    any other signal is cut off after ``target_length`` samples.
    """
    missing = target_length - len(audio_signal)
    if missing <= 0:
        # long enough already: keep only the leading samples
        return audio_signal[:target_length]
    # too short: append `missing` zeros on the right-hand side only
    return np.pad(audio_signal, (0, missing), mode='constant')


In [None]:
# loading all the signals of the 4 classes
def load_class_signals(class_path, n_files=40, target_length=88200):
    """Load the first ``n_files`` .wav clips of one class as fixed-length rows.

    Parameters
    ----------
    class_path : str
        Directory holding the clips of one class.
    n_files : int, optional
        Number of clips to load.
    target_length : int, optional
        Number of samples every clip is padded or trimmed to (``pad_audio``).

    Returns
    -------
    signals : ndarray, shape (n_files, target_length)
        One padded/trimmed clip per row.
    sample_rates : ndarray, shape (n_files, 1)
        Sampling rate returned by ``librosa.load`` for each clip.

    Raises
    ------
    ValueError
        If the directory holds fewer than ``n_files`` .wav files.
    """
    # os.listdir order is arbitrary and may contain non-audio entries such as
    # ".DS_Store"; filter and sort so the same clips are picked on every run.
    wav_names = sorted(
        name for name in os.listdir(class_path) if name.lower().endswith('.wav')
    )
    if len(wav_names) < n_files:
        raise ValueError(
            f"expected at least {n_files} .wav files in {class_path}, "
            f"found {len(wav_names)}"
        )

    signals = np.zeros((n_files, target_length))
    sample_rates = np.zeros((n_files, 1))
    for row, name in enumerate(wav_names[:n_files]):
        clip, clip_sr = librosa.load(os.path.join(class_path, name))
        signals[row, :] = pad_audio(clip, target_length)
        sample_rates[row, :] = clip_sr
    return signals, sample_rates


siren_signal, siren_sr = load_class_signals(siren_path)
engine_idling_signal, engine_idling_sr = load_class_signals(engine_idling_path)
car_horn_signal, car_horn_sr = load_class_signals(car_horn_path)
air_conditioner_signal, air_conditioner_sr = load_class_signals(air_conditioner_path)

In [None]:
# calculating mfcc for all the signals of the 4 classes
def compute_mfccs(signals, sample_rates, n_mfcc=13):
    """Compute the MFCC matrix of every clip and stack them along axis 0.

    Parameters
    ----------
    signals : ndarray, shape (n_clips, n_samples)
        One clip per row.
    sample_rates : ndarray holding one sampling rate per clip
        (any shape that flattens to ``n_clips`` values, e.g. ``(n_clips, 1)``).
    n_mfcc : int, optional
        Number of coefficients requested from ``librosa.feature.mfcc``.

    Returns
    -------
    ndarray with one ``librosa.feature.mfcc`` result per clip along axis 0.
    """
    return np.stack([
        # hand librosa a plain scalar rate: passing the 1-element array row
        # only works through NumPy's deprecated array-to-scalar conversion
        librosa.feature.mfcc(y=clip, n_mfcc=n_mfcc, sr=int(rate))
        for clip, rate in zip(signals, np.ravel(sample_rates))
    ])


siren_mfccs = compute_mfccs(siren_signal, siren_sr)
engine_idling_mfccs = compute_mfccs(engine_idling_signal, engine_idling_sr)
car_horn_mfccs = compute_mfccs(car_horn_signal, car_horn_sr)
air_conditioner_mfccs = compute_mfccs(air_conditioner_signal, air_conditioner_sr)

In [None]:
# calculating 1st derivative mfcc for all the signals of the 4 classes
# librosa.feature.delta differentiates along the last (frame) axis by default,
# so each (clips, coefficients, frames) stack is processed in a single call
# instead of a per-clip loop writing into a hard-coded (40, 13, 173) buffer.
siren_delta_mfccs = librosa.feature.delta(siren_mfccs)
engine_idling_delta_mfccs = librosa.feature.delta(engine_idling_mfccs)
car_horn_delta_mfccs = librosa.feature.delta(car_horn_mfccs)
air_conditioner_delta_mfccs = librosa.feature.delta(air_conditioner_mfccs)

In [None]:
# calculating 2nd derivative mfcc for all the signals of the 4 classes
# librosa.feature.delta differentiates along the last (frame) axis by default,
# so each (clips, coefficients, frames) stack is processed in a single call
# instead of a per-clip loop writing into a hard-coded (40, 13, 173) buffer.
siren_delta2_mfccs = librosa.feature.delta(siren_mfccs, order=2)
engine_idling_delta2_mfccs = librosa.feature.delta(engine_idling_mfccs, order=2)
car_horn_delta2_mfccs = librosa.feature.delta(car_horn_mfccs, order=2)
air_conditioner_delta2_mfccs = librosa.feature.delta(air_conditioner_mfccs, order=2)

In [None]:
# sanity check of the MFCC stack layout: (clips, MFCC coefficients, frames)
siren_mfccs.shape

In [None]:
# framing parameters (in samples) for the frame-based features computed below
# NOTE(review): same values as the FRAME_SIZE / HOP_LENGTH cell earlier in the
# notebook — this looks redundant; confirm before removing
FRAME_SIZE = 1024
HOP_LENGTH = 512

In [None]:
# calculating spectral centroid for all the signals of the 4 classes
def compute_spectral_centroids(signals, sample_rates, frame_size, hop_length):
    """Compute the per-frame spectral centroid of every clip.

    Parameters
    ----------
    signals : ndarray, shape (n_clips, n_samples)
        One clip per row.
    sample_rates : ndarray holding one sampling rate per clip
        (any shape that flattens to ``n_clips`` values, e.g. ``(n_clips, 1)``).
    frame_size : int
        Passed to librosa as ``n_fft``.
    hop_length : int
        Passed to librosa as ``hop_length``.

    Returns
    -------
    ndarray with one row of centroid values per clip.
    """
    return np.stack([
        # [0] keeps the single feature row librosa returns for a mono clip;
        # int(rate) hands librosa a plain scalar instead of a 1-element array
        librosa.feature.spectral_centroid(
            y=clip, sr=int(rate), n_fft=frame_size, hop_length=hop_length
        )[0]
        for clip, rate in zip(signals, np.ravel(sample_rates))
    ])


siren_sc = compute_spectral_centroids(siren_signal, siren_sr, FRAME_SIZE, HOP_LENGTH)
engine_idling_sc = compute_spectral_centroids(engine_idling_signal, engine_idling_sr, FRAME_SIZE, HOP_LENGTH)
car_horn_sc = compute_spectral_centroids(car_horn_signal, car_horn_sr, FRAME_SIZE, HOP_LENGTH)
air_conditioner_sc = compute_spectral_centroids(air_conditioner_signal, air_conditioner_sr, FRAME_SIZE, HOP_LENGTH)

In [None]:
# calculating bandwidth for all the signals of the 4 classes
def compute_spectral_bandwidths(signals, sample_rates, frame_size, hop_length):
    """Compute the per-frame spectral bandwidth of every clip.

    Parameters
    ----------
    signals : ndarray, shape (n_clips, n_samples)
        One clip per row.
    sample_rates : ndarray holding one sampling rate per clip
        (any shape that flattens to ``n_clips`` values, e.g. ``(n_clips, 1)``).
    frame_size : int
        Passed to librosa as ``n_fft``.
    hop_length : int
        Passed to librosa as ``hop_length``.

    Returns
    -------
    ndarray with one row of bandwidth values per clip.
    """
    return np.stack([
        # [0] keeps the single feature row librosa returns for a mono clip;
        # int(rate) hands librosa a plain scalar instead of a 1-element array
        librosa.feature.spectral_bandwidth(
            y=clip, sr=int(rate), n_fft=frame_size, hop_length=hop_length
        )[0]
        for clip, rate in zip(signals, np.ravel(sample_rates))
    ])


siren_bw = compute_spectral_bandwidths(siren_signal, siren_sr, FRAME_SIZE, HOP_LENGTH)
engine_idling_bw = compute_spectral_bandwidths(engine_idling_signal, engine_idling_sr, FRAME_SIZE, HOP_LENGTH)
car_horn_bw = compute_spectral_bandwidths(car_horn_signal, car_horn_sr, FRAME_SIZE, HOP_LENGTH)
air_conditioner_bw = compute_spectral_bandwidths(air_conditioner_signal, air_conditioner_sr, FRAME_SIZE, HOP_LENGTH)

In [None]:
def amplitude_envelope(signal, frame_size, hop_length):
    """Calculate the amplitude envelope of a signal with a given frame size and hop length.

    Parameters
    ----------
    signal : 1-D array-like
        Audio samples.
    frame_size : int
        Number of samples per frame.
    hop_length : int
        Step, in samples, between the starts of consecutive frames.

    Returns
    -------
    ndarray
        Maximum sample value of each frame; one entry per frame start in
        ``range(0, len(signal), hop_length)``. Frames at the end of the
        signal may contain fewer than ``frame_size`` samples.
    """
    samples = np.asarray(signal)
    # ndarray.max() is vectorised; the builtin max() walks the slice one
    # Python object at a time
    return np.array([
        samples[start:start + frame_size].max()
        for start in range(0, len(samples), hop_length)
    ])

In [None]:
# calculating amplitude envelope for all the signals of the 4 classes
# one envelope row per clip; stacking the rows lets the array size follow the
# data instead of a hard-coded (40, 173) buffer
siren_ae = np.stack([
    amplitude_envelope(clip, FRAME_SIZE, HOP_LENGTH) for clip in siren_signal
])
engine_idling_ae = np.stack([
    amplitude_envelope(clip, FRAME_SIZE, HOP_LENGTH) for clip in engine_idling_signal
])
car_horn_ae = np.stack([
    amplitude_envelope(clip, FRAME_SIZE, HOP_LENGTH) for clip in car_horn_signal
])
air_conditioner_ae = np.stack([
    amplitude_envelope(clip, FRAME_SIZE, HOP_LENGTH) for clip in air_conditioner_signal
])

In [None]:
def rmse(signal, frame_size, hop_length):
    """Compute the root-mean-square energy of each frame of a 1-D signal.

    Parameters
    ----------
    signal : 1-D array-like
        Audio samples.
    frame_size : int
        Number of samples per frame; also the divisor of the mean.
    hop_length : int
        Step, in samples, between the starts of consecutive frames.

    Returns
    -------
    ndarray
        One RMS value per frame start in ``range(0, len(signal), hop_length)``.
        Frames at the end of the signal that hold fewer than ``frame_size``
        samples are still divided by ``frame_size``, i.e. they are treated as
        if padded with zeros.
    """
    samples = np.asarray(signal, dtype=float)
    frame_energies = [
        # np.sum is vectorised; the builtin sum() adds element by element
        np.sqrt(np.sum(samples[start:start + frame_size] ** 2) / frame_size)
        for start in range(0, len(samples), hop_length)
    ]
    return np.array(frame_energies)

In [None]:
# calculating root mean square energy for all the signals of the 4 classes
# one RMS row per clip; stacking the rows lets the array size follow the data
# instead of a hard-coded (40, 173) buffer
siren_rms = np.stack([
    rmse(clip, frame_size=FRAME_SIZE, hop_length=HOP_LENGTH) for clip in siren_signal
])
engine_idling_rms = np.stack([
    rmse(clip, frame_size=FRAME_SIZE, hop_length=HOP_LENGTH) for clip in engine_idling_signal
])
car_horn_rms = np.stack([
    rmse(clip, frame_size=FRAME_SIZE, hop_length=HOP_LENGTH) for clip in car_horn_signal
])
air_conditioner_rms = np.stack([
    rmse(clip, frame_size=FRAME_SIZE, hop_length=HOP_LENGTH) for clip in air_conditioner_signal
])

In [None]:
# calculating zero crossing rate for all the signals of the 4 classes
def compute_zero_crossing_rates(signals, frame_size, hop_length):
    """Compute the per-frame zero crossing rate of every clip.

    Parameters
    ----------
    signals : ndarray, shape (n_clips, n_samples)
        One clip per row.
    frame_size : int
        Passed to librosa as ``frame_length``.
    hop_length : int
        Passed to librosa as ``hop_length``.

    Returns
    -------
    ndarray with one row of zero-crossing-rate values per clip.
    """
    return np.stack([
        # [0] keeps the single feature row librosa returns for a mono clip
        librosa.feature.zero_crossing_rate(
            clip, frame_length=frame_size, hop_length=hop_length
        )[0]
        for clip in signals
    ])


siren_zcr = compute_zero_crossing_rates(siren_signal, FRAME_SIZE, HOP_LENGTH)
engine_idling_zcr = compute_zero_crossing_rates(engine_idling_signal, FRAME_SIZE, HOP_LENGTH)
car_horn_zcr = compute_zero_crossing_rates(car_horn_signal, FRAME_SIZE, HOP_LENGTH)
air_conditioner_zcr = compute_zero_crossing_rates(air_conditioner_signal, FRAME_SIZE, HOP_LENGTH)

In [None]:
# stack the per-class MFCC arrays along the clip axis, in the order
# siren, engine idling, car horn, air conditioner
combined_mfccs = np.concatenate(
    [siren_mfccs, engine_idling_mfccs, car_horn_mfccs, air_conditioner_mfccs],
    axis=0,
)

In [None]:
# sanity check: the first axis should now hold the clips of all four classes
combined_mfccs.shape

In [None]:
# flatten each clip's MFCC matrix into one feature row: (clips, features)
combined_mfccs_2d = combined_mfccs.reshape(len(combined_mfccs), -1)

In [None]:
# sanity check: one row per clip, one column per flattened MFCC value
combined_mfccs_2d.shape

In [None]:
# integer class ids 0-3, each repeated for the 40 clips of its class
numbers = np.arange(4)
true_labels = numbers.repeat(40)

In [None]:
# sanity check: one label per clip (4 class ids repeated 40 times each)
true_labels.shape

In [None]:
# creating training and testing set using a stratified random split
# stratify keeps the four classes equally represented in both sets; with only
# 40 clips per class a plain random split can leave the test set unbalanced
X_train, X_test, y_train, y_test = train_test_split(
    combined_mfccs_2d,
    true_labels,
    test_size=0.2,
    random_state=42,
    stratify=true_labels,
)

In [None]:
# sanity check: size of the held-out set (20% of the clips, per test_size=0.2)
X_test.shape

In [None]:
# inspect the class ids that ended up in the held-out set
y_test

In [None]:
# linear-kernel SVM (C=1.0) fitted on the training split; fit() returns the
# estimator itself, so construction and training are chained
svm_classifier = SVC(C=1.0, kernel='linear').fit(X_train, y_train)

# predicted class ids for the held-out clips
y_pred_svm = svm_classifier.predict(X_test)

In [None]:
# creating a RF classifier
# a fixed random_state makes the forest, and therefore the reported accuracy
# and confusion matrix, reproducible across runs
clf = RandomForestClassifier(n_estimators=100, random_state=42)

clf.fit(X_train, y_train)

# performing predictions on the test dataset
y_pred_rf = clf.predict(X_test)

In [None]:
# fraction of held-out clips the SVM labelled correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
print("Accuracy:", accuracy)

In [None]:
# fraction of held-out clips the random forest labelled correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
print("Accuracy:", accuracy)

In [None]:
# confusion matrix for svm
# class ids 0-3 follow the order in which the per-class features were stacked
class_names = ["siren", "engine idling", "car horn", "air conditioner"]
# fixing `labels` keeps the matrix 4x4 even if a class is missing from the
# test split, so it always lines up with `class_names`
cm = confusion_matrix(y_test, y_pred_svm, labels=[0, 1, 2, 3])
svm_cm_display = ConfusionMatrixDisplay(cm, display_labels=class_names).plot()
svm_cm_display.ax_.set_title("SVM")
plt.show()

In [None]:
# confusion matrix for RF
# class ids 0-3 follow the order in which the per-class features were stacked
class_names = ["siren", "engine idling", "car horn", "air conditioner"]
# fixing `labels` keeps the matrix 4x4 even if a class is missing from the
# test split, so it always lines up with `class_names`
cm = confusion_matrix(y_test, y_pred_rf, labels=[0, 1, 2, 3])
rf_cm_display = ConfusionMatrixDisplay(cm, display_labels=class_names).plot()
rf_cm_display.ax_.set_title("Random forest")
plt.show()