In [1]:
!pip install noisereduce

Collecting noisereduce
  Downloading noisereduce-3.0.3-py3-none-any.whl.metadata (14 kB)
Downloading noisereduce-3.0.3-py3-none-any.whl (22 kB)
Installing collected packages: noisereduce
Successfully installed noisereduce-3.0.3


In [2]:
import pandas as pd
import librosa
import os
import numpy as np
metadata = pd.read_csv('/kaggle/input/urbansound8k/UrbanSound8K.csv')

metadata.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [3]:
import os
import numpy as np
import pandas as pd
import librosa
import noisereduce as nr
from concurrent.futures import ThreadPoolExecutor

def process_audio_file(file_path, sample_rate):
    audio, sr = librosa.load(file_path, sr=sample_rate)
    n_fft = min(2048, len(audio)) 
    cleansed_audio = nr.reduce_noise(y=audio, sr=sr)
    normalized_audio = cleansed_audio/ np.max(np.abs(cleansed_audio))
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13,n_fft=n_fft)
    mfccs = np.mean(mfccs.T, axis=0)
    return mfccs

def load_audio_files(dataframe, sample_rate=22050, duration=5):
    audio_data = []
    labels = []

    with ThreadPoolExecutor() as executor:
        futures = []
        for index, row in dataframe.iterrows():
            file_path = os.path.join('/kaggle/input/urbansound8k', 'fold' + str(row['fold']), row['slice_file_name'])
            futures.append(executor.submit(process_audio_file, file_path, sample_rate))
            labels.append(row['classID'])

        for future in futures:
            audio_data.append(future.result())

    return np.array(audio_data), np.array(labels)


X, y = load_audio_files(metadata)
print(X.shape, y.shape)

(8732, 13) (8732,)


In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models

input_shape = (X_train.shape[1], X_train.shape[2])  
model = models.Sequential([
    layers.SimpleRNN(64, input_shape=input_shape, return_sequences=True),
    layers.SimpleRNN(32),
    layers.Dense(64, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
    
])


model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  super().__init__(**kwargs)


In [6]:
history = model.fit(X_train, y_train, epochs=25, batch_size=32, validation_split=0.2)

Epoch 1/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.2760 - loss: 2.0493 - val_accuracy: 0.4180 - val_loss: 1.6929
Epoch 2/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4580 - loss: 1.5583 - val_accuracy: 0.4782 - val_loss: 1.5510
Epoch 3/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5285 - loss: 1.3859 - val_accuracy: 0.5061 - val_loss: 1.4445
Epoch 4/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5486 - loss: 1.3087 - val_accuracy: 0.5319 - val_loss: 1.3847
Epoch 5/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5781 - loss: 1.2259 - val_accuracy: 0.5147 - val_loss: 1.3759
Epoch 6/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5988 - loss: 1.1894 - val_accuracy: 0.5476 - val_loss: 1.3374
Epoch 7/25
[1m175/175[0m 

In [7]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:}')

[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6661 - loss: 1.0553
Test Accuracy: 0.6605609655380249


In [8]:
df = pd.read_csv('/kaggle/input/urbansound8k/UrbanSound8K.csv')
def padding(array, xx, yy):
    h = array.shape[0]
    w = array.shape[1]

    a = max((xx-h)//2 ,0 )
    aa = max(0, (xx-a-h))

    b = max(0, (yy-w)//2)
    bb = max(yy-b-w, 0)

    return np.pad(array, pad_width=((a,aa), (b,bb)), mode='constant')

def preprocess(row, target_sr=16000, target_shape=(64, 13)):
    """
    Preprocess a single audio file to extract MFCC features.
    """
    try:
        file_path = os.path.join('/kaggle/input/urbansound8k', 'fold' + str(row['fold']), row['slice_file_name'])
        # Load audio file
        audio, orig_sr = librosa.load(file_path, sr=target_sr)

        # Extract MFCC features
        mfcc = librosa.feature.mfcc(y=audio, sr=target_sr, n_mfcc=13)

        # Pad or truncate to the target shape
        if mfcc.shape[1] < target_shape[1]:
            mfcc = padding(mfcc, target_shape[0],target_shape[1])
        else:
            mfcc = mfcc[:, :target_shape[1]]

        # Add the preprocessed data to the global lists
        images.append(mfcc)
        labels.append(row['classID'])

    except Exception as e:
        print(f"Error processing file {row['slice_file_name']}: {e}")

# Initialize global lists
images = []
labels = []
# Parallel processing
with ThreadPoolExecutor() as pool:
    futures = []
    for i, row in df.iterrows():
        futures.append(pool.submit(preprocess, row))

    for future in futures:
        future.result()  # Wait for all threads to finish

print("ThreadPool Finished!")
print(f"Processed {len(images)} files.")



ThreadPool Finished!
Processed 8732 files.


In [9]:
target_shape=(64, 13)
def padding2(array, target_shape):
    # Pad array to the target shape
    h, w = array.shape
    a = max((target_shape[0] - h) // 2, 0)
    aa = max(0, (target_shape[0] - a - h))
    b = max(0, (target_shape[1] - w) // 2)
    bb = max(target_shape[1] - b - w, 0)
    
    return np.pad(array, pad_width=((a, aa), (b, bb)), mode='constant')
images_resized=[]
for image in images:
    if image.shape != target_shape:
        # Resize or pad the image to match the target shape
        image_resized = padding2(image, target_shape)
        images_resized.append(image_resized)
    else:
        images_resized.append(image)

# Now convert the list to a NumPy array
imagesd = np.array(images_resized)
print(imagesd.shape)
labels=np.array(labels)

(8732, 64, 13)


In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(imagesd, labels, test_size=0.2, random_state=42)

In [11]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define CNN model
model = models.Sequential([
    layers.Input(shape=(64, 13, 1)),  # Input shape for MFCCs
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')  # num_classes = number of output labels
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [12]:
history = model.fit(X_train, y_train, epochs=25, batch_size=32, validation_split=0.2)

Epoch 1/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.2862 - loss: 3.5220 - val_accuracy: 0.4825 - val_loss: 1.5095
Epoch 2/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.4896 - loss: 1.4929 - val_accuracy: 0.5018 - val_loss: 1.4170
Epoch 3/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.5662 - loss: 1.2848 - val_accuracy: 0.5483 - val_loss: 1.3203
Epoch 4/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.6031 - loss: 1.1799 - val_accuracy: 0.5719 - val_loss: 1.2440
Epoch 5/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.6261 - loss: 1.1139 - val_accuracy: 0.6020 - val_loss: 1.1943
Epoch 6/25
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.6597 - loss: 1.0001 - val_accuracy: 0.6178 - val_loss: 1.1786
Epoch 7/25
[1m175/175

In [13]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:}')

[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7477 - loss: 1.0009
Test Accuracy: 0.7372638583183289
