In [7]:
pip install librosa

Collecting librosa
  Obtaining dependency information for librosa from https://files.pythonhosted.org/packages/e2/a2/4f639c1168d7aada749a896afb4892a831e2041bebdcf636aebfe9e86556/librosa-0.10.1-py3-none-any.whl.metadata
  Using cached librosa-0.10.1-py3-none-any.whl.metadata (8.3 kB)
Collecting audioread>=2.1.9 (from librosa)
  Obtaining dependency information for audioread>=2.1.9 from https://files.pythonhosted.org/packages/57/8d/30aa32745af16af0a9a650115fbe81bde7c610ed5c21b381fca0196f3a7f/audioread-3.0.1-py3-none-any.whl.metadata
  Using cached audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Obtaining dependency information for soundfile>=0.12.1 from https://files.pythonhosted.org/packages/50/ff/26a4ee48d0b66625a4e4028a055b9f25bc9d7c7b2d17d21a45137621a50d/soundfile-0.12.1-py2.py3-none-win_amd64.whl.metadata
  Using cached soundfile-0.12.1-py2.py3-none-win_amd64.whl.metadata (14 kB)
Collecting pooch>=1.0 (from librosa)
  Obtaining depend

In [2]:
import IPython.display as ipd
import librosa
import librosa.display
import pandas as pd
import os, time, warnings
import seaborn as sns
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense,
    Conv1D,
    MaxPooling1D,
    BatchNormalization,
    Dropout,
    Flatten,
    Conv2D,
    MaxPool2D,
)

# Suppress every Python warning for the rest of the kernel session to keep the
# cell outputs short.
# NOTE(review): this also hides deprecation notices from the libraries imported
# above — consider narrowing the filter while debugging.
warnings.filterwarnings("ignore")

In [2]:
pip install tensorflow


Collecting tensorflowNote: you may need to restart the kernel to use updated packages.

  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/e4/14/d795bb156f8cc10eb1dcfe1332b7dbb8405b634688980aa9be8f885cc888/tensorflow-2.16.1-cp311-cp311-win_amd64.whl.metadata
  Downloading tensorflow-2.16.1-cp311-cp311-win_amd64.whl.metadata (3.5 kB)
Collecting tensorflow-intel==2.16.1 (from tensorflow)
  Obtaining dependency information for tensorflow-intel==2.16.1 from https://files.pythonhosted.org/packages/e0/36/6278e4e7e69a90c00e0f82944d8f2713dd85a69d1add455d9e50446837ab/tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl.metadata
  Downloading tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.16.1->tensorflow)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/a2/ad/e0d3c824784ff121c03cc031f944bc7e139a8f1870ffd2845cc2dd76f6c4/absl_py-2.1.0-

In [3]:
# Results table shared by the evaluation cells: one row per evaluated model.
log_cols = [
    "model",       # short model name, e.g. "ANN"
    "accuracy",    # test accuracy, stored as a percentage
    "train_time",  # wall-clock seconds spent fitting
    "pred_time",   # wall-clock seconds spent predicting the test set
]
log = pd.DataFrame(columns=log_cols)


In [4]:
# Root folder of the UrbanSound8K download. The metadata CSV and the fold*
# audio directories are both resolved relative to it. Set the URBANSOUND8K_DIR
# environment variable to run the notebook on another machine; the default is
# the original author's location.
audio_dataset_path = os.environ.get(
    "URBANSOUND8K_DIR", "C:/Users/THASNEEM FATHIMA/Downloads/UrbanSound8k"
)
meta_data = pd.read_csv(audio_dataset_path + "/UrbanSound8K.csv")

# Raw snake_case labels -> human-readable class names. A single dict-based
# replace() does the work of the ten separate per-class calls; labels that are
# not listed here are left untouched.
class_display_names = {
    "air_conditioner": "Air Conditioner",
    "car_horn": "Car Horn",
    "children_playing": "Children Playing",
    "dog_bark": "Dog Bark",
    "drilling": "Drilling",
    "engine_idling": "Engine Idling",
    "gun_shot": "Gun Shot",
    "jackhammer": "Jackhammer",
    "siren": "Siren",
    "street_music": "Street Music",
}
meta_data["class"] = meta_data["class"].replace(class_display_names)
meta_data.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,Dog Bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,Children Playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,Children Playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,Children Playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,Children Playing


In [5]:
# Sanity check: list the distinct display names found under each numeric
# classID (ideally exactly one name per ID).
class_names_by_id = meta_data.groupby("classID")["class"]
class_names_by_id.unique()

classID
0     [Air Conditioner]
1            [Car Horn]
2    [Children Playing]
3            [Dog Bark]
4            [Drilling]
5       [Engine Idling]
6            [Gun Shot]
7          [Jackhammer]
8               [Siren]
9        [Street Music]
Name: class, dtype: object

In [6]:
# Build one [feature_vector, class_label] pair per audio clip listed in the
# metadata table.
extracted = []

# Resolve the dataset root once instead of on every iteration.
dataset_root = os.path.abspath(audio_dataset_path)

# Iterate over just the three columns that are needed; this avoids building a
# full Series per row as iterrows() does.
clips = zip(meta_data["fold"], meta_data["slice_file_name"], meta_data["class"])

# `total` lets tqdm draw a real progress bar with an ETA; a bare iterator has
# no length, so tqdm would only show a running count.
for fold, slice_file_name, final_class_labels in tqdm(clips, total=len(meta_data)):
    # Clips are stored as <root>/fold<N>/<slice_file_name>.
    file_name = os.path.join(dataset_root, "fold" + str(fold), str(slice_file_name))
    audio, sample_rate = librosa.load(file_name, res_type="kaiser_fast")
    feature = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=128)
    # Average each of the 128 coefficients across the clip so every clip yields
    # a fixed-length vector regardless of its duration.
    scaled_feature = np.mean(feature.T, axis=0)
    extracted.append([scaled_feature, final_class_labels])

8732it [12:28, 11.67it/s]


In [8]:
pip install resampy


Collecting resampy
  Obtaining dependency information for resampy from https://files.pythonhosted.org/packages/4d/b9/3b00ac340a1aab3389ebcc52c779914a44aadf7b0cb7a3bf053195735607/resampy-0.4.3-py3-none-any.whl.metadata
  Downloading resampy-0.4.3-py3-none-any.whl.metadata (3.0 kB)
Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)
   ---------------------------------------- 0.0/3.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/3.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/3.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/3.1 MB 325.1 kB/s eta 0:00:10
    --------------------------------------- 0.1/3.1 MB 465.5 kB/s eta 0:00:07
   - -------------------------------------- 0.1/3.1 MB 459.5 kB/s eta 0:00:07
   - -------------------------------------- 0.1/3.1 MB 504.4 kB/s eta 0:00:06
   - -------------------------------------- 0.1/3.1 MB 532.5 kB/s eta 0:00:06
   -- ------------------------------------- 0.2/3.1 MB 517.2 kB/s eta 

In [7]:
# Tabulate the (feature vector, label) pairs and snapshot them to disk —
# presumably so the slow extraction pass does not have to be repeated.
feature_columns = ["feature", "class"]
extracted_df = pd.DataFrame(data=extracted, columns=feature_columns)
extracted_df.to_pickle("extracted_df.pkl")
extracted_df.head()

Unnamed: 0,feature,class
0,"[-217.35526, 70.22338, -130.38527, -53.282898,...",Dog Bark
1,"[-424.09818, 109.34077, -52.919525, 60.86475, ...",Children Playing
2,"[-458.79114, 121.38419, -46.520657, 52.00812, ...",Children Playing
3,"[-413.89984, 101.66373, -35.42945, 53.036354, ...",Children Playing
4,"[-446.60352, 113.68541, -52.402206, 60.302044,...",Children Playing


In [8]:
# Turn the extracted pairs into plain arrays for modelling:
#   X -> one row of averaged MFCC values per clip
#   y -> the matching class-name string per clip
final = pd.DataFrame(data=extracted, columns=["feature", "class"])
feature_rows = final["feature"].to_list()
label_values = final["class"].to_list()
X = np.array(feature_rows)
y = np.array(label_values)

In [9]:
# Class names -> integer ids -> one-hot rows. The fitted encoder `le` stays in
# scope because ANN_Prediction uses it to map predictions back to class names.
le = LabelEncoder()
integer_labels = le.fit_transform(y)
Y = to_categorical(integer_labels)

In [10]:
# Hold out 20% of the clips for testing.
# NOTE(review): the metadata shows several slices sharing one fsID (same source
# recording); a random split can put such slices on both sides. Consider
# splitting on the `fold` column instead — confirm against the dataset's
# recommended protocol.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is repeatable
)
print("Number of training samples = ", len(X_train))
print("Number of testing samples = ", len(X_test))

Number of training samples =  6985
Number of testing samples =  1747


In [11]:
# Fully connected classifier: a funnel of ReLU layers followed by a softmax
# over the classes.
num_labels = Y.shape[1]

# Take the input width from the training data rather than repeating the MFCC
# count as a literal, so the model follows any change to the feature length.
num_features = X_train.shape[1]

first_hidden_units = 1000
remaining_hidden_units = (750, 500, 250, 100, 50)

ANN_Model = Sequential()
ANN_Model.add(
    Dense(first_hidden_units, activation="relu", input_shape=(num_features,))
)
for units in remaining_hidden_units:
    ANN_Model.add(Dense(units, activation="relu"))
# One output unit per class; softmax yields a probability distribution.
ANN_Model.add(Dense(num_labels, activation="softmax"))
ANN_Model.summary()

In [12]:
# The targets are one-hot rows (built with to_categorical), hence the
# categorical cross-entropy loss. Tracking "accuracy" makes evaluate() return
# [loss, accuracy], which the logging cell reads by index.
ANN_Model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [13]:
num_epochs = 250
num_batch_size = 32

# Time only the fit() call. perf_counter() is a monotonic clock intended for
# measuring elapsed intervals.
t0 = time.perf_counter()

# NOTE(review): the held-out test split doubles as validation data here, so the
# val_* curves are not an independent estimate if they are used for tuning.
ANN_Results = ANN_Model.fit(
    X_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(X_test, y_test),
)

# Stop the clock before saving/post-processing so train_m1 reflects training
# time alone.
train_m1 = round(time.perf_counter() - t0, 3)

ANN_Model.save("Model1.h5")
print("ANN Model Saved")

# Per-epoch loss/accuracy history as a DataFrame.
train_hist_m1 = pd.DataFrame(ANN_Results.history)

Epoch 1/250
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.3177 - loss: 2.6251 - val_accuracy: 0.6188 - val_loss: 1.1651
Epoch 2/250
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.6513 - loss: 1.0426 - val_accuracy: 0.7453 - val_loss: 0.8162
Epoch 3/250
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.7569 - loss: 0.7515 - val_accuracy: 0.8037 - val_loss: 0.6237
Epoch 4/250
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.8138 - loss: 0.5589 - val_accuracy: 0.8283 - val_loss: 0.5352
Epoch 5/250
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.8498 - loss: 0.4621 - val_accuracy: 0.8638 - val_loss: 0.4721
Epoch 6/250
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.8865 - loss: 0.3602 - val_accuracy: 0.8724 - val_loss: 0.4263
Epoch 7/250
[1m



ANN Model Saved


In [15]:
# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=["accuracy"].
acc_m1 = ANN_Model.evaluate(X_test, y_test, verbose=0)

# Time a full prediction pass over the test set with a monotonic clock.
t0 = time.perf_counter()
y_pred_m1 = ANN_Model.predict(X_test, verbose=0)
pred_m1 = round(time.perf_counter() - t0, 3)

log_entry = pd.DataFrame(
    [["ANN", acc_m1[1] * 100, train_m1, pred_m1]], columns=log_cols
)

# Drop any earlier "ANN" row so re-running this cell updates the entry instead
# of appending a duplicate.
other_models = log[log["model"] != "ANN"]

# Avoid concatenating onto an empty frame: that would turn the numeric columns
# into object dtype and is deprecated in recent pandas.
if other_models.empty:
    log = log_entry
else:
    log = pd.concat([other_models, log_entry], ignore_index=True)


In [16]:
def ANN_Prediction(file_name):
    """Classify one audio file with the trained ANN and report the result.

    The clip goes through the same feature pipeline as the training data:
    load with the "kaiser_fast" resampler, compute 128 MFCCs, and average
    them over time into a single vector.

    Parameters
    ----------
    file_name : str
        Path to the audio file to classify.

    Returns
    -------
    The predicted class label, decoded with the fitted label encoder ``le``.
    The label is also printed.
    """
    audio_data, sample_rate = librosa.load(file_name, res_type="kaiser_fast")
    feature = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=128)
    feature_scaled = np.mean(feature.T, axis=0)

    # The model expects a batch, so wrap the single vector as a one-row array.
    prediction_feature = np.array([feature_scaled])

    class_probabilities = ANN_Model.predict(prediction_feature)
    # Index of the most probable class for each row (a single row here).
    predicted_index = np.argmax(class_probabilities, axis=-1)
    predicted_class = le.inverse_transform(predicted_index)

    print("ANN has predicted the class as  --> ", predicted_class[0])
    # Return the label as well so callers can use it programmatically.
    return predicted_class[0]

In [18]:
# Demo: classify one clip from fold 8, then embed a player to listen to it.
file_name = f"{audio_dataset_path}/fold8/103076-3-0-0.wav"
ANN_Prediction(file_name)
ipd.Audio(file_name)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
ANN has predicted the class as  -->  Dog Bark
