In [1]:
import librosa
import numpy as np
import pandas as pd
import os
import warnings
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical


In [2]:
# Silence FutureWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Location of the metadata CSV and of the directory holding the audio clips.
esc50_path = "speech/esc50.csv"
audios_path = "speech/audio/audio/44100"

# Read the metadata table (one row per clip) and preview its first rows.
esc50 = pd.read_csv(filepath_or_buffer=esc50_path)
esc50.head(n=5)


Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,1-100038-A-14.wav,1,14,chirping_birds,False,100038,A
2,1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A
3,1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B
4,1-101296-A-19.wav,1,19,thunderstorm,False,101296,A


In [3]:
# Load every clip listed in the metadata and reduce it to one MFCC vector.
mfcc_features = []
categories = []

# Walk the filename / category columns together instead of indexing by row.
for audio_name, category in zip(esc50['filename'], esc50['category']):
    # Build the clip path and normalise Windows separators to forward slashes.
    audio_file_path = os.path.join(audios_path, audio_name).replace('\\', '/')

    # sr=None keeps the file's native sampling rate (no resampling on load).
    y, sr = librosa.load(audio_file_path, sr=None)

    # Use the rate the signal was actually loaded at. Hard-coding 22050 here
    # while loading at the native rate builds the mel filterbank for the
    # wrong frequency range.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

    # Average each of the 40 coefficients over all frames so every clip
    # yields a fixed-length vector.
    mfcc_mean = np.mean(mfcc.T, axis=0)

    mfcc_features.append(mfcc_mean)
    categories.append(category)    # collected as Python lists for now

# Convert the accumulated lists to NumPy arrays for scikit-learn / Keras.
mfcc_features = np.array(mfcc_features)
categories = np.array(categories)

print(f'MFCC Features Shape: {mfcc_features.shape}')
print(f'category Shape: {categories.shape}')



MFCC Features Shape: (2000, 40)
category Shape: (2000,)


In [4]:
# Encode the string labels and split the data into train and test sets.
labelencoder = LabelEncoder()

# Integer class ids are kept separately so they can drive the stratified split.
category_ids = labelencoder.fit_transform(categories)

# One-hot targets, as required by the categorical cross-entropy loss.
categories = to_categorical(category_ids)

# A fixed random_state makes the split reproducible across kernel restarts;
# stratify keeps the class proportions the same in both partitions.
x_train, x_test, y_train, y_test = train_test_split(
    mfcc_features,
    categories,
    test_size=0.3,
    random_state=42,
    stratify=category_ids,
)



In [5]:

# Layer widths are taken from the prepared data rather than hard-coded, so the
# network stays in sync with the feature extraction and the label encoding.
n_features = x_train.shape[1]   # length of each per-clip MFCC vector
n_classes = y_train.shape[1]    # width of the one-hot targets

# Fully connected classifier: two ReLU hidden layers and a softmax output.
# An explicit Input layer replaces `input_shape=` on the first Dense layer,
# which Keras warns against for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Dense(200, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(n_classes, activation="softmax")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Configure training: Adam optimizer, categorical cross-entropy loss for the
# one-hot targets, and accuracy reported as the metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer overview of the compiled model.
model.summary()

In [7]:
# Train the network and keep the returned History object, so the per-epoch
# loss/accuracy values remain available instead of being discarded after the
# cell's repr is shown.
# NOTE(review): the held-out split doubles as validation data here, so the
# val_* numbers are not an independent test estimate.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=50,
    batch_size=20,
)


Epoch 1/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0230 - loss: 19.0217 - val_accuracy: 0.0750 - val_loss: 4.9665
Epoch 2/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.1437 - loss: 4.0440 - val_accuracy: 0.1050 - val_loss: 3.8735
Epoch 3/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2049 - loss: 3.2823 - val_accuracy: 0.1567 - val_loss: 3.4250
Epoch 4/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2542 - loss: 2.8312 - val_accuracy: 0.1667 - val_loss: 3.1903
Epoch 5/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.2841 - loss: 2.6715 - val_accuracy: 0.1967 - val_loss: 3.1338
Epoch 6/50
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.3135 - loss: 2.5175 - val_accuracy: 0.2483 - val_loss: 3.0281
Epoch 7/50
[1m70/70[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1e32c630f80>