In [None]:
# This Notebook was authored by Prasann
# Github - https://github.com/prasann2004
# Kaggle - https://www.kaggle.com/newtonbaba12345
# Linkedin - https://www.linkedin.com/in/prasann-8b9a04263/

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras.models as models
import tensorflow.keras.layers as layers
import IPython
import sklearn
import seaborn as sns
from sklearn.utils import shuffle

%load_ext tensorboard



In [4]:
CSV_FILE_PATH = "../input/environmental-sound-classification-50/esc50.csv"  # path of csv file
DATA_PATH = "../input/environmental-sound-classification-50/audio/audio/16000/" # path to folder containing audio files
df = pd.read_csv(CSV_FILE_PATH)

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,1-100038-A-14.wav,1,14,chirping_birds,False,100038,A
2,1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A
3,1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B
4,1-101296-A-19.wav,1,19,thunderstorm,False,101296,A


In [5]:
df_10 = df[df['esc10']==True]
df_10 = df_10.drop(['esc10','src_file','take'], axis=1)

classes = df_10['category'].unique()
class_dict = {i:x for x,i in enumerate(classes)}
print(class_dict)
df_10['target'] = df_10['category'].map(class_dict)

sample_df = df_10.drop_duplicates(subset=['target'])
sample_df = sample_df.drop(sample_df.index[4:10])
print(sample_df.head())

{'dog': 0, 'chainsaw': 1, 'crackling_fire': 2, 'helicopter': 3, 'rain': 4, 'crying_baby': 5, 'clock_tick': 6, 'sneezing': 7, 'rooster': 8, 'sea_waves': 9}
             filename  fold  target        category
0    1-100032-A-0.wav     1       0             dog
24  1-116765-A-41.wav     1       1        chainsaw
54   1-17150-A-12.wav     1       2  crackling_fire
55  1-172649-A-40.wav     1       3      helicopter


In [6]:
class conf:
    sr = 16000
    duration = 3
    hop_length = 340*duration
    fmin = 20
    fmax = sr // 2
    n_mels = 128
    n_fft = n_mels * 20
    samples = sr * duration
    epochs = 30

def read_audio(conf, pathname, trim_long_data):
    y, sr = librosa.load(pathname, sr=conf.sr)
    if 0 < len(y): 
        y, _ = librosa.effects.trim(y)
    if len(y) > conf.samples: 
        if trim_long_data:
            y = y[0:0+conf.samples]
    else: 
        padding = conf.samples - len(y)    
        offset = padding // 2
        y = np.pad(y, (offset, conf.samples - len(y) - offset), 'constant')
    return y

def audio_to_melspectrogram(conf, audio):
    spectrogram = librosa.feature.melspectrogram(y=audio, 
                                                 sr=conf.sr,
                                                 n_mels=conf.n_mels,
                                                 hop_length=conf.hop_length,
                                                 n_fft=conf.n_fft,
                                                 fmin=conf.fmin,
                                                 fmax=conf.fmax)
    spectrogram = librosa.power_to_db(spectrogram)
    return spectrogram

def show_melspectrogram(conf, mels, title='Log-frequency power spectrogram'):
    librosa.display.specshow(mels, x_axis='time', y_axis='mel', 
                             sr=conf.sr, hop_length=conf.hop_length,
                            fmin=conf.fmin, fmax=conf.fmax)
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.show()

In [33]:
INPUTSHAPE = (128, 16, 1)
def create_model():
    created_model =  models.Sequential([
        layers.Conv2D(64 , (3,3),activation = 'relu',padding='same', input_shape = INPUTSHAPE),
        layers.BatchNormalization(),
        layers.Conv2D(64, (3,3), activation='relu',padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2,2), strides=(2,2)),
        layers.Dropout(0.2),

        layers.Conv2D(128, (3,3), activation='relu',padding='same'),                      
        layers.BatchNormalization(),
        layers.Conv2D(128, (3,3), activation='relu',padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2,2), strides=(2,2)),
        layers.Dropout(0.2),

        layers.Conv2D(256, (3,3), activation='relu',padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(256, (3,3), activation='relu',padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2,2), strides=(2,2)),    
        layers.Dropout(0.2),

        layers.GlobalAveragePooling2D(),

        layers.Dense(256 , activation = 'relu'),
        layers.Dense(256 , activation = 'relu'),
        layers.Dense(len(classes) , activation = 'softmax')
    ])

    created_model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics=['acc'])
    return created_model

In [34]:
# Our model summary
model = create_model()
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 128, 16, 64)       640       
                                                                 
 batch_normalization_12 (Ba  (None, 128, 16, 64)       256       
 tchNormalization)                                               
                                                                 
 conv2d_13 (Conv2D)          (None, 128, 16, 64)       36928     
                                                                 
 batch_normalization_13 (Ba  (None, 128, 16, 64)       256       
 tchNormalization)                                               
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 64, 8, 64)         0         
 g2D)                                                            
                                                      

In [35]:
%mkdir "cpkt"
%mkdir "logs"
LOGDIR = "logs"
CPKT = "cpkt/"

callback_1 = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto',
    baseline=None, restore_best_weights=False
)

callback_2 = tf.keras.callbacks.ModelCheckpoint(
    CPKT, monitor='val_loss', verbose=0, save_best_only=True,
    save_weights_only=True, mode='auto', save_freq='epoch', options=None
)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=LOGDIR)

mkdir: cannot create directory 'cpkt': File exists
mkdir: cannot create directory 'logs': File exists


In [36]:
def preprocess(fold):
    x_train , y_train = [] , []
    x_val , y_val = [] , []
    
    train_df = df_10[df_10.fold != fold]
    val_df = df_10[df_10.fold == fold]
    
    for data in train_df.itertuples():
        sig , sr = librosa.load(DATA_PATH+data[1], sr=16000)
        for i in range(4):
            sig_ = sig[i : int((i+2)+sr)]
            mel_spec = audio_to_melspectrogram(conf, sig_)
            x_train.append(mel_spec)
            y_train.append(data[3])

    for data in val_df.itertuples():
        sig , sr = librosa.load(DATA_PATH+data[1], sr=16000)
        for i in range(4):
            sig_ = sig[i : int((i+2)+sr)]
            mel_spec = audio_to_melspectrogram(conf, sig_)
            x_val.append(mel_spec)
            y_val.append(data[3])
            
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_val = np.array(x_val)
    y_val = np.array(y_val)

    y_train = tf.keras.utils.to_categorical(y_train , num_classes=len(classes))
    y_val = tf.keras.utils.to_categorical(y_val , num_classes=len(classes))

    
    x_train, y_train = shuffle(x_train, y_train)
    x_val, y_val = shuffle(x_val, y_val)
    
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], x_train.shape[2], 1)
    x_val = x_val.reshape(x_val.shape[0], x_val.shape[1], x_val.shape[2], 1)
    
    return (x_train, y_train, x_val, y_val)

In [37]:
num_folds = len(df_10.fold.unique())

model_history = []
metrics = []



for fold in range(1, num_folds+1):
    print('\n\nTraining fold', fold)
    print('*' * 20)
    
    x_train, y_train, x_val, y_val = preprocess(fold)
    model = create_model()
    history = model.fit(x_train,y_train ,
            validation_data=(x_val,y_val),
            epochs=conf.epochs,
            callbacks = [callback_1], verbose=2)
    eval_score = model.evaluate(x_val, y_val)
    print("Val Score: ",eval_score )
    model_history.append(history)
    metrics.append(eval_score)



Training fold 1
********************
Epoch 1/30


2024-01-18 21:49:56.020823: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_3/dropout_9/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


40/40 - 9s - loss: 1.3903 - acc: 0.4914 - val_loss: 6.2467 - val_acc: 0.1000 - 9s/epoch - 234ms/step
Epoch 2/30
40/40 - 1s - loss: 0.8297 - acc: 0.6883 - val_loss: 5.4521 - val_acc: 0.1875 - 591ms/epoch - 15ms/step
Epoch 3/30
40/40 - 1s - loss: 0.6709 - acc: 0.7484 - val_loss: 2.8550 - val_acc: 0.3625 - 592ms/epoch - 15ms/step
Epoch 4/30
40/40 - 1s - loss: 0.5262 - acc: 0.7969 - val_loss: 3.6554 - val_acc: 0.3875 - 592ms/epoch - 15ms/step
Epoch 5/30
40/40 - 1s - loss: 0.3741 - acc: 0.8594 - val_loss: 1.4279 - val_acc: 0.6000 - 597ms/epoch - 15ms/step
Epoch 6/30
40/40 - 1s - loss: 0.3082 - acc: 0.8898 - val_loss: 1.4531 - val_acc: 0.6250 - 595ms/epoch - 15ms/step
Epoch 7/30
40/40 - 1s - loss: 0.4113 - acc: 0.8398 - val_loss: 2.7234 - val_acc: 0.4875 - 591ms/epoch - 15ms/step
Epoch 8/30
40/40 - 1s - loss: 0.2887 - acc: 0.8945 - val_loss: 1.8684 - val_acc: 0.5875 - 588ms/epoch - 15ms/step
Epoch 9/30
40/40 - 1s - loss: 0.2358 - acc: 0.9109 - val_loss: 1.4224 - val_acc: 0.7125 - 589ms/epoch

2024-01-18 21:50:34.876948: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_4/dropout_12/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


40/40 - 7s - loss: 1.2923 - acc: 0.5500 - val_loss: 3.6923 - val_acc: 0.1375 - 7s/epoch - 175ms/step
Epoch 2/30
40/40 - 1s - loss: 0.7978 - acc: 0.7125 - val_loss: 3.0593 - val_acc: 0.4500 - 590ms/epoch - 15ms/step
Epoch 3/30
40/40 - 1s - loss: 0.5605 - acc: 0.7984 - val_loss: 2.0896 - val_acc: 0.5125 - 594ms/epoch - 15ms/step
Epoch 4/30
40/40 - 1s - loss: 0.5123 - acc: 0.8047 - val_loss: 2.8889 - val_acc: 0.4875 - 590ms/epoch - 15ms/step
Epoch 5/30
40/40 - 1s - loss: 0.4241 - acc: 0.8516 - val_loss: 2.0060 - val_acc: 0.5625 - 595ms/epoch - 15ms/step
Epoch 6/30
40/40 - 1s - loss: 0.2566 - acc: 0.8992 - val_loss: 1.6400 - val_acc: 0.6625 - 594ms/epoch - 15ms/step
Epoch 7/30
40/40 - 1s - loss: 0.3062 - acc: 0.8859 - val_loss: 1.4438 - val_acc: 0.6469 - 592ms/epoch - 15ms/step
Epoch 8/30
40/40 - 1s - loss: 0.2977 - acc: 0.8906 - val_loss: 1.5410 - val_acc: 0.6375 - 597ms/epoch - 15ms/step
Epoch 9/30
40/40 - 1s - loss: 0.2537 - acc: 0.9086 - val_loss: 2.1376 - val_acc: 0.5719 - 596ms/epoch

2024-01-18 21:51:13.240663: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_5/dropout_15/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


40/40 - 7s - loss: 1.3236 - acc: 0.5305 - val_loss: 7.8852 - val_acc: 0.2000 - 7s/epoch - 164ms/step
Epoch 2/30
40/40 - 1s - loss: 0.7919 - acc: 0.6953 - val_loss: 4.5746 - val_acc: 0.4000 - 594ms/epoch - 15ms/step
Epoch 3/30
40/40 - 1s - loss: 0.7222 - acc: 0.7578 - val_loss: 3.6284 - val_acc: 0.3625 - 596ms/epoch - 15ms/step
Epoch 4/30
40/40 - 1s - loss: 0.4789 - acc: 0.8328 - val_loss: 1.8446 - val_acc: 0.5375 - 596ms/epoch - 15ms/step
Epoch 5/30
40/40 - 1s - loss: 0.4655 - acc: 0.8430 - val_loss: 2.3038 - val_acc: 0.5250 - 597ms/epoch - 15ms/step
Epoch 6/30
40/40 - 1s - loss: 0.3652 - acc: 0.8797 - val_loss: 2.0497 - val_acc: 0.6375 - 599ms/epoch - 15ms/step
Epoch 7/30
40/40 - 1s - loss: 0.3346 - acc: 0.8758 - val_loss: 1.3706 - val_acc: 0.6281 - 633ms/epoch - 16ms/step
Epoch 8/30
40/40 - 1s - loss: 0.2718 - acc: 0.9094 - val_loss: 1.1217 - val_acc: 0.6875 - 604ms/epoch - 15ms/step
Epoch 9/30
40/40 - 1s - loss: 0.1984 - acc: 0.9227 - val_loss: 1.4813 - val_acc: 0.7063 - 594ms/epoch

2024-01-18 21:51:47.074777: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_6/dropout_18/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


40/40 - 6s - loss: 1.2139 - acc: 0.5633 - val_loss: 8.5558 - val_acc: 0.1875 - 6s/epoch - 161ms/step
Epoch 2/30
40/40 - 1s - loss: 0.7173 - acc: 0.7430 - val_loss: 4.2231 - val_acc: 0.3156 - 590ms/epoch - 15ms/step
Epoch 3/30
40/40 - 1s - loss: 0.5872 - acc: 0.7820 - val_loss: 5.3459 - val_acc: 0.2500 - 610ms/epoch - 15ms/step
Epoch 4/30
40/40 - 1s - loss: 0.4602 - acc: 0.8422 - val_loss: 3.1492 - val_acc: 0.3969 - 599ms/epoch - 15ms/step
Epoch 5/30
40/40 - 1s - loss: 0.3700 - acc: 0.8562 - val_loss: 5.2497 - val_acc: 0.4625 - 594ms/epoch - 15ms/step
Epoch 6/30
40/40 - 1s - loss: 0.3771 - acc: 0.8586 - val_loss: 3.5766 - val_acc: 0.4125 - 588ms/epoch - 15ms/step
Epoch 7/30
40/40 - 1s - loss: 0.3060 - acc: 0.8828 - val_loss: 2.4302 - val_acc: 0.5312 - 594ms/epoch - 15ms/step
Epoch 8/30
40/40 - 1s - loss: 0.2206 - acc: 0.9125 - val_loss: 2.4067 - val_acc: 0.4875 - 597ms/epoch - 15ms/step
Epoch 9/30
40/40 - 1s - loss: 0.1500 - acc: 0.9461 - val_loss: 1.4533 - val_acc: 0.6625 - 599ms/epoch

2024-01-18 21:52:30.979785: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_7/dropout_21/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


40/40 - 7s - loss: 1.3493 - acc: 0.5281 - val_loss: 5.7208 - val_acc: 0.2625 - 7s/epoch - 166ms/step
Epoch 2/30
40/40 - 1s - loss: 0.8089 - acc: 0.7023 - val_loss: 6.5571 - val_acc: 0.2000 - 601ms/epoch - 15ms/step
Epoch 3/30
40/40 - 1s - loss: 0.7067 - acc: 0.7422 - val_loss: 3.1165 - val_acc: 0.3875 - 592ms/epoch - 15ms/step
Epoch 4/30
40/40 - 1s - loss: 0.4278 - acc: 0.8461 - val_loss: 1.4570 - val_acc: 0.6375 - 593ms/epoch - 15ms/step
Epoch 5/30
40/40 - 1s - loss: 0.3689 - acc: 0.8617 - val_loss: 1.4252 - val_acc: 0.6500 - 589ms/epoch - 15ms/step
Epoch 6/30
40/40 - 1s - loss: 0.4375 - acc: 0.8445 - val_loss: 2.1751 - val_acc: 0.5688 - 588ms/epoch - 15ms/step
Epoch 7/30
40/40 - 1s - loss: 0.2646 - acc: 0.8977 - val_loss: 2.4242 - val_acc: 0.5875 - 588ms/epoch - 15ms/step
Epoch 8/30
40/40 - 1s - loss: 0.2660 - acc: 0.9086 - val_loss: 2.1958 - val_acc: 0.5875 - 594ms/epoch - 15ms/step
Epoch 9/30
40/40 - 1s - loss: 0.2068 - acc: 0.9227 - val_loss: 1.6606 - val_acc: 0.6375 - 592ms/epoch

In [None]:
model.save("xyz")