In [1]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio

In [2]:
paths=[]
labels=[]

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        path = os.path.join(dirname, filename)
        paths.append(path)
        label = filename.split('_')[-1]
        label = label.split('.')[0]
        labels.append(label.lower())

In [3]:
df = pd.DataFrame()
df['path'] = paths
df['label'] = labels

In [4]:
def get_mfcc(filename):
    y, sr = librosa.load(filename, duration=3, offset=0.5)
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    return mfcc

In [5]:
X_mfcc = df['path'].apply(lambda x: get_mfcc(x))

In [6]:
X = [x for x in X_mfcc]
X = np.array(X)

In [7]:
X = np.expand_dims(X, -1)
X.shape

(5600, 40, 1)

In [8]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()
y = enc.fit_transform(df[['label']])

In [9]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

model = Sequential([
    LSTM(128, return_sequences=False, input_shape=(40, 1)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(7, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'precision', 'recall', 'f1_score'])
model.summary()

  super().__init__(**kwargs)


In [10]:
history = model.fit(X, y.toarray(), validation_split=0.2, epochs=12, batch_size=512, shuffle=True)

Epoch 1/12
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 71ms/step - accuracy: 0.2626 - f1_score: 0.2175 - loss: 1.8521 - precision: 0.1143 - recall: 1.7857e-04 - val_accuracy: 0.1321 - val_f1_score: 0.1521 - val_loss: 2.0535 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/12
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4566 - f1_score: 0.3713 - loss: 1.5796 - precision: 0.7272 - recall: 0.0609 - val_accuracy: 0.1107 - val_f1_score: 0.1184 - val_loss: 2.0276 - val_precision: 0.3209 - val_recall: 0.0768
Epoch 3/12
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5850 - f1_score: 0.4821 - loss: 1.2467 - precision: 0.7658 - recall: 0.2696 - val_accuracy: 0.1348 - val_f1_score: 0.1614 - val_loss: 1.7984 - val_precision: 0.3109 - val_recall: 0.1071
Epoch 4/12
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.6654 - f1_score: 0.5669 - loss: 0.9648 - p

In [11]:
print(history.history)

{'accuracy': [0.3120535612106323, 0.4758928716182709, 0.6064732074737549, 0.667187511920929, 0.7388392686843872, 0.7995535731315613, 0.8482142686843872, 0.8832589387893677, 0.9037946462631226, 0.9084821343421936, 0.9200893044471741, 0.9361607432365417], 'f1_score': [<tf.Tensor: shape=(7,), dtype=float32, numpy=
array([0.51631194, 0.2910321 , 0.27730292, 0.06993002, 0.34151122,
       0.03799651, 0.26284474], dtype=float32)>, <tf.Tensor: shape=(7,), dtype=float32, numpy=
array([0.5843621 , 0.40368265, 0.52376974, 0.09225088, 0.68845886,
       0.02941174, 0.39229667], dtype=float32)>, <tf.Tensor: shape=(7,), dtype=float32, numpy=
array([0.65031314, 0.6061728 , 0.7939393 , 0.13058415, 0.8538092 ,
       0.04968941, 0.44089448], dtype=float32)>, <tf.Tensor: shape=(7,), dtype=float32, numpy=
array([0.69897676, 0.6520681 , 0.9100719 , 0.24927533, 0.9380014 ,
       0.10958902, 0.47251838], dtype=float32)>, <tf.Tensor: shape=(7,), dtype=float32, numpy=
array([0.7415605 , 0.7356912 , 0.942037