In [20]:
import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from datetime import datetime
from sklearn import metrics
from tqdm import tqdm

In [27]:
# Load the table holding the serialized feature vectors and their class labels.
DATA_CSV = 'data.csv'
df = pd.read_csv(DATA_CSV)

In [None]:
def _parse_feature_vector(value):
    """Turn one cell of the feature column into a 1-D numeric array.

    A cell read from the CSV is a string of the form ``"[v1 v2 ... vn]"``:
    the first and last characters are dropped and the remainder is parsed as
    space-separated numbers.  A cell that is already a NumPy array (because
    this notebook cell was executed before) is returned untouched, so running
    the cell again no longer fails on the already-converted column.
    """
    if isinstance(value, np.ndarray):
        return value
    return np.fromstring(value[1:-1], sep=' ')


df['the extracted feature'] = df['the extracted feature'].apply(_parse_feature_vector)

In [36]:
# Collect the per-row feature vectors and the class labels as plain lists,
# then convert each list into a NumPy array.
feature_rows = df['the extracted feature'].tolist()
class_labels = df['class'].tolist()

X = np.array(feature_rows)
y = np.array(class_labels)

In [37]:
# Sanity check: dimensions of the feature matrix and of the label vector.
print(f"{X.shape} {y.shape}")

(587, 40) (587,)


In [38]:
# Map each class name to an integer id, then expand the ids into one-hot rows.
labelencoder = LabelEncoder()
integer_labels = labelencoder.fit_transform(y)
y = to_categorical(integer_labels)

In [39]:
# Display the label matrix produced by to_categorical (one row per sample).
y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [40]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [41]:
# Shapes of the four arrays returned by the split, in the order they were assigned.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((469, 40), (118, 40), (469, 10), (118, 10))

In [42]:
from tensorflow.keras.regularizers import l1
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization, LeakyReLU

# Both network dimensions are read from the data, so the model keeps matching
# its inputs and targets if the feature length or the number of classes changes.
num_features = X_train.shape[1]
num_labels = y.shape[1]

# Width of each hidden block, ordered from the input side to the output side.
hidden_units = (100, 200, 100)

model = Sequential()

for block_index, units in enumerate(hidden_units):
    # Only the first Dense layer declares the input width.
    input_kwargs = {'input_dim': num_features} if block_index == 0 else {}

    # One hidden block: L1-regularized Dense -> BatchNorm -> LeakyReLU -> Dropout.
    model.add(Dense(units, kernel_regularizer=l1(0.001), **input_kwargs))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.05))
    model.add(Dropout(0.5))

# Softmax output with one unit per column of the one-hot label matrix.
model.add(Dense(num_labels, activation='softmax'))

In [43]:
# Training configuration: categorical cross-entropy against the one-hot
# targets, an Adam optimizer built with no arguments, and accuracy as the
# reported metric.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [44]:
# Print the layer-by-layer summary (layer type, output shape, parameter count).
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 100)               4100      
                                                                 
 batch_normalization_3 (Batc  (None, 100)              400       
 hNormalization)                                                 
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 100)               0         
                                                                 
 dropout_3 (Dropout)         (None, 100)               0         
                                                                 
 dense_5 (Dense)             (None, 200)               20200     
                                                                 
 batch_normalization_4 (Batc  (None, 200)              800       
 hNormalization)                                      

In [48]:
num_epochs = 100
num_batch_size = 16

# Write the model to disk only on epochs where the monitored validation loss
# improves (the training log reports these as "val_loss improved").
checkpointer = ModelCheckpoint(
    filepath='saved_models/audio_classification.hdf5',
    save_best_only=True,
    verbose=1,
)

# NOTE(review): the validation data is the held-out test split, so the
# checkpoint that gets kept is selected using test samples.
fit_options = dict(
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(X_test, y_test),
    callbacks=[checkpointer],
    verbose=1,
)

# Time the fit call by taking a timestamp before and after it.
start = datetime.now()
model.fit(X_train, y_train, **fit_options)
duration = datetime.now() - start

print("Training completed in time: ", duration)

Epoch 1/100
 1/30 [>.............................] - ETA: 0s - loss: 2.9860 - accuracy: 0.3750
Epoch 1: val_loss improved from inf to 2.69833, saving model to saved_models/audio_classification.hdf5
Epoch 2/100
 1/30 [>.............................] - ETA: 0s - loss: 3.1134 - accuracy: 0.3125
Epoch 2: val_loss improved from 2.69833 to 2.66870, saving model to saved_models/audio_classification.hdf5
Epoch 3/100
 1/30 [>.............................] - ETA: 0s - loss: 3.4545 - accuracy: 0.3750
Epoch 3: val_loss improved from 2.66870 to 2.66209, saving model to saved_models/audio_classification.hdf5
Epoch 4/100
 1/30 [>.............................] - ETA: 0s - loss: 2.8774 - accuracy: 0.5625
Epoch 4: val_loss improved from 2.66209 to 2.65577, saving model to saved_models/audio_classification.hdf5
Epoch 5/100
 1/30 [>.............................] - ETA: 0s - loss: 2.8834 - accuracy: 0.3750
Epoch 5: val_loss improved from 2.65577 to 2.64290, saving model to saved_models/audio_classification

## Testing

In [50]:
import librosa

def features_extractor(audio_path, n_mfcc=40):
    """Summarise an audio file as a fixed-length vector of averaged MFCCs.

    Parameters
    ----------
    audio_path : str or path-like
        Location of the audio file; passed unchanged to ``librosa.load``.
    n_mfcc : int, default 40
        Number of MFCC coefficients to compute.  The default reproduces the
        value that used to be hard-coded here.  It has to equal the input
        width of the model the features are fed to.

    Returns
    -------
    numpy.ndarray
        The MFCC matrix returned by ``librosa.feature.mfcc`` averaged over
        its second axis, i.e. one mean value per coefficient.
    """
    audio, sample_rate = librosa.load(audio_path, res_type='scipy')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    # Transpose, then average over axis 0: this collapses the second axis of
    # the MFCC matrix and leaves one value per coefficient.
    return np.mean(mfccs_features.T, axis=0)

In [None]:
filename = "./raaga/bhairavi/bhairavi_1.wav"

# Extract the feature vector and shape it as a single-row batch for the model.
mfccs_scaled_features = features_extractor(filename).reshape(1, -1)

# The index of the highest score in the row is the predicted integer label;
# the fitted encoder maps it back to the class name.
predictions = model.predict(mfccs_scaled_features)
predicted_label = predictions.argmax(axis=1)
prediction_class = labelencoder.inverse_transform(predicted_label)

print("Predicted Label:", predicted_label)
print("Prediction Class:", prediction_class)

Predicted Label: [2]
Prediction Class: ['bhairavi']


In [54]:
filename = "./raaga/dkanada/dkanada_2.wav"

# Extract the feature vector and shape it as a single-row batch for the model.
mfccs_scaled_features = features_extractor(filename).reshape(1, -1)

# The index of the highest score in the row is the predicted integer label;
# the fitted encoder maps it back to the class name.
predictions = model.predict(mfccs_scaled_features)
predicted_label = predictions.argmax(axis=1)
prediction_class = labelencoder.inverse_transform(predicted_label)

print("Predicted Label:", predicted_label)
print("Prediction Class:", prediction_class)

Predicted Label: [6]
Prediction Class: ['dkanada']
