In [17]:
import os, time, librosa, librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from tensorflow.keras import utils
from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop, Adam

import audio_data_loader as audioloader
import references as ref

In [2]:
def get_features(y, sr):
    """Summarise one audio signal as a flat list of scalar descriptors.

    Layout of the returned list, in order:
      * the mean of each of: RMS energy, spectral centroid, spectral
        bandwidth, spectral roll-off and zero-crossing rate;
      * (min, mean, max) for every row yielded by ``librosa.feature.mfcc``;
      * (min, mean, max) for every row yielded by
        ``librosa.feature.chroma_stft``.

    Args:
        y: audio samples, forwarded unchanged to the librosa extractors.
        sr: sampling rate, forwarded to every extractor that accepts it.

    Returns:
        list: the scalar values in the order described above.
    """
    feat = librosa.feature

    # Whole-signal descriptors: each extractor's output is collapsed to its mean.
    summary = [
        np.mean(feat.rms(y=y)),
        np.mean(feat.spectral_centroid(y=y, sr=sr)),
        np.mean(feat.spectral_bandwidth(y=y, sr=sr)),
        np.mean(feat.spectral_rolloff(y=y, sr=sr)),
        np.mean(feat.zero_crossing_rate(y)),
    ]

    # Row-wise descriptors: MFCC rows first, then chroma rows, three
    # statistics per row so the ordering stays min, mean, max.
    for extractor in (feat.mfcc, feat.chroma_stft):
        for row in extractor(y=y, sr=sr):
            summary.extend((np.min(row), np.mean(row), np.max(row)))

    return summary

In [3]:
def get_dataset(source):
    """Build feature and one-hot label arrays from the source table.

    For every row of ``source`` the audio referenced by ``item[0]`` is read
    with ``audioloader.read_audio``, turned into a feature vector with
    ``get_features``, and paired with a one-hot encoding of ``item[8]``.

    A row is kept only if BOTH its features and its label were produced, so
    the two returned arrays always have the same length and stay aligned
    row for row. Rows that fail are reported and skipped.

    Args:
        source: table exposing ``iterrows()`` and ``len()`` (a pandas
            DataFrame in this notebook).
            NOTE(review): ``item[0]`` is presumably the file path and
            ``item[8]`` the gender label - confirm against
            ``audioloader.create_source_dataset``.

    Returns:
        tuple: ``(features, labels)`` as NumPy arrays, one row per
        successfully processed item.
    """
    # One-hot width. NOTE(review): the "+ 1" presumably leaves room for
    # 1-based label ids - confirm against ``ref.gender_ref``.
    n_classes = len(ref.gender_ref) + 1

    features = []
    labels = []

    # ``total`` lets tqdm render a real progress bar instead of a bare counter.
    for _, item in tqdm(source.iterrows(), total=len(source)):
        try:
            signal, rate = audioloader.read_audio(item[0])
            feature_row = get_features(signal, rate)
            label_row = utils.to_categorical(item[8], n_classes)
        except Exception as err:
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
            # stop a multi-minute run; the reason is shown for diagnosis.
            print(f'Invalid object {item[0]}: {err!r}')
            continue

        # Append only after both halves succeeded, keeping x and y aligned.
        features.append(feature_row)
        labels.append(label_row)

    return np.array(features), np.array(labels)

In [4]:
# Build the table of audio items from the root path configured in `references`.
# get_dataset() later walks it with .iterrows() and reads positions 0 and 8
# of each row.
# NOTE(review): the column layout is defined inside audio_data_loader - confirm there.
source = audioloader.create_source_dataset(ref.root_path)

Data source array is prepared - 0.03191423416137695 c


In [5]:
# Extract the feature matrix `x` and one-hot label matrix `y` for every row of `source`.
# This is the slow cell of the notebook: the recorded run took about 4 minutes for 1440 rows.
x, y = get_dataset(source)

1440it [03:59,  6.02it/s]


In [6]:
# Hold out 10% of the samples; the fixed seed keeps the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
    shuffle=True,
)

In [7]:
# Sanity-check the split: one shape per line, features first, then labels.
print(x_train.shape, x_val.shape, y_train.shape, y_val.shape, sep='\n')

(1296, 101)
(144, 101)
(1296, 3)
(144, 3)


In [20]:
# Fully connected classifier over the per-file feature vectors.
# The input width and the number of output units are taken from the training
# arrays rather than hard-coded, so the network keeps matching the data if
# get_features() or the label encoding changes. An explicit Input layer
# replaces the `input_shape=` keyword on the first Dense layer.
model = Sequential([
    Input(shape=x_train.shape[1:]),
    Dense(200, activation='relu'),
    Dense(120, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    # One softmax unit per column of the one-hot label matrix.
    Dense(y_train.shape[1], activation='softmax'),
])

# Labels are one-hot rows, hence categorical (not sparse) cross-entropy.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [21]:
# Train for 50 epochs. `validation_split` sets aside a further 10% of the
# training arrays for per-epoch monitoring; x_val / y_val are not passed here
# and stay unseen until the evaluate() cell.
model.fit(
    x_train,
    y_train,
    batch_size=50,
    epochs=50,
    validation_split=0.1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1a11a8dc820>

In [22]:
# Score the trained model on the hold-out split produced by train_test_split.
# NOTE(review): despite its name, `pred` does not hold predictions - per the
# Keras docs evaluate() returns the loss followed by the compiled metrics
# (accuracy here). The value is assigned but not displayed by this cell.
pred = model.evaluate(x_val, y_val)

