In [None]:
import librosa 
import librosa.feature 
import librosa.display 
import glob 
import numpy as np 
import matplotlib.pyplot as plt 
from keras.models import Sequential 
from keras.layers import Dense, Activation 
from keras.utils import to_categorical

In [None]:
def display_mfcc(song):
    """Plot the MFCC matrix of one audio file and print the raw arrays.

    Args:
        song: Path to an audio file that ``librosa.load`` can read. The same
            string is used as the figure title.

    Returns:
        None. The function shows a matplotlib figure and prints the decoded
        samples, the MFCC matrix, the sample rate, and the lengths of the
        sample array and of the MFCC matrix's first axis.
    """
    # librosa.load returns (samples, sample_rate); keep the rate under a real
    # name because it is used below rather than discarded.
    y, sr = librosa.load(song)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    plt.figure(figsize=(10, 4))
    # The rows are cepstral coefficients, not mel-frequency bins, so the y
    # axis is left as a plain index instead of being scaled with 'mel'.
    librosa.display.specshow(mfcc, sr=sr, x_axis='time')
    plt.ylabel('MFCC coefficient')
    plt.colorbar()
    plt.title(song)
    plt.tight_layout()
    plt.show()

    print(y, mfcc, sr)
    print(len(y), len(mfcc))

In [None]:
# Plot and print the MFCCs of one sample track from the rock genre folder.
display_mfcc('genres/rock/rock.00000.wav')

In [None]:
def extract_features_song(f, n_features=25000):
    """Turn one audio file into a fixed-length, peak-normalised MFCC vector.

    Args:
        f: Path to an audio file that ``librosa.load`` can read.
        n_features: Length of the returned vector. Longer MFCC data is
            truncated, shorter data is right-padded with zeros so that every
            song yields the same shape and the results can be stacked.

    Returns:
        1-D ``numpy.ndarray`` with exactly ``n_features`` elements. Non-zero
        input is scaled so the largest absolute value is 1.
    """
    y, sr = librosa.load(f)

    # Mel-frequency cepstral coefficients.
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    # Normalise by the peak magnitude. Silent (all-zero) or empty input has
    # no usable peak; dividing by it would fill the vector with NaNs, so it
    # is left unscaled instead.
    peak = np.amax(np.absolute(mfcc)) if mfcc.size else 0
    if peak > 0:
        mfcc = mfcc / peak

    # Row-major flatten, then force the requested length.
    flat = np.ndarray.flatten(mfcc)[:n_features]
    if flat.size < n_features:
        flat = np.pad(flat, (0, n_features - flat.size), mode='constant')
    return flat

In [None]:
def generate_features_and_labels(data_dir='genres', genres=None):
    """Extract features for every track and build matching one-hot labels.

    Files are looked up as ``{data_dir}/{genre}/{genre}.*.wav``.

    Args:
        data_dir: Root folder that contains one sub-folder per genre.
        genres: Ordered list of genre names. The position of a genre in this
            list is its class id. Defaults to the ten genres used by this
            notebook.

    Returns:
        Tuple ``(features, onehot_labels)``: the stacked per-track feature
        vectors and a one-hot matrix with ``len(genres)`` columns, row-aligned
        with ``features``.

    Raises:
        ValueError: If no audio file is found for any genre.
    """
    if genres is None:
        genres = ['blues', 'classical', 'country', 'disco', 'hiphop',
                  'jazz', 'metal', 'pop', 'reggae', 'rock']

    all_features = []
    all_label_ids = []

    for genre_id, genre in enumerate(genres):
        # glob returns files in arbitrary order; sort for a reproducible
        # sample order.
        sound_files = sorted(glob.glob(f'{data_dir}/{genre}/{genre}.*.wav'))
        print('Processing %d songs in %s genre...' % (len(sound_files), genre))
        for f in sound_files:
            all_features.append(extract_features_song(f))
            all_label_ids.append(genre_id)

    if not all_features:
        raise ValueError(
            'No audio files found under %r; check the dataset path.' % (data_dir,))

    # Convert labels to one-hot encoding. The class id comes from the genre
    # list rather than from the labels actually seen, so the label matrix
    # keeps one column per genre even when a genre folder is empty.
    label_ids = np.asarray(all_label_ids, dtype=np.int32)
    onehot_labels = to_categorical(label_ids, len(genres))
    return np.stack(all_features), onehot_labels

In [None]:
# Build the feature matrix and the one-hot label matrix for every track found
# by generate_features_and_labels(); this loads and processes each audio file.
features, labels = generate_features_and_labels()

In [None]:
# Number of feature rows (one per processed track).
len(features)

In [None]:
# Inspect the one-hot encoded label matrix.
labels

In [None]:
# Number of label rows; should match the number of feature rows.
len(labels)

In [None]:
print(np.shape(features))
print(np.shape(labels))

training_split = 0.8
# Width of the one-hot label block, read from the data instead of hard-coded.
n_classes = np.shape(labels)[1]

# Append the one-hot label columns to the right of the feature columns so a
# single row shuffle keeps every sample paired with its label.
alldata = np.column_stack((features, labels))

# Seeded generator so the train/test split is reproducible across runs.
rng = np.random.default_rng(42)
rng.shuffle(alldata)
splitidx = int(len(alldata) * training_split)
train, test = alldata[:splitidx, :], alldata[splitidx:, :]

print(np.shape(train))
print(np.shape(test))

# Split each partition back into feature columns and label columns.
train_input = train[:, :-n_classes]
train_labels = train[:, -n_classes:]

test_input = test[:, :-n_classes]
test_labels = test[:, -n_classes:]

print(np.shape(train_input))
print(np.shape(train_labels))

In [None]:
# Baseline classifier: one hidden ReLU layer and a softmax output with one
# unit per label column.
model = Sequential([
    Dense(100, input_dim=np.shape(train_input)[1]),
    Activation('relu'),
    Dense(np.shape(train_labels)[1]),
    Activation('softmax'),
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print().
model.summary()

# 20% of the training rows are held out by Keras for validation.
model.fit(train_input, train_labels, epochs=20, batch_size=32,
          validation_split=0.2)

# Final score on the untouched test partition.
loss, acc = model.evaluate(test_input, test_labels, batch_size=32)

print("Done!")
print("Loss: %.4f, accuracy: %.4f" % (loss, acc))

In [None]:
# Peek at the feature vector of the first training sample.
train_input[0]

In [None]:
from keras.layers import LeakyReLU
from keras.optimizers import Adam

# Deeper variant of the classifier: four hidden layers (100, 128, 64, 256
# units), the last one using LeakyReLU, and a softmax output with one unit per
# label column. This rebinds `model`, replacing any earlier model object.
model = Sequential([
    Dense(100, input_dim=np.shape(train_input)[1]),
    Activation('relu'),
    Dense(128),
    Activation('relu'),
    Dense(64),
    Activation('relu'),
    Dense(256),
    LeakyReLU(alpha=0.1),
    Dense(np.shape(train_labels)[1]),
    Activation('softmax'),
])

# Adam with an explicit learning rate instead of the string default.
optimizer = Adam(learning_rate=0.0005)

model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print().
model.summary()

# 20% of the training rows are held out by Keras for validation.
model.fit(train_input, train_labels, epochs=50, batch_size=32,
          validation_split=0.2)

# Final score on the untouched test partition.
loss, acc = model.evaluate(test_input, test_labels, batch_size=32)

print("Done!")
print("Loss: %.4f, accuracy: %.4f" % (loss, acc))