In [6]:
import os

import librosa
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
def get_mfcc(file_path, sample_rate=22050, n_mfcc=13, n_fft=2048, hop_length=512):
    """Summarise one audio file as a single vector of averaged MFCCs.

    The file is loaded with ``librosa.load`` at ``sample_rate``, MFCCs are
    computed with ``librosa.feature.mfcc``, and the transposed MFCC matrix is
    averaged along its first axis.

    Args:
        file_path: Path of the audio file to load.
        sample_rate: Target sampling rate passed to ``librosa.load`` as ``sr``.
        n_mfcc: Number of MFCC coefficients requested from librosa.
        n_fft: FFT window size forwarded to ``librosa.feature.mfcc``.
        hop_length: Hop length forwarded to ``librosa.feature.mfcc``.

    Returns:
        A NumPy array holding the mean of the transposed MFCC matrix over
        axis 0 (presumably one value per coefficient, i.e. length ``n_mfcc``
        -- confirm against the librosa return shape).
    """
    signal, rate = librosa.load(file_path, sr=sample_rate)
    coefficients = librosa.feature.mfcc(
        y=signal,
        sr=rate,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )
    # Transpose first, then collapse the leading axis into its mean.
    per_frame = coefficients.T
    return np.array(np.mean(per_frame, axis=0))

In [3]:
def process_data(dataset_path):
    """Build the feature matrix and label vector from a genre-per-folder dataset.

    Expects ``dataset_path`` to contain one sub-directory per genre, each
    holding that genre's audio files. Every audio file is converted to a
    feature vector with ``get_mfcc`` and labelled with its genre's integer id.

    Entries are visited in sorted order so the row order of the result does
    not depend on the filesystem. Top-level entries that are not a known
    genre directory (e.g. ``.DS_Store``, a README) are skipped, as are hidden
    or non-regular files inside a genre directory.

    Args:
        dataset_path: Root directory containing the genre sub-directories.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` stacks one feature vector
        per processed file, ``y`` holds the matching integer genre labels
        (blues=0 ... rock=9). Both are empty if no files were processed.
    """
    genres = (
        "blues", "classical", "country", "disco", "hiphop",
        "jazz", "metal", "pop", "reggae", "rock",
    )
    category_to_num = {genre: index for index, genre in enumerate(genres)}

    X = []
    y = []

    for folder in sorted(os.listdir(dataset_path)):
        folder_path = os.path.join(dataset_path, folder)
        # Only descend into directories that map to a known genre label.
        if folder not in category_to_num or not os.path.isdir(folder_path):
            continue

        print("Processing: " + folder)
        label = category_to_num[folder]
        for file in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file)
            # Hidden files and nested directories are not audio tracks.
            if file.startswith(".") or not os.path.isfile(file_path):
                continue
            X.append(get_mfcc(file_path))
            y.append(label)

    return np.array(X), np.array(y)

In [4]:
# Process the dataset: walk every genre folder under audio_dir and extract
# features. X holds one feature vector per audio file; y holds the matching
# integer genre labels, in the same order.
audio_dir = "./Data/genres_original"
X, y = process_data(audio_dir)

Processing: blues
Processing: classical
Processing: country
Processing: disco
Processing: hiphop
Processing: jazz
Processing: metal
Processing: pop
Processing: reggae
Processing: rock


In [9]:
# Hold out 30% of the tracks for evaluation. The fixed seed makes the split
# (and therefore every downstream metric) reproducible across kernel restarts;
# stratifying on y keeps the genre proportions equal in both partitions.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE, stratify=y
)

# normalize the data
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into the features the model trains on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
# Code for this part of the implementation adapted from:
# https://www.youtube.com/watch?v=_xcFAiufwd0
# Our way is slightly different due to the difference in how we collected the
# data, but this helped with understanding the exact syntax of setting up the
# model.

# Training hyperparameters, named so they can be tuned in one place.
LEARNING_RATE = 0.0001
BATCH_SIZE = 10
EPOCHS = 50

# Fix TensorFlow's global seed so weight initialisation is repeatable when the
# notebook is re-run from a fresh kernel.
tf.random.set_seed(42)

model = keras.Sequential([

    # input layer: one input per feature column
    keras.layers.Input(shape=(X_train.shape[1],)),

    # 1st dense layer
    keras.layers.Dense(512, activation='relu'),

    # 2nd dense layer
    keras.layers.Dense(256, activation='relu'),

    # 3rd dense layer
    keras.layers.Dense(64, activation='relu'),

    # output layer: 10 units, softmax over the class labels
    keras.layers.Dense(10, activation='softmax')
])

# compile model
# Sparse categorical cross-entropy takes the integer labels in y directly
# (no one-hot encoding needed).
optimiser = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=optimiser,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# train model
# NOTE(review): the test split doubles as the validation set here, so the
# reported validation accuracy is not an independent estimate if it is used
# to choose hyperparameters or the number of epochs.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 512)               7168      
                                                                 
 dense_9 (Dense)             (None, 256)               131328    
                                                                 
 dense_10 (Dense)            (None, 64)                16448     
                                                                 
 dense_11 (Dense)            (None, 10)                650       
                                                                 
Total params: 155,594
Trainable params: 155,594
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/