In [None]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt
from pathlib import Path
import tensorflow as tf
from collections import Counter
import librosa
import math

In [None]:
def load_data(dataset_path):
    """Read a JSON dataset file and return its features and labels as arrays.

    Args:
        dataset_path: Path of a JSON file containing the keys "mfcc" and
            "labels".

    Returns:
        A tuple ``(inputs, targets)`` of numpy arrays built from the "mfcc"
        and "labels" entries respectively.
    """
    with open(dataset_path, "r") as dataset_file:
        dataset = json.load(dataset_file)

    # JSON only yields nested lists, so both entries are converted to arrays.
    return np.array(dataset["mfcc"]), np.array(dataset["labels"])

In [None]:
def plot_history(history):
    """Draw accuracy and loss curves on two vertically stacked axes.

    Args:
        history: Object whose ``history`` attribute maps the keys
            "accuracy", "val_accuracy", "loss" and "val_loss" to per-epoch
            sequences (in this notebook, the value returned by ``model.fit``).
    """
    curves = history.history
    _, (accuracy_ax, error_ax) = plt.subplots(2)

    # Top panel: accuracy per epoch for the training and validation data.
    accuracy_ax.plot(curves["accuracy"], label="Train Accuracy")
    accuracy_ax.plot(curves["val_accuracy"], label="Test Accuracy")
    accuracy_ax.set(ylabel="Accuracy", title="Accuracy Eval")
    accuracy_ax.legend(loc="lower right")

    # Bottom panel: loss per epoch; only this panel carries the x-axis label.
    error_ax.plot(curves["loss"], label="Train Error")
    error_ax.plot(curves["val_loss"], label="Test Error")
    error_ax.set(ylabel="Error", xlabel="Epoch", title="Error Eval")
    error_ax.legend(loc="upper right")

    plt.show()

In [None]:
# Read the feature and label arrays from processed.json via load_data.
inputs, targets = load_data("processed.json") # load all the data

In [None]:
# Show the dimensions of the loaded feature and label arrays, one per line.
print(inputs.shape, targets.shape, sep="\n")

In [None]:
# Split data into train and test sets (30% of the samples are held out).
# A fixed random_state keeps the split -- and therefore every metric computed
# from it -- identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    inputs, targets, test_size=0.3, random_state=42
)

In [None]:
# Show the (features, labels) shapes of each split: train first, then test.
print(f"{X_train.shape} {y_train.shape}\n{X_test.shape} {y_test.shape}")

In [None]:
# Build the model architecture: a fully connected classifier on flattened inputs.
model = keras.Sequential()

# Input layer: collapse each 2-D sample into a single feature vector.
model.add(keras.layers.Flatten(input_shape=(inputs.shape[1], inputs.shape[2])))

# Hidden layers: every Dense layer is L2-regularised and followed by dropout.
for hidden_units in (512, 256, 64):
    model.add(
        keras.layers.Dense(
            units=hidden_units,
            activation='relu',
            kernel_regularizer=keras.regularizers.l2(0.001),
        )
    )
    model.add(keras.layers.Dropout(0.3))

# Output layer: ten raw scores (logits), one per class.
model.add(keras.layers.Dense(units=10, activation='linear'))

# The loss is configured with from_logits=True to match the linear output layer.
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
model.summary()

In [None]:
# Fit on the training split. The held-out split is supplied as validation data,
# so loss and accuracy are recorded for both splits after every epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=32,
)

In [None]:
# Visualise the accuracy and loss curves recorded by model.fit.
plot_history(history)

In [None]:
# Persist the trained model. The target directory is created first so the save
# also succeeds on a fresh checkout where "weights/" does not exist yet.
Path("weights").mkdir(parents=True, exist_ok=True)
model.save("weights/weights.keras")

In [None]:
# Genre names indexed by predicted class id.
# NOTE(review): presumably this order matches the integer labels stored in
# processed.json -- confirm against the preprocessing step.
mapping = [
    "blues", "classical", "country", "disco", "hiphop",
    "jazz", "metal", "pop", "reggae", "rock",
]

In [None]:
# Function to extract MFCCs from audio file
def extract_mfccs_from_audio(
    file_path,
    segment_duration=3,
    n_mfcc=13,
    n_fft=2048,
    hop_length=512,
    sample_rate=22050,
):
    """Split an audio file into fixed-length segments and compute MFCCs for each.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        segment_duration: Length of each segment in seconds (int or float).
        n_mfcc: Number of MFCC coefficients computed per frame.
        n_fft: FFT window size passed to ``librosa.feature.mfcc``.
        hop_length: Number of samples between successive frames.
        sample_rate: Sampling rate requested from ``librosa.load``.

    Returns:
        A float64 numpy array of shape ``(segments, frames, n_mfcc)`` where
        ``frames == ceil(samples_per_segment / hop_length)``. Audio left over
        after the last whole segment is ignored, and segments whose frame
        count differs from ``frames`` are skipped. If no segment qualifies the
        array is empty but keeps the same trailing dimensions, i.e. its shape
        is ``(0, frames, n_mfcc)``.

    Raises:
        ValueError: If the segment length works out to less than one sample.
    """
    signal, sr = librosa.load(file_path, sr=sample_rate)

    # Use the rate librosa actually returned and force an integer sample count
    # so that fractional durations still yield valid slice indices.
    samples_per_segment = int(sr * segment_duration)
    if samples_per_segment <= 0:
        raise ValueError(
            "segment_duration must yield at least one sample per segment"
        )
    expected_vector_length = math.ceil(samples_per_segment / hop_length)

    # Floor division already guarantees every segment lies fully inside the
    # signal, so no per-segment bounds check is required.
    num_segments = len(signal) // samples_per_segment

    mfccs = []
    for s in range(num_segments):
        start_sample = samples_per_segment * s
        finish_sample = start_sample + samples_per_segment

        mfcc = librosa.feature.mfcc(
            y=signal[start_sample:finish_sample],
            sr=sr,
            n_fft=n_fft,
            n_mfcc=n_mfcc,
            hop_length=hop_length,
        ).T  # transpose to (frames, n_mfcc)

        # Keep only segments with the expected number of frames so that all
        # entries stack into one rectangular array.
        if len(mfcc) == expected_vector_length:
            mfccs.append(mfcc)

    if not mfccs:
        # Preserve the rank and trailing dimensions even when nothing qualified.
        return np.empty((0, expected_vector_length, n_mfcc))

    return np.array(mfccs, dtype=np.float64)

In [None]:
# Load the weights from the file written by the save cell into the existing
# `model` object (the model must already have been built in this session).
model.load_weights("weights/weights.keras")

In [None]:
# Classify one song: predict a class for every extracted segment, then take
# the class that occurs most often.
mfccs = extract_mfccs_from_audio("sample songs/sudno.mp3")

# Without any segments the prediction and the majority vote below would fail
# with an unrelated-looking error, so stop early with a clear message.
if len(mfccs) == 0:
    raise ValueError(
        "No complete segments could be extracted from the audio file; "
        "nothing to classify."
    )

predictions = model.predict(mfccs)

# The model emits logits; softmax converts them to per-class probabilities.
probabilities = tf.nn.softmax(predictions, axis=-1)

# One predicted class index per segment.
predicted_classes = np.argmax(probabilities, axis=1)

# Majority vote across segments, translated to a name through `mapping`.
class_counts = Counter(predicted_classes)
most_common_class = mapping[class_counts.most_common(1)[0][0]]

print(f"The predicted class for the song is: {most_common_class}")