In [1]:
import os

import tensorflow as tf
from pydub import AudioSegment
import librosa
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import (Input, Activation, BatchNormalization, Flatten,
                                     Conv2D, MaxPooling2D, Dense)
from statistics import mode
import utils

In [2]:
def load_model(model_path):
    """Rebuild the 'GenreModel' CNN and load its weights from ``model_path``.

    Architecture: a (288, 432, 4) input, six stages of
    Conv2D(3x3, stride 1) -> BatchNormalization -> ReLU -> MaxPooling2D(2x2)
    with 8, 16, 32, 64, 128 and 256 filters, then Flatten and an 8-unit
    softmax layer named 'fc8'.

    Returns the Keras model after ``load_weights(model_path)``.
    """
    X_input = Input((288, 432, 4))

    X = X_input
    for n_filters in (8, 16, 32, 64, 128, 256):
        X = Conv2D(n_filters, kernel_size=(3, 3), strides=(1, 1))(X)
        # Axis 3 is the last axis of the 4-D activations.
        X = BatchNormalization(axis=3)(X)
        X = Activation('relu')(X)
        X = MaxPooling2D((2, 2))(X)

    X = Flatten()(X)
    X = Dense(8,
              activation='softmax',
              name='fc8',
              kernel_initializer=tf.keras.initializers.glorot_uniform(seed=9))(X)

    model = tf.keras.models.Model(inputs=X_input, outputs=X, name='GenreModel')
    model.load_weights(model_path)
    return model


def convert_mp32wav(file_path):
    """Decode an audio file and save it as a 22050 Hz WAV file.

    The result is always written to ./reference_samples/ref_full.wav,
    overwriting any previous conversion.

    :param file_path: path of the audio file to convert
    """
    # set_frame_rate() returns a new segment rather than modifying the
    # existing one, so its result has to be kept for the resampling to apply.
    sound = AudioSegment.from_file(file_path).set_frame_rate(22050)

    os.makedirs("./reference_samples", exist_ok=True)
    # export() hands back the opened output file; close it explicitly.
    sound.export("./reference_samples/ref_full.wav", format='wav').close()


def extract_mel(wav_file_path):
    """
    Split the first 30 seconds of the song into ten 3-second clips and save
    the mel-spectrogram of each clip as
    ./reference_samples/sample_melspec{i}.png (i = 0..9).
    The input file is expected to be in wav format
    """
    # librosa is told sr=22050 below and treats the array as a single-channel
    # signal, so make the audio actually match: otherwise stereo input
    # (interleaved samples) or another sample rate is silently misread.
    wav = AudioSegment.from_file(wav_file_path).set_frame_rate(22050).set_channels(1)

    os.makedirs("./reference_samples", exist_ok=True)
    for i in range(0, 10):
        # pydub slices are expressed in milliseconds
        wav3sec = wav[i * 3 * 1000: (i + 1) * 3 * 1000]
        arr = np.array(wav3sec.get_array_of_samples()).astype(np.float32)
        mels = librosa.feature.melspectrogram(y=arr, sr=22050, n_mels=128, fmax=8000)
        mels = librosa.power_to_db(mels, ref=np.max)

        # Draw on an explicit figure instead of pyplot's implicit current
        # figure, and close it so figures do not accumulate across clips.
        fig, ax = plt.subplots()
        ax.imshow(mels)
        fig.savefig(f"./reference_samples/sample_melspec{i}.png")
        plt.close(fig)


def predict(song_path, _model):
    """Predict the genre class index of a song by majority vote.

    The song is converted to ./reference_samples/ref_full.wav, ten
    mel-spectrogram images are generated from it, each image is classified
    by ``_model`` and the most frequent class index is returned.

    :param song_path: path of the audio file to classify
    :param _model: model whose ``predict`` yields one row of class scores
        per input image
    :return: the most common predicted class index, as a plain ``int``
    """
    convert_mp32wav(song_path)
    extract_mel("./reference_samples/ref_full.wav")

    images = [
        img_to_array(load_img(f"./reference_samples/sample_melspec{i}.png",
                              color_mode='rgba',
                              target_size=(288, 432)))
        for i in range(0, 10)
    ]

    # Classify all clips in one batch of shape (10, 288, 432, 4) instead of
    # issuing ten separate single-image predict() calls.
    batch = np.stack(images) / 255
    scores = _model.predict(batch)

    labels = np.argmax(scores, axis=1).tolist()
    return mode(labels)

In [3]:
# Turn on memory growth for every visible GPU. Iterating (rather than
# indexing [0]) keeps the cell working on machines without a GPU and applies
# the same setting to all GPUs when there are several.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Load the saved genre classifier.
genre_model = tf.keras.models.load_model("./model/model.h5")

In [4]:
# Genre names; the class index returned by predict() is used to index this list.
classes = [
    "Electronic", "Experimental", "Folk", "Hip-Hop",
    "Instrumental", "International", "Pop", "Rock",
]

In [15]:
## sanity check: classify a single track and print its genre name
_label = predict(
    "../test_playlist/Paris In The Rain - Lauv.mp3",
    genre_model,
)
print(classes[_label])

Experimental


In [5]:
## give recommendations based on the playlist
## first load the list of songs from FMA
# The metadata directory can be overridden with the FMA_METADATA_DIR
# environment variable so the notebook is not tied to one machine; the
# original location remains the default.
metafile_path = os.environ.get("FMA_METADATA_DIR",
                               "/home/minhhiu/MyProjects/music_data/fma_metadata")

# load meta data
tracks_all = utils.load(os.path.join(metafile_path, 'tracks.csv'))

# Keep only rows whose ('set', 'subset') value compares <= 'small'
# (presumably an ordered categorical produced by utils.load -- TODO confirm).
tracks_small = tracks_all[tracks_all['set', 'subset'] <= 'small']
tracks = tracks_small['track']
artists = tracks_small['artist']

In [6]:
## number of songs that will be recommended
recommendation_num = 10

## share of the playlist per genre (filled in by the loop below)
genre_distribution = {
    "Electronic": 0, "Experimental": 0,
    "Folk": 0, "Hip-Hop": 0,
    "Instrumental": 0, "International": 0,
    "Pop": 0, "Rock": 0,
}

## number of playlist songs predicted for each genre, same keys as above
genre_count = dict.fromkeys(genre_distribution, 0)

playlist_path = "../test_playlist/"
playlist = os.listdir(playlist_path)
total_songs_in_playlist = len(os.listdir(playlist_path))

## calculate genre distribution over the playlist
for song in playlist:
    song_file = os.path.join(playlist_path, song)
    genre = classes[predict(song_file, genre_model)]
    genre_count[genre] += 1
    # keep the share of this genre in step with its running count
    genre_distribution[genre] = genre_count[genre] / total_songs_in_playlist

In [10]:
## my playlist
print("My Playlist:")
for song in playlist:
    # splitext removes only the file extension; str.strip('.mp3') would strip
    # any leading/trailing '.', 'm', 'p' or '3' characters from the title.
    print(os.path.splitext(song)[0])

## pick random FMA tracks per genre, in proportion to the playlist's genre shares
recommendation_list = []
for key in genre_distribution.keys():
    # int() truncates, so fewer than recommendation_num songs may be picked
    # in total when the shares do not divide evenly.
    songs_num = int(genre_distribution[key] * recommendation_num)
    if songs_num == 0:
        continue
    _tracks = tracks[tracks['genre_top'] == key]

    # Sample without replacement so the same track is never recommended
    # twice; cap the request at the number of tracks available.
    songs_num = min(songs_num, len(_tracks))
    if songs_num == 0:
        continue
    rows = np.random.choice(_tracks.index.values, songs_num, replace=False)

    for idx in rows:
        obj = {
            "Title": _tracks.loc[idx, 'title'],
            "Artist": artists.loc[idx, 'name']
        }
        recommendation_list.append(obj)

print()
print("Recommendations:")
for obj in recommendation_list:
    print(f"{obj['Title']} - {obj['Artist']}")

My Playlist:
Enchanted - Taylor Swift
Lo Xa - Tien Tien
New Divide - Linkin Park
I Like Me Better - Lauv
Dancing With Your Ghost - Sasha Sloan
Numb - Linkin Park
Paris In The Rain - Lauv
Say You Do - Tien Tien
Disconnected - Pegboard Nerds
You Belong With Me Taylor___s Version_ -

Recommendations:
After the War (ft. Alex, MoShang, HEJ31) - SackJo22
Bahia - Prince Rama
high fidelity - File Under Toner
Week One - Ben von Wildenhaus
Prairie Kids - Meaner Pencil
Opotiki Over and Out - krackatoa
Halloween Night - Monk Turner
Thinking Of You - Teeel
Be Electric - Rockit Maxx
While you're waiting for the revolution - Action Will Be Taken
