In [60]:
import pickle

import librosa
import librosa.feature

import numpy as np
import tensorflow as tf

from scipy import stats
from sklearn.neighbors import NearestNeighbors

In [61]:
def get_mfcc(y, sr):
    """Return the MFCC matrix of an audio signal as a NumPy array.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    coefficients = librosa.feature.mfcc(y=y, sr=sr)
    return np.array(coefficients)


def get_mel_spectrogram(y, sr):
    """Return the mel-scaled spectrogram of an audio signal as a NumPy array.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    return np.array(spectrogram)


def get_chroma_stft(y, sr):
    """Return the STFT-based chromagram of an audio signal as a NumPy array.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    chromagram = librosa.feature.chroma_stft(y=y, sr=sr)
    return np.array(chromagram)


def get_chroma_cens(y, sr):
    """Return the CENS chromagram of an audio signal as a NumPy array.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    chromagram = librosa.feature.chroma_cens(y=y, sr=sr)
    return np.array(chromagram)

def get_tonnetz(y, sr):
    """Return the tonnetz (tonal centroid) features of a signal as a NumPy array.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    tonal_centroids = librosa.feature.tonnetz(y=y, sr=sr)
    return np.array(tonal_centroids)


def get_zero_crossing_rate(y):
    """Return librosa's zero-crossing-rate feature for an audio signal.

    Unlike the other extractors in this file, no sampling rate is taken or
    forwarded.

    Args:
        y: audio samples.
    """
    crossing_rate = librosa.feature.zero_crossing_rate(y=y)
    return crossing_rate


def get_spectral_bandwidth(y, sr):
    """Return librosa's spectral-bandwidth feature for an audio signal.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return bandwidth


def get_spectral_centroid(y, sr):
    """Return librosa's spectral-centroid feature for an audio signal.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return centroid


def get_spectral_rolloff(y, sr):
    """Return librosa's spectral-rolloff feature for an audio signal.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    return rolloff


def get_spectral_contrast(y, sr):
    """Return librosa's spectral-contrast feature for an audio signal.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    return contrast


def get_poly_features(y, sr):
    """Return librosa's polynomial-fit features for an audio signal.

    Args:
        y: audio samples, forwarded to librosa as ``y``.
        sr: sampling rate, forwarded to librosa as ``sr``.
    """
    poly_coefficients = librosa.feature.poly_features(y=y, sr=sr)
    return poly_coefficients


def get_feature_stats(values):
    """Summarise each row of ``values`` with seven descriptive statistics.

    Every statistic is reduced along axis 1, so ``values`` needs at least two
    dimensions and each result holds one entry per row.

    Args:
        values: array-like with an axis 1 to reduce over.

    Returns:
        dict mapping 'mean', 'std', 'skew', 'kurtosis', 'median', 'min' and
        'max' (in that insertion order) to the per-row statistic.
    """
    # (key, reducer) pairs; every reducer accepts ``axis`` as a keyword.
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('skew', stats.skew),
        ('kurtosis', stats.kurtosis),
        ('median', np.median),
        ('min', np.min),
        ('max', np.max),
    )
    return {key: reduce_rows(values, axis=1) for key, reduce_rows in reducers}


def concatenate_feature_stats(feature_stats):
    """Flatten a statistics dict into a single 1-D vector.

    The arrays are joined in a fixed order (mean, std, skew, kurtosis, median,
    min, max), independent of the dict's own key order. Keys other than these
    seven are ignored.

    Args:
        feature_stats: mapping that contains the seven statistic keys.

    Returns:
        The concatenation of the seven arrays.
    """
    stat_order = ('mean', 'std', 'skew', 'kurtosis', 'median', 'min', 'max')
    ordered_arrays = [feature_stats[stat] for stat in stat_order]
    return np.concatenate(ordered_arrays)


def get_feature(y, sr):
    """Build the flat feature vector for one audio clip.

    Each extractor's output is summarised row-wise with ``get_feature_stats``,
    flattened with ``concatenate_feature_stats``, and the per-extractor
    vectors are then joined end to end.

    Args:
        y: audio samples.
        sr: sampling rate of ``y``.

    Returns:
        1-D NumPy array holding, in order, the summaries of: chroma_stft,
        chroma_cens, mel spectrogram, MFCC, tonnetz, zero-crossing rate,
        spectral bandwidth, spectral centroid, spectral rolloff, spectral
        contrast and polynomial features.
    """
    # Listed in output order. Keep this order stable: the vector is consumed
    # positionally, so reordering would silently change what each column means.
    frame_features = (
        get_chroma_stft(y, sr),
        get_chroma_cens(y, sr),
        get_mel_spectrogram(y, sr),
        get_mfcc(y, sr),
        get_tonnetz(y, sr),
        get_zero_crossing_rate(y),  # the only extractor that ignores sr
        get_spectral_bandwidth(y, sr),
        get_spectral_centroid(y, sr),
        get_spectral_rolloff(y, sr),
        get_spectral_contrast(y, sr),
        get_poly_features(y, sr),
    )

    return np.concatenate([
        concatenate_feature_stats(get_feature_stats(frames))
        for frames in frame_features
    ])


In [62]:
def generate_features(input_path='../pickles/processed_data.pkl',
                      output_path='track_features.pkl'):
    """Extract a feature vector for every track and cache the result on disk.

    Reads a pickled iterable of ``(label, y, sr, filename)`` records, runs
    ``get_feature`` on each, and pickles a dict with keys ``"features"``
    (array with one row per track), ``"labels"`` (array) and ``"filenames"``
    (list) to ``output_path``.

    Args:
        input_path: pickle file holding the track records. Defaults to the
            location this notebook has always read from.
        output_path: where the feature dict is written. Defaults to the
            location this notebook has always written to.

    Returns:
        None. The result is only written to disk.
    """
    # NOTE: pickle.load can execute arbitrary code from the file; only point
    # this at data you produced yourself.
    with open(input_path, 'rb') as f:
        tracks = pickle.load(f)

    feature_rows = []
    track_labels = []
    filenames = []
    for label, y, sr, filename in tracks:
        feature_rows.append(get_feature(y, sr))
        track_labels.append(label)
        filenames.append(filename)

    payload = {
        "features": np.array(feature_rows),
        "labels": np.array(track_labels),
        "filenames": filenames,
    }
    with open(output_path, 'wb') as f:
        pickle.dump(payload, f)


In [63]:
# Recompute features for every track and overwrite track_features.pkl.
# This runs the full extraction on each execution; there is no cache check.
generate_features()

In [64]:
# Load the previously saved model from disk and print its summary.
# NOTE(review): the load log points at a decision-forest model; such
# SavedModels presumably need tensorflow_decision_forests imported first to
# register their custom ops, and no such import is visible in this notebook.
# Confirm this cell works on a fresh kernel.
model = tf.keras.models.load_model('../saved_models/random_forest_3')
model.summary()


[INFO 23-05-27 18:39:53.8020 BST kernel.cc:1242] Loading model from path ../saved_models/random_forest_3/assets/ with prefix 4d3d74283d62459d


Model: "random_forest_model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
Total params: 1
Trainable params: 0
Non-trainable params: 1
_________________________________________________________________


[INFO 23-05-27 18:39:54.0989 BST decision_forest.cc:660] Model loaded with 300 root(s), 56508 node(s), and 1325 input feature(s).
[INFO 23-05-27 18:39:54.0990 BST abstract_model.cc:1312] Engine "RandomForestGeneric" built
[INFO 23-05-27 18:39:54.0995 BST kernel.cc:1074] Use fast generic engine


In [65]:
def load_data(path='./track_features.pkl'):
    """Load a pickled feature dict from disk.

    Args:
        path: pickle file to read. Defaults to the cache file in the
            notebook's working directory.

    Returns:
        Whatever object was pickled into ``path``.
    """
    # NOTE: pickle.load can execute arbitrary code from the file; only load
    # files you produced yourself.
    with open(path, 'rb') as f:
        return pickle.load(f)


In [66]:
# Read the cached feature dict back and echo it.
# NOTE(review): echoing the whole dict prints large arrays; showing only the
# shapes (e.g. data["features"].shape) would keep the notebook readable.
data = load_data()
data


{'features': array([[ 4.15284574e-01,  1.88751370e-01,  2.38558665e-01, ...,
          1.90051907e-01, -2.50370455e-05,  5.26561333e+00],
        [ 3.05086821e-01,  4.11871135e-01,  3.02782238e-01, ...,
          6.05055444e-01, -7.51342267e-05,  8.62808626e+00],
        [ 4.24001545e-01,  3.10383350e-01,  3.85087997e-01, ...,
          9.83716555e-01, -1.22716268e-04,  1.23256043e+01],
        ...,
        [ 3.03198457e-01,  3.16565067e-01,  5.12915492e-01, ...,
          2.05824098e+00, -1.98326703e-04,  1.11196818e+01],
        [ 5.12703180e-01,  3.51487219e-01,  3.12366277e-01, ...,
          2.40252496e+00, -1.45308054e-04,  9.23447969e+00],
        [ 3.60429823e-01,  4.77471173e-01,  4.03136909e-01, ...,
          4.81110826e+00, -5.73429526e-04,  1.31817255e+01]]),
 'labels': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [67]:
# Run every feature row through the model; the per-row prediction vectors are
# reused further down as the space for nearest-neighbour search.
# NOTE(review): these look like the model's prediction outputs rather than
# learned embeddings. Confirm this is the intended similarity space.
embeddings = model.predict(tf.constant(data["features"]))
embeddings




array([[0.33666644, 0.11333329, 0.15999992, ..., 0.01666667, 0.07666666,
        0.07333333],
       [0.6733328 , 0.00333333, 0.11666662, ..., 0.00666667, 0.07999999,
        0.06666667],
       [0.68666613, 0.00333333, 0.03666667, ..., 0.00333333, 0.01333333,
        0.05333334],
       ...,
       [0.06666667, 0.00333333, 0.02333334, ..., 0.        , 0.00333333,
        0.55666625],
       [0.03333334, 0.00333333, 0.15333326, ..., 0.04333334, 0.02666667,
        0.54666626],
       [0.08666665, 0.00333333, 0.03666667, ..., 0.01      , 0.01      ,
        0.6166662 ]], dtype=float32)

In [68]:
# NearestNeighbors is imported in the notebook's first (imports) cell.
N_NEIGHBOURS = 25   # neighbours returned per query
QUERY_INDEX = 250   # row of `embeddings` used as the query track

knn = NearestNeighbors(n_neighbors=N_NEIGHBOURS)
knn.fit(embeddings)
# kneighbors takes a 2-D batch of queries, hence the one-element list.
# The query row is part of the fitted set, so it comes back as its own
# nearest neighbour in the first position.
neighbours = knn.kneighbors([embeddings[QUERY_INDEX]], return_distance=False)
neighbours

array([[250, 230, 222, 527, 229, 259, 125, 261, 294, 233, 197, 296, 919,
        127, 262, 234, 274, 252, 216, 585, 306, 958, 224, 213, 206]])

In [69]:
# Translate the neighbour row indices back into track filenames, one per line.
neighbour_indices = neighbours[0]
for neighbour_index in neighbour_indices:
    track_name = data["filenames"][neighbour_index]
    print(track_name)


country.00036.wav
country.00017.wav
country.00067.wav
jazz.00076.wav
country.00029.wav
country.00030.wav
classical.00056.wav
country.00019.wav
country.00047.wav
country.00016.wav
classical.00048.wav
country.00052.wav
rock.00099.wav
classical.00054.wav
country.00031.wav
country.00012.wav
country.00069.wav
country.00023.wav
country.00064.wav
jazz.00008.wav
disco.00047.wav
rock.00045.wav
country.00028.wav
country.00065.wav
country.00048.wav
