In [61]:
import os
from tinytag import TinyTag, TinyTagException
from sklearn.neighbors import NearestNeighbors
from collections import defaultdict
from keras.models import load_model
import librosa
from collections import Counter
import multiprocessing
from tqdm import tqdm
from keras.models import Model
import numpy as np
import sounddevice as sd

In [3]:
MUSIC_ROOT = '/Users/douwe/Songs/'
mp3s = []
for root, subdirs, files in os.walk(MUSIC_ROOT):
    for fn in files:
        if fn.endswith('.mp3'):
            mp3s.append(os.path.join(root, fn))
len(mp3s)

7458

In [105]:
TO_SKIP = {'Podcast', 'Books & Spoken'}

def process_mp3(path):
    try:
        tag = TinyTag.get(path)
        if tag.genre in TO_SKIP:
            return None
    except TinyTagException:
        print('error')
        return None
    signal, sr = librosa.load(path, res_type='kaiser_fast', offset=30, duration=30)
    try:
        melspec = librosa.feature.melspectrogram(signal, sr=sr).T[:1280,]
        if len(melspec) != 1280:
            return None
    except ValueError:
        return None
    return {'path': path,
            'melspecs': np.asarray(np.split(melspec, 10)),
            'tag': tag}

songs = [process_mp3(path) for path in tqdm(mp3s[:1000])]
songs = [song for song in songs if song]





  0%|          | 0/1000 [00:00<?, ?it/s][A[A[A[A



  0%|          | 1/1000 [00:00<07:45,  2.15it/s][A[A[A[A



  0%|          | 2/1000 [00:00<08:01,  2.07it/s][A[A[A[A



  0%|          | 3/1000 [00:01<08:11,  2.03it/s][A[A[A[A



  0%|          | 4/1000 [00:02<08:23,  1.98it/s][A[A[A[A



  0%|          | 5/1000 [00:02<08:23,  1.98it/s][A[A[A[A



  1%|          | 6/1000 [00:03<08:23,  1.97it/s][A[A[A[A



  1%|          | 7/1000 [00:03<08:22,  1.98it/s][A[A[A[A



  1%|          | 8/1000 [00:04<08:18,  1.99it/s][A[A[A[A



  1%|          | 9/1000 [00:04<08:20,  1.98it/s][A[A[A[A



  1%|          | 10/1000 [00:05<08:19,  1.98it/s][A[A[A[A



  1%|          | 11/1000 [00:05<08:18,  1.99it/s][A[A[A[A



  1%|          | 12/1000 [00:06<08:19,  1.98it/s][A[A[A[A



  1%|▏         | 13/1000 [00:06<08:21,  1.97it/s][A[A[A[A



  1%|▏         | 14/1000 [00:07<08:18,  1.98it/s][A[A[A[A



  2%|▏         | 15/1000 [00:07<08:18, 

 13%|█▎        | 134/1000 [01:00<06:31,  2.21it/s][A[A[A[A



 14%|█▎        | 135/1000 [01:01<06:31,  2.21it/s][A[A[A[A



 14%|█▎        | 136/1000 [01:01<06:31,  2.21it/s][A[A[A[A



 14%|█▎        | 137/1000 [01:02<06:30,  2.21it/s][A[A[A[A



 14%|█▍        | 138/1000 [01:02<06:30,  2.21it/s][A[A[A[A



 14%|█▍        | 139/1000 [01:03<06:30,  2.21it/s][A[A[A[A



 14%|█▍        | 140/1000 [01:03<06:30,  2.20it/s][A[A[A[A



 14%|█▍        | 141/1000 [01:03<06:29,  2.20it/s][A[A[A[A



 14%|█▍        | 142/1000 [01:04<06:29,  2.20it/s][A[A[A[A



 14%|█▍        | 143/1000 [01:04<06:29,  2.20it/s][A[A[A[A



 14%|█▍        | 144/1000 [01:05<06:28,  2.20it/s][A[A[A[A



 14%|█▍        | 145/1000 [01:05<06:28,  2.20it/s][A[A[A[A



 15%|█▍        | 146/1000 [01:06<06:28,  2.20it/s][A[A[A[A



 15%|█▍        | 147/1000 [01:06<06:28,  2.20it/s][A[A[A[A



 15%|█▍        | 148/1000 [01:07<06:27,  2.20it/s][A[A[A[A



 15%|█▍   

 33%|███▎      | 331/1000 [02:05<04:13,  2.63it/s][A[A[A[A



 33%|███▎      | 332/1000 [02:06<04:13,  2.63it/s][A[A[A[A



 33%|███▎      | 333/1000 [02:06<04:13,  2.63it/s][A[A[A[A



 33%|███▎      | 334/1000 [02:07<04:13,  2.63it/s][A[A[A[A



 34%|███▎      | 335/1000 [02:07<04:13,  2.63it/s][A[A[A[A



 34%|███▎      | 336/1000 [02:08<04:13,  2.62it/s][A[A[A[A



 34%|███▎      | 337/1000 [02:08<04:12,  2.62it/s][A[A[A[A



 34%|███▍      | 338/1000 [02:09<04:12,  2.62it/s][A[A[A[A



 34%|███▍      | 339/1000 [02:09<04:12,  2.62it/s][A[A[A[A



 34%|███▍      | 340/1000 [02:10<04:12,  2.61it/s][A[A[A[A



 34%|███▍      | 341/1000 [02:10<04:12,  2.61it/s][A[A[A[A



 34%|███▍      | 342/1000 [02:11<04:12,  2.61it/s][A[A[A[A



 34%|███▍      | 343/1000 [02:11<04:12,  2.61it/s][A[A[A[A



 34%|███▍      | 344/1000 [02:12<04:11,  2.60it/s][A[A[A[A



 34%|███▍      | 345/1000 [02:12<04:11,  2.60it/s][A[A[A[A



 35%|███▍ 

 46%|████▌     | 457/1000 [03:06<03:42,  2.44it/s][A[A[A[A



 46%|████▌     | 458/1000 [03:07<03:41,  2.44it/s][A[A[A[A



 46%|████▌     | 459/1000 [03:07<03:41,  2.44it/s][A[A[A[A



 46%|████▌     | 460/1000 [03:08<03:41,  2.44it/s][A[A[A[A



 46%|████▌     | 461/1000 [03:08<03:40,  2.44it/s][A[A[A[A



 46%|████▌     | 462/1000 [03:09<03:40,  2.44it/s][A[A[A[A



 46%|████▋     | 463/1000 [03:09<03:40,  2.44it/s][A[A[A[A



 46%|████▋     | 464/1000 [03:10<03:39,  2.44it/s][A[A[A[A



 46%|████▋     | 465/1000 [03:10<03:39,  2.44it/s][A[A[A[A



 47%|████▋     | 466/1000 [03:11<03:39,  2.44it/s][A[A[A[A



 47%|████▋     | 467/1000 [03:11<03:38,  2.44it/s][A[A[A[A



 47%|████▋     | 468/1000 [03:12<03:38,  2.44it/s][A[A[A[A



 47%|████▋     | 469/1000 [03:12<03:37,  2.44it/s][A[A[A[A



 47%|████▋     | 470/1000 [03:12<03:37,  2.44it/s][A[A[A[A



 47%|████▋     | 471/1000 [03:13<03:37,  2.44it/s][A[A[A[A



 47%|████▋

 58%|█████▊    | 582/1000 [04:07<02:57,  2.35it/s][A[A[A[A



 58%|█████▊    | 583/1000 [04:07<02:57,  2.35it/s][A[A[A[A



 58%|█████▊    | 584/1000 [04:08<02:56,  2.35it/s][A[A[A[A



 58%|█████▊    | 585/1000 [04:08<02:56,  2.35it/s][A[A[A[A



 59%|█████▊    | 586/1000 [04:09<02:56,  2.35it/s][A[A[A[A



 59%|█████▊    | 587/1000 [04:09<02:55,  2.35it/s][A[A[A[A



 59%|█████▉    | 588/1000 [04:10<02:55,  2.35it/s][A[A[A[A



 59%|█████▉    | 589/1000 [04:10<02:55,  2.35it/s][A[A[A[A



 59%|█████▉    | 590/1000 [04:11<02:54,  2.35it/s][A[A[A[A



 59%|█████▉    | 591/1000 [04:11<02:54,  2.35it/s][A[A[A[A



 59%|█████▉    | 592/1000 [04:12<02:53,  2.35it/s][A[A[A[A



 59%|█████▉    | 593/1000 [04:12<02:53,  2.35it/s][A[A[A[A



 59%|█████▉    | 594/1000 [04:13<02:53,  2.34it/s][A[A[A[A



 60%|█████▉    | 595/1000 [04:13<02:52,  2.34it/s][A[A[A[A



 60%|█████▉    | 596/1000 [04:14<02:52,  2.34it/s][A[A[A[A



 60%|█████

 71%|███████   | 706/1000 [05:08<02:08,  2.29it/s][A[A[A[A



 71%|███████   | 707/1000 [05:08<02:07,  2.29it/s][A[A[A[A



 71%|███████   | 708/1000 [05:09<02:07,  2.29it/s][A[A[A[A



 71%|███████   | 709/1000 [05:09<02:07,  2.29it/s][A[A[A[A



 71%|███████   | 710/1000 [05:10<02:06,  2.29it/s][A[A[A[A



 71%|███████   | 711/1000 [05:10<02:06,  2.29it/s][A[A[A[A



 71%|███████   | 712/1000 [05:11<02:05,  2.29it/s][A[A[A[A



 71%|███████▏  | 713/1000 [05:11<02:05,  2.29it/s][A[A[A[A



 71%|███████▏  | 714/1000 [05:12<02:04,  2.29it/s][A[A[A[A



 72%|███████▏  | 715/1000 [05:12<02:04,  2.29it/s][A[A[A[A



 72%|███████▏  | 716/1000 [05:12<02:04,  2.29it/s][A[A[A[A



 72%|███████▏  | 717/1000 [05:13<02:03,  2.29it/s][A[A[A[A



 72%|███████▏  | 718/1000 [05:13<02:03,  2.29it/s][A[A[A[A



 72%|███████▏  | 719/1000 [05:14<02:02,  2.29it/s][A[A[A[A



 72%|███████▏  | 720/1000 [05:14<02:02,  2.29it/s][A[A[A[A



 72%|█████

 85%|████████▌ | 851/1000 [06:09<01:04,  2.31it/s][A[A[A[A



 85%|████████▌ | 852/1000 [06:09<01:04,  2.30it/s][A[A[A[A



 85%|████████▌ | 853/1000 [06:10<01:03,  2.30it/s][A[A[A[A



 85%|████████▌ | 854/1000 [06:10<01:03,  2.30it/s][A[A[A[A



 86%|████████▌ | 855/1000 [06:11<01:02,  2.30it/s][A[A[A[A



 86%|████████▌ | 856/1000 [06:11<01:02,  2.30it/s][A[A[A[A



 86%|████████▌ | 857/1000 [06:12<01:02,  2.30it/s][A[A[A[A



 86%|████████▌ | 858/1000 [06:12<01:01,  2.30it/s][A[A[A[A



 86%|████████▌ | 859/1000 [06:13<01:01,  2.30it/s][A[A[A[A



 86%|████████▌ | 860/1000 [06:13<01:00,  2.30it/s][A[A[A[A



 86%|████████▌ | 861/1000 [06:14<01:00,  2.30it/s][A[A[A[A



 86%|████████▌ | 862/1000 [06:14<00:59,  2.30it/s][A[A[A[A



 86%|████████▋ | 863/1000 [06:15<00:59,  2.30it/s][A[A[A[A



 86%|████████▋ | 864/1000 [06:15<00:59,  2.30it/s][A[A[A[A



 86%|████████▋ | 865/1000 [06:16<00:58,  2.30it/s][A[A[A[A



 87%|█████

 98%|█████████▊| 975/1000 [07:11<00:11,  2.26it/s][A[A[A[A



 98%|█████████▊| 976/1000 [07:11<00:10,  2.26it/s][A[A[A[A



 98%|█████████▊| 977/1000 [07:12<00:10,  2.26it/s][A[A[A[A



 98%|█████████▊| 978/1000 [07:12<00:09,  2.26it/s][A[A[A[A



 98%|█████████▊| 979/1000 [07:13<00:09,  2.26it/s][A[A[A[A



 98%|█████████▊| 980/1000 [07:13<00:08,  2.26it/s][A[A[A[A



 98%|█████████▊| 981/1000 [07:13<00:08,  2.26it/s][A[A[A[A



 98%|█████████▊| 982/1000 [07:14<00:07,  2.26it/s][A[A[A[A



 98%|█████████▊| 983/1000 [07:14<00:07,  2.26it/s][A[A[A[A



 98%|█████████▊| 984/1000 [07:15<00:07,  2.26it/s][A[A[A[A



 98%|█████████▊| 985/1000 [07:15<00:06,  2.26it/s][A[A[A[A



 99%|█████████▊| 986/1000 [07:16<00:06,  2.26it/s][A[A[A[A



 99%|█████████▊| 987/1000 [07:16<00:05,  2.26it/s][A[A[A[A



 99%|█████████▉| 988/1000 [07:17<00:05,  2.26it/s][A[A[A[A



 99%|█████████▉| 989/1000 [07:17<00:04,  2.26it/s][A[A[A[A



 99%|█████

In [106]:
inputs = []
for song in songs:
    inputs.extend(song['melspecs'])
inputs = np.array(inputs)
inputs.shape

(9050, 128, 128)

In [107]:
cnn_model = load_model('zoo/15/song_classify.h5')
vectorize_model = Model(inputs=cnn_model.input, outputs=cnn_model.layers[-4].output)
vectors = vectorize_model.predict(inputs)
vectors.shape




(9050, 256)

In [130]:
nbrs = NearestNeighbors(n_neighbors=10, algorithm='ball_tree').fit(vectors)

def most_similar_songs(song_idx):
    distances, indices = nbrs.kneighbors(vectors[song_idx * 10: song_idx * 10 + 10])
    c = Counter()
    for row in indices:
        for idx in row[1:]:
            c[idx // 10] += 1
    return c.most_common()

In [140]:
song_idx = 7
print(songs[song_idx]['path'])

print('---')
for idx, score in most_similar_songs(song_idx)[:5]:
    print(songs[idx]['path'], score)
print('')

/Users/douwe/Songs/00 shocking blue - Venus (yes the.mp3
---
/Users/douwe/Songs/00 shocking blue - Venus (yes the.mp3 20
/Users/douwe/Songs/The Shocking Blue/Have A Nice Day_ Vol 1/00 Venus.mp3 12
/Users/douwe/Songs/The Byrds/00 Eve of Destruction.mp3 12
/Users/douwe/Songs/Goldfinger _ Weezer _ NoFx _ L/00 AWESOME.mp3 6
/Users/douwe/Songs/Boudewijn de Groot/Wonderkind Aan Het Strand (dis/03 Prikkebeen.mp3 3



In [89]:
duration = 30  # seconds
fs = 22050
myrecording = sd.rec(int(duration * fs), samplerate=fs, channels=1)

In [71]:
myrecording.shape

(661500, 1)

In [90]:
sd.play(myrecording, samplerate=fs)

In [75]:
myrecording.min()

-0.40555808

In [124]:
signal, sr = librosa.load('/Users/douwe/Dropbox/Apps/Hi-Q Recordings/recording-20180219-162112.mp3', res_type='kaiser_fast', offset=0, duration=30)

melspec = librosa.feature.melspectrogram(signal, sr=sr).T[:1280,]
melspecs = np.asarray(np.split(melspec, 10))
melspecs.shape

(10, 128, 128)

In [125]:
recorded_vectors = vectorize_model.predict(melspecs)

In [126]:
distances, indices = nbrs.kneighbors(recorded_vectors)
c = Counter()
for row in indices:
    for idx in row[1:]:
        c[idx // 10] += 1
for idx, _ in c.most_common():
    print(songs[idx]['path'])


/Users/douwe/Songs/Alan Parsons Project/Anthology/15 La Sagrada Familia.mp3
/Users/douwe/Songs/Tom Petty & The Heartbreakers/Greatest hits/13 Runnin' down a dream.mp3
/Users/douwe/Songs/Miklós Szenthelyi_ György Gy/Zeit für Romantik - Romantisc/02 Beethoven_ Violin Romance #2 I.mp3
/Users/douwe/Songs/Mozart/_notag_/08 Eine Kleine Nachtmusik.mp3
/Users/douwe/Songs/00 Cafe Del Mar - Ibiza - 01 - Jo.mp3
/Users/douwe/Songs/Golden Earring/The Naked Truth/04 Mad Love's Comin'.mp3


In [121]:
signal, sr = librosa.load('/Users/douwe/Dropbox/Apps/Hi-Q Recordings/recording-20180219-162112.mp3', res_type='kaiser_fast', offset=0, duration=30)

In [123]:
sd.play(signal.flatten(), samplerate=sr)