In [1]:
import os
import random
import json
import pickle

from utils.music_utils import *
import music_tag

from pathlib import Path
import numpy as np

from pymilvus import connections, utility
from pymilvus import Collection, DataType, FieldSchema, CollectionSchema

In [2]:
# Recursively collect every .pkl file under the dataset root and run the
# project's check_file_info() on each of them.
DATASET = Path("MegaSet")
pkl_files = [pkl_path for pkl_path in DATASET.rglob("*.pkl")]
valid_files = list(map(check_file_info, pkl_files))

# Summing the per-file results counts the truthy ("valid") ones.
n_valid = sum(valid_files)
n_invalid = len(valid_files) - n_valid
print(f"Number of valid files: {n_valid} | Number of invalid files: {n_invalid}")

Number of valid files: 16374 | Number of invalid files: 0


In [3]:
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment,
# then read the Milvus endpoint and access token from it.
load_dotenv()

URI = os.environ.get("MILVUS_URI")
TOKEN = os.environ.get("MILVUS_TOKEN")

In [None]:
# Open the "default" Milvus connection, then list the available collections
# as a quick sanity check.
connections.connect(alias="default", uri=URI, token=TOKEN)
print(f"Connecting to DB: {URI}")
collection_names = utility.list_collections()
print(collection_names)

In [5]:
# Handle on the collection that stores the 512-dimensional song embeddings.
collection_512 = Collection(name="embeddings_512")

In [6]:
# Count songs per genre tag with the count(*) aggregate.
COUNT_FIELD = "count(*)"

# Songs that list "hiphop" among their top-5 genres.
entities = collection_512.query(
    expr='array_contains(top_5_genres, "hiphop")',
    output_fields=[COUNT_FIELD],
)
hiphop_count = entities[0][COUNT_FIELD]
print(f"Number of hiphop songs: {hiphop_count}")

# Songs that list both "hiphop" and "jazz" among their top-5 genres.
entities = collection_512.query(
    expr='array_contains_all(top_5_genres, ["hiphop", "jazz"])',
    output_fields=[COUNT_FIELD],
)
hiphop_jazz_count = entities[0][COUNT_FIELD]
print("Number of songs that are both hiphop and jazz: ", hiphop_jazz_count)

Number of hiphop songs: 5712
Number of songs that are both hiphop and jazz:  240


In [7]:
# Seed playlist: four tracks, all from the same album folder.
quality_control_dir = "MegaSet/Jurassic 5/Jurassic 5 - Quality Control (2000)"
quality_control_tracks = (
    "001 How We Get Along.mp3",
    "010 Jurass Finish First.mp3",
    "013 The Game.mp3",
    "015 Swing Set.mp3",
)
custom_playlist = [f"{quality_control_dir}/{track}" for track in quality_control_tracks]

In [8]:
# For every song of the custom playlist, collect its embedding together with
# the distinct artists and genres seen so far (first-seen order is preserved).
playlist_embeddings = []
playlist_artists = []
playlist_genres = []

for song in custom_playlist:
    # Fetch only the three fields used below rather than every field ("*").
    res = collection_512.query(
        expr=f'path == "{song}"',
        output_fields=["embedding", "artist", "top_5_genres"],
    )
    if not res:
        # Same exception type as a bare res[0] on an empty result, but the
        # message names the path that could not be found.
        raise IndexError(f"Song not found in collection 'embeddings_512': {song}")
    row = res[0]

    playlist_embeddings.append(row["embedding"])
    if row["artist"] not in playlist_artists:
        playlist_artists.append(row["artist"])

    for g in row["top_5_genres"]:
        if g not in playlist_genres:
            playlist_genres.append(g)

print(f"Playlist artists: {playlist_artists}")
print(f"Playlist genres: {playlist_genres}")

Playlist artists: ['Jurassic 5']
Playlist genres: ['funk', 'jazz', 'electronic', 'pop', 'rock', 'hiphop', 'rap', 'alternative', 'experimental']


In [9]:
# Summarize the playlist as the element-wise average of its song embeddings.
playlist_embedding = np.asarray(playlist_embeddings).mean(axis=0)
print(f"Playlist embedding shape: {playlist_embedding.shape}")

Playlist embedding shape: (512,)


In [10]:
# Nearest-neighbour search around the playlist's mean embedding.
# No artist filtering is applied here: the first 20 hits are printed as-is.
search_kwargs = dict(
    data=[playlist_embedding],
    anns_field="embedding",
    param={"nprobe": 16},
    limit=200,
    offset=1,
    output_fields=["*"],
)
bigresult = collection_512.search(**search_kwargs)

header = f'{"Title":<30} | {"Artist":<40} | {"Top 5 Genres"}'
print(header)
print('-' * 75)

already_proposed_artits = []
count = 0
for result in bigresult[0]:
    # Stop once 20 rows have been printed.
    if not count < 20:
        break
    already_proposed_artits.append(result.artist)
    count += 1
    # Title and artist are truncated so each fits its fixed-width column.
    columns = [
        f"{result.title[:29]:<30}",
        f"{result.artist[:39]:<40}",
        ', '.join(result.top_5_genres),
    ]
    print(" | ".join(columns))

Title                          | Artist                                   | Top 5 Genres
---------------------------------------------------------------------------
Jurass Finish First            | Jurassic 5                               | hiphop, rap, electronic, alternative, rock
Un scratch, un beat, un rap    | Disiz La Peste                           | hiphop, rap, electronic, pop, reggae
Africanize Dem                 | Patrice                                  | hiphop, electronic, rock, pop, alternative
09 - Down For The Count (feat  | Talib Kweli & Hi-Tek                     | hiphop, rap, pop, reggae, electronic
Contact                        | Jurassic 5                               | electronic, hiphop, experimental, alternative, rap
Quality Intro                  | Jurassic 5                               | electronic, hiphop, experimental, alternative, reggae
The Game                       | Jurassic 5                               | hiphop, rap, pop, electronic, rock
Pap

In [11]:
# ---------- Genre predictions (87 classes) for the custom playlist ----------

# Genre class labels; classes[i] is used further down as the label of
# prediction index i. JSON is read explicitly as UTF-8 so the result does not
# depend on the platform's default encoding.
with open("utils/mtg_jamendo_genre.json", "r", encoding="utf-8") as json_file:
    metadata = json.load(json_file)
classes = metadata.get("classes")

collection_87 = Collection("predictions_87")
playlist_predictions = []
playlist_artists = []

for song in custom_playlist:
    res = collection_87.query(expr=f'path == "{song}"', output_fields=["artist", "predictions"])
    if not res:
        # Same exception type as a bare res[0] on an empty result, but the
        # message names the path that could not be found.
        raise IndexError(f"Song not found in collection 'predictions_87': {song}")
    res = res[0]

    playlist_predictions.append(res["predictions"])
    if res["artist"] not in playlist_artists:
        playlist_artists.append(res["artist"])


In [12]:
# Show what was collected for the playlist, one item per line.
print(
    f"Playlist artists: {playlist_artists}",
    f"Playlist predictions: {playlist_predictions}",
    sep="\n",
)

Playlist artists: ['Jurassic 5']
Playlist predictions: [[0.005762913, 0.008253301, 0.0034851625, 0.0071143573, 0.017980726, 0.104377165, 0.0041923956, 0.049063977, 0.0071481564, 0.024958074, 0.0033044128, 0.008596563, 0.008770922, 0.003075228, 0.017866416, 0.019159041, 0.0015088706, 0.008702474, 0.0026463908, 0.0055947155, 0.002400248, 0.011374329, 0.027170414, 0.003779239, 0.0023712434, 0.0033102299, 0.020748364, 0.010023192, 0.007975754, 0.014262985, 0.0043711397, 0.029198537, 0.0021468787, 0.1600409, 0.0033185922, 0.013859835, 0.006944419, 0.0012383214, 0.11420882, 0.015839698, 0.22673073, 0.045681458, 0.021750843, 0.0064038895, 0.003158051, 0.0057094204, 0.041474447, 0.01316731, 0.0031139345, 0.019663494, 0.035182394, 0.012613428, 0.04153401, 0.017195258, 0.21501057, 0.028628124, 0.013428143, 0.021873157, 0.0013072963, 0.009379363, 0.0038117596, 0.0072622136, 0.0024246832, 0.007676928, 0.12849125, 0.01350378, 0.021970263, 0.0022968617, 0.008117019, 0.010785368, 0.011316147, 0.02122

In [13]:
# Average the per-song prediction vectors into one vector for the playlist.
# The name is rebound in place, so the ndim guard keeps this cell idempotent:
# on a re-run the vector is already averaged and must not be averaged again
# (that would collapse it to a scalar and break the indexing below).
playlist_predictions = np.asarray(playlist_predictions)
if playlist_predictions.ndim > 1:
    playlist_predictions = playlist_predictions.mean(axis=0)

# argsort is ascending: take the last five indices and reverse them so the
# highest-scoring class comes first.
sorted_indices = playlist_predictions.argsort()
top_5_indices = sorted_indices[-5:][::-1]
for i in top_5_indices:
    print(f"{classes[i]}: {playlist_predictions[i]}")

hiphop: 0.365524023771286
rap: 0.18511901795864105
electronic: 0.1704750657081604
jazz: 0.1005338504910469
pop: 0.09983273595571518


In [14]:
# Search for songs similar to the custom playlist that have at least one of
# the listed genres among their top-5 genres.
results = collection_512.search(
    data=[playlist_embedding],
    anns_field="embedding",
    param={"nprobe": 16},
    limit=120,
    output_fields=["*"],
    # The boolean filter is passed through `expr`, like the other searches in
    # this notebook.
    expr='array_contains_any(top_5_genres, ["hiphop", "reggae", "rock", "funk"])',
)

# Header widths match the row format below (30 / 40 characters).
print(f'{"Title":<30} | {"Artist":<40} | {"Top 5 Genres"}')
print('-' * 75)

already_proposed_artits = []
count = 0

# Print at most 20 hits, one per artist, skipping artists that were already
# proposed or that are part of the custom playlist.
for result in results[0]:
    if result.artist in already_proposed_artits or result.artist in playlist_artists:
        continue
    if count >= 20:
        break
    already_proposed_artits.append(result.artist)
    count += 1
    print(f"{result.title[:29]:<30} | {result.artist[:39]:<40} | {', '.join(result.top_5_genres)}")
    

Title                          | Artist                         | Top 5 Genres
---------------------------------------------------------------------------
Un scratch, un beat, un rap    | Disiz La Peste                           | hiphop, rap, electronic, pop, reggae
Africanize Dem                 | Patrice                                  | hiphop, electronic, rock, pop, alternative
09 - Down For The Count (feat  | Talib Kweli & Hi-Tek                     | hiphop, rap, pop, reggae, electronic
Papa ?                         | Hocus Pocus                              | hiphop, rap, electronic, jazz, ambient
L’Empire du côté obscur        | IAM                                      | hiphop, rap, electronic, soundtrack, experimental
01. LIGHT SPEED.flac.mp3       | Grieves                                  | hiphop, rap, pop, electronic, alternative
Manque de Q                    | M                                        | reggae, rock, pop, alternative, electronic
Right Thing (Z-Trip 'S

In [15]:
# ---------- Recommendations seeded from a single song (loaded from its .pkl file) ----------

In [15]:
# Load the pickled metadata dict of one track and display its keys.
# Note: pickle can execute arbitrary code on load; only open trusted files.
pkl_path = "MegaSet/Amy Whinehouse/Amy Winehouse - 2006 - Back To Black/01 Rehab.pkl"
data = pickle.loads(Path(pkl_path).read_bytes())
data.keys()

dict_keys(['filename', 'filepath', 'folder', 'filesize', 'title', 'artist', 'album', 'year', 'tracknumber', 'genre', 'predictions_87', 'embedding_512', 'top_5_genres'])

In [16]:
# Keep only the last path component of the track's folder.
folder = data["folder"].rsplit("/", 1)[-1]
folder

'Amy Winehouse - 2006 - Back To Black'

In [17]:
# propose une playlist de 20 chansons qui sont similaires et qui ne viennenet pas du meme dossier
folder = data["folder"].split("/")[-1]

results = collection_512.search(
    data=[data['embedding_512']],
    anns_field="embedding",
    param={"nprobe": 16},
    limit=120,
    output_fields=["*"],
    expr=f"folder != '{folder}'"
)

print(f'{"Title":<30} | {"Artist":<30} | {"Top 5 Genres"}')
print('-' * 75)

already_proposed_artits = []
count = 0

# loop over the results and print only the ones that have artist not found in already proposed AND not found in the custom playlist
for result in results[0]:
    if result.artist in already_proposed_artits or result.artist in playlist_artists:
        continue
    if count >= 20:
        break
    already_proposed_artits.append(result.artist)
    count += 1
    print(f"{result.title[:29]:<30} | {result.artist[:39]:<40} | {', '.join(result.top_5_genres)}")


Title                          | Artist                         | Top 5 Genres
---------------------------------------------------------------------------
Rehab                          | Amy Winehouse                            | pop, rock, electronic, alternative, indie
Gypsy                          | Nneka                                    | hiphop, electronic, rap, pop, triphop
Piste 4                        | Beat Torrent                             | electronic, hiphop, rock, alternative, pop
Yellow Submarine               | The Beatles                              | alternative, pop, rock, blues, indie
Sweet Calling                  | Alice Russell                            | electronic, pop, funk, alternative, lounge
Radio Ga Ga                    | Queen                                    | electronic, ambient, pop, easylistening, soundtrack
Bragg Jack                     | Mano Negra                               | rock, pop, alternative, indie, poprock
Daniella           

In [18]:
# propose une playlist de 20 chansons qui sont similaires et qui ne viennenet pas du meme artiste

results = collection_512.search(
    data=[data['embedding_512']],
    anns_field="embedding",
    param={"nprobe": 16},
    limit=120,
    output_fields=["*"],
    expr=f"artist != '{data['artist']}'"
)

print(f'{"Title":<30} | {"Artist":<30} | {"Top 5 Genres"}')
print('-' * 75)

already_proposed_artits = []
count = 0

# loop over the results and print only the ones that have artist not found in already proposed AND not found in the custom playlist
for result in results[0]:
    if result.artist in already_proposed_artits or result.artist in playlist_artists:
        continue
    if count >= 20:
        break
    already_proposed_artits.append(result.artist)
    count += 1
    print(f"{result.title[:29]:<30} | {result.artist[:39]:<40} | {', '.join(result.top_5_genres)}")


Title                          | Artist                         | Top 5 Genres
---------------------------------------------------------------------------
Gypsy                          | Nneka                                    | hiphop, electronic, rap, pop, triphop
Piste 4                        | Beat Torrent                             | electronic, hiphop, rock, alternative, pop
Yellow Submarine               | The Beatles                              | alternative, pop, rock, blues, indie
Sweet Calling                  | Alice Russell                            | electronic, pop, funk, alternative, lounge
Radio Ga Ga                    | Queen                                    | electronic, ambient, pop, easylistening, soundtrack
Bragg Jack                     | Mano Negra                               | rock, pop, alternative, indie, poprock
Daniella                       | The John Butler Trio                     | rock, alternative, pop, indie, electronic
Elegy (London, UK) 

In [19]:
def create_client(song_id=6548, limit=9):
    """Return the songs most similar to the song stored under ``song_id``.

    The song's embedding is first fetched through the REST query endpoint
    (``{URI}/v1/vector/query``), then used as the query vector of a search on
    the ``embeddings_512`` collection.

    Args:
        song_id: Primary key of the seed song. Defaults to 6548, the id that
            was previously hard-coded.
        limit: Number of hits to return. Defaults to 9.

    Returns:
        The hits of the single query vector (``search_result[0]``); each hit
        carries the ``id`` and ``path`` output fields.

    Raises:
        requests.HTTPError: If the REST endpoint answers with an HTTP error.
        LookupError: If the REST response contains no row for ``song_id``.
    """
    import requests

    url = f"{URI}/v1/vector/query"
    # Build the body as a dict and let requests serialize it, instead of
    # hand-assembling a JSON string. int() keeps the filter a plain id list.
    payload = {
        "collectionName": "embeddings_512",
        "filter": f"id in [{int(song_id)}]",
    }
    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    # A timeout prevents the call from hanging forever on an unreachable host.
    response = requests.post(url, json=payload, headers=headers, timeout=30)
    response.raise_for_status()
    body = response.json()

    rows = body.get("data") or []
    if not rows:
        raise LookupError(f"No entity returned for id {song_id}: {body}")
    embedding = rows[0].get("embedding")

    connections.connect(uri=URI, token=TOKEN)
    collection_512 = Collection(name="embeddings_512")

    # offset=1 skips the first hit of the search.
    # NOTE(review): presumably that first hit is the seed song itself -- confirm.
    search_result = collection_512.search(
        data=[embedding],
        anns_field="embedding",
        param={"nprobe": 16},
        limit=limit,
        offset=1,
        output_fields=["id", "path"]
    )

    return search_result[0]

In [20]:
# Run the similarity lookup and print each hit's path followed by its id.
result = create_client()
for hit in result:
    print(hit.path, hit.id, sep="\n")

MegaSet/Jack Johnson/Jack Johnson - 2003 - On and on2/02-The Horizon Has Been Defeated.mp3
5876
MegaSet/Winston Mc Anuff/Winston McAnuff - 2008 - Nostradamus/10 - The Pack.mp3
838
MegaSet/Paul Simon/Paul Simon - (1975) - Still Crazy After All These Years [Bonus Tracks]/04.  50 Ways To Leave Your Lover.mp3
16277
MegaSet/Yellow Umbrella/Yellow Umbrella [2007] - Little Planet/04_yellow_umbrella_-_energie.mp3
12076
MegaSet/Yellow Umbrella/Yellow Umbrella [2007] - Little Planet/08_yellow_umbrella_-_new_time_warrior.mp3
12068
MegaSet/Dub Incorporation/2003 - Diversite/08 - Lechiquier.mp3
3559
MegaSet/Winston Mc Anuff/Winston McAnuff - 2008 - Nostradamus/04 - Vain Imagining.mp3
839
MegaSet/Groundation/2006 - Upon the Bridge/09_groundation-mighty_souls.mp3
12962
MegaSet/Groundation/best of groundation/09_groundation-mighty_souls.mp3
12896
