In [1]:
import os
import boto3
import pickle
import numpy
import music_tag
import random
import json
from pathlib import Path

from utils.music_utils import *

import numpy as np
from pymilvus import connections, utility
from pymilvus import Collection, DataType, FieldSchema, CollectionSchema

In [2]:
# Load variables from a .env file (python-dotenv) into the process environment,
# then read the connection settings this notebook needs.
from dotenv import load_dotenv
load_dotenv()

# Each value is None when the corresponding variable is not set.
URI, TOKEN, BUCKET = (
    os.environ.get(var_name)
    for var_name in ("MILVUS_URI", "MILVUS_TOKEN", "BUCKET")
)

In [3]:
def get_list_of_all_filepaths_in_S3(bucket):
    """Return the key of every object stored in an S3 bucket.

    The listing is paginated, so buckets holding more objects than a single
    list call returns are listed completely.

    Args:
        bucket: Name of the S3 bucket to list.

    Returns:
        A list of object keys (empty when the bucket holds no objects), or
        None when the listing fails. A warning describing the failure is
        emitted in that case instead of the error being silently dropped.
    """
    import warnings

    s3_client = boto3.client('s3')
    keys = []
    try:
        paginator = s3_client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket):
            # A page has no 'Contents' entry when it carries no objects.
            keys.extend(content['Key'] for content in page.get('Contents', []))
    except Exception as e:
        # Best-effort contract kept: callers still receive None on failure,
        # but the reason is now visible.
        warnings.warn(
            f"Could not list objects in S3 bucket {bucket!r}: {e!r}",
            stacklevel=2,
        )
        return None
    return keys

In [4]:
# NOTE(review): this client is not used by the cells shown here; it is kept in
# case later cells rely on it.
s3_client = boto3.client('s3')

list_of_files_in_S3 = get_list_of_all_filepaths_in_S3(BUCKET)
# The helper returns None when the listing fails; stop here with a clear
# message instead of letting len(None) raise an unrelated TypeError.
if list_of_files_in_S3 is None:
    raise RuntimeError(f"Could not list objects in S3 bucket {BUCKET!r}")
print(len(list_of_files_in_S3))

174


In [5]:
# Pick one S3 object key at random to use as the query song.
# `random` is already imported in the notebook's first cell, so it is not
# re-imported here. No seed is set in the cells shown, so the choice differs
# between runs.
rdm_from_s3 = random.choice(list_of_files_in_S3)
print(rdm_from_s3)

uploads/Music_Dataset/Moriarty/03-moriarty-private_lily.mp3


In [6]:
# The S3 keys no longer mirror the local dataset layout, so the local file is
# located by its bare file name (the last component of the S3 key).
song_from_s3 = rdm_from_s3.split('/')[-1]

# Scan every .pkl under MegaSet and compare its stored data['filename'] with
# the S3 file name; the first match identifies the local file, whose .mp3
# path is the .pkl path with the suffix swapped.
# NOTE(review): matching on the file name alone picks the first hit if two
# albums contain identically named files — confirm names are unique.
query_path = None  # reset so a value left over from an earlier run cannot be reused
for pkl_file in Path('MegaSet').rglob('*.pkl'):
    with open(pkl_file, 'rb') as f:
        # NOTE(review): pickle.load can execute arbitrary code; only run this
        # on trusted .pkl files.
        data = pickle.load(f)
    if data['filename'] == song_from_s3:
        query_path = pkl_file.with_suffix('.mp3')
        break

if query_path is None:
    raise FileNotFoundError(
        f"No .pkl file under 'MegaSet' has filename {song_from_s3!r}"
    )
print(query_path)

MegaSet/Moriarty/Moriarty - 2007 - Gee Whiz But this is A Lonesome Town/03-moriarty-private_lily.mp3


In [7]:
# Register the "default" Milvus connection using the URI and token read from
# the environment, then report the endpoint and the collections it exposes.
connections.connect(alias="default", uri=URI, token=TOKEN)
print(f"Connecting to DB: {URI}")
print(utility.list_collections())

# Handle on the collection queried and searched by the following cells.
collection_512 = Collection(name="embeddings_512")

Connecting to DB: https://in03-efa63c0579a14a1.api.gcp-us-west1.zillizcloud.com
['predictions_87', 'embeddings_512']


In [8]:
# Fetch the stored row whose `path` field equals the local query file path,
# returning all of its fields.
# NOTE(review): the path is interpolated inside double quotes, so a path that
# itself contains a double quote would produce an invalid expression.
entities = collection_512.query(
    expr=f'path == "{query_path}"',
    output_fields=["*"]
)
# Fail with a clear message rather than an IndexError when nothing matches.
if not entities:
    raise LookupError(
        f"collection_512.query returned no entity for path {str(query_path)!r}"
    )
embedding_512 = entities[0]['embedding']

In [9]:
# Unpack the metadata of the query song from the first returned entity.
query_artist, query_album, query_title = (
    entities[0][field] for field in ("artist", "album", "title")
)
# Keep only the first three entries of the stored genre list.
query_top3_genres = entities[0]["top_5_genres"][:3]

# One value per line: artist, album, title, then the genre list.
print(query_artist, query_album, query_title, query_top3_genres, sep="\n")

Moriarty
Gee Whiz but This Is a Lonesome Town
Private Lily
['folk', 'pop', 'alternative']


In [10]:
# Vector search around the query song's embedding, requesting up to 200 hits
# with all stored fields.
# NOTE(review): offset=1 presumably skips the query song itself (expected to
# be its own closest match) — confirm.
bigresult = collection_512.search(
    data=[embedding_512],
    anns_field="embedding",
    param={"nprobe": 16},
    limit=200,
    offset=1,
    output_fields=["*"],
)

print(f'{"Title":<30} | {"Artist":<25} | {"Top 5 Genres":<25} | {"Path":<50}')
print('-' * 145)

# Artists of the hits displayed below (the name keeps its original spelling in
# case other cells reference it).
already_proposed_artits = []
count = 0
for result in bigresult[0]:
    if count >= 20:  # display only the first 20 of the returned hits
        break
    already_proposed_artits.append(result.artist)
    count += 1
    # Every value is cut to one character less than its column width so long
    # values cannot push the following columns out of alignment (the artist
    # was previously cut at 39 characters for a 25-wide column, and the genres
    # were not cut at all).
    print(f"{result.title[:29]:<30} | {result.artist[:24]:<25} | {', '.join(result.top_5_genres[:2])[:24]:<25} | {result.path[:49]:<50} | ")

Title                          | Artist                    | Top 5 Genres              | Path                                              
-------------------------------------------------------------------------------------------------------------------------------------------------
Cottonflower                   | Moriarty                  | pop, folk                 | MegaSet/Moriarty/Moriarty - 2007 - Gee Whiz But t  | 
Dans mon café préféré          | Volo                      | pop, rock                 | MegaSet/Volo/2005  Bien zarbos/16 - Café préféré.  | 
Jimmy                          | Moriarty                  | pop, blues                | MegaSet/Nova Tunes/Nova Tunes 17/09-moriarty-jimm  | 
Motel                          | Moriarty                  | blues, pop                | MegaSet/Moriarty/Moriarty - 2007 - Gee Whiz But t  | 
Whiteman’s Ballad              | Moriarty                  | pop, folk                 | MegaSet/Moriarty/Moriarty - 2007 - Gee Whiz But t  | 