In [158]:
from panns_inference import AudioTagging
import numpy as np
import librosa
from elasticsearch import Elasticsearch
import os
from tinytag import TinyTag

In [165]:
# Set environment variabes
 

os.environ["ES_CLOUD"] = "<your_es_cloud_id>" 
os.environ["ES_KEY"] = "<your_es_api_key>"


In [160]:
# Load model to get embeddings from audio

model = AudioTagging(checkpoint_path=None, device='cuda')

Checkpoint path: /Users/alexanderdavila/panns_data/Cnn14_mAP=0.431.pth
Using CPU.


In [161]:
# Function to normalize a vector 

def normalize(v):
   norm = np.linalg.norm(v)
   if norm == 0:
        return v
   return v / norm

In [162]:
# Function to generate an embedding from an audio file

def get_embedding (audio_file):
  a, _ = librosa.load(audio_file, sr=44100)
  query_audio = a[None, :]
  _, emb = model.inference(query_audio)
  normalized_v = normalize(emb[0])
  return normalized_v

In [163]:
# Function to generate a elastic-friendly document from the song data

def generate_song_document (song):
    emb = get_embedding(song)
    metadata = TinyTag.get(song)
    document = {"title":song.strip("dataset/"),"embedding":emb, "album":metadata.album,"artist":str(metadata.artist).split("/")}
    return document

In [166]:
#Connection to Elasticsearch

index = "songs_test"
ES_CLOUD = os.environ["ES_CLOUD"]
KEY = os.environ["ES_KEY"]

es = Elasticsearch(cloud_id=ES_CLOUD,api_key=KEY)


print(es) 

<Elasticsearch(['https://6f49f79dd5454e49a08b00ab313c004b.us-east-1.aws.found.io:443'])>


In [167]:
# Function to upload a document to elasticsearh

def elastic_upload(doc):
    res = es.index(index=index, document=doc,id=doc["title"])

    return res

In [168]:
# Listing all the audio files in the dataset/ directory

songs = os.listdir("dataset/")
print(len(songs))

37


In [169]:
# Function to get a random document to Elasticsearch (to get a sample song to generate recommendations)

def get_random_doc():
    random_query = {
          "function_score": {
             "functions": [
                {
                   "random_score": {
                     
                   }
                }
             ]
          }
    }
    doc = es.search(query=random_query, index=index, size=1)

    return doc["hits"]["hits"][0]
    

In [170]:
# Generating embeddings and uploading the document for each song

for song in songs:
    doc = generate_song_document("dataset/"+song)
    elastic_upload(doc)
    songs.remove(song)

In [196]:
# Getting a random song and extracting the data

search_song = get_random_doc()
search_emb = search_song["_source"]["embedding"]
print("Search_song: ",search_song["_source"]["title"])

print("by: ",search_song["_source"]["artist"])
print("album: ",search_song["_source"]["album"])
print("-------------------------------")
print("-------------------------------")

Search_song:  Meet Me Halfway.mp3
by:  ['Black Eyed Peas']
album:  THE E.N.D. (THE ENERGY NEVER DIES) [Deluxe Version]
-------------------------------
-------------------------------


In [200]:
# knn query to get related embeddings --> This is the recommendation system main operation

knn_query = {
        "field": "embedding",
        "query_vector": search_emb,
        "num_candidates": 100,
        "k":3
    }

# filter on the knn query to filter out the selected random song, otherwise that same song would be the top result every time

knn_query["filter"] = {
                "bool": {
                  "must_not":{
                    "bool":{
                      "filter":[
                        {
                          "term":{
                            "_id": search_song["_source"]["title"]
                          }
                        }
                      ]
                    }
                  }
                }
              }


In [201]:
# Run the recommendation search and print results



res = es.search(index=index,knn=knn_query,source=["title","artist","album"])#,filter=filter)
recommended_songs = res["hits"]["hits"]
print("Recommended songs: \n")

for song in recommended_songs:
    
    print("song: ",song["_source"]["title"])
    print("by: ",song["_source"]["artist"])
    print("album: ",song["_source"]["album"])
    print("-------------------------------")


Recommended songs: 

song:  Till I Collapse.mp3
by:  ['Eminem', 'Nate Dogg']
album:  The Eminem Show
-------------------------------
song:  TiK ToK.mp3
by:  ['Kesha']
album:  Animal (Expanded Edition)
-------------------------------
song:  Gimme More.mp3
by:  ['Britney Spears']
album:  Blackout
-------------------------------
