# Music Recommender ANNOY

### Import Libraries

In [1]:
import os
import numpy as np
import csv
import glob
import tensorflow as tf
import pandas as pd
import json
from ast import literal_eval
import random

from annoy import AnnoyIndex

# Directories holding the AudioSet v1 embedding shards for each split.
# Both end with a slash because the glob pattern is appended to them by
# plain string concatenation.
eval_path, bal_train = (
    'data/audioset_v1_embeddings/eval/',
    'data/audioset_v1_embeddings/bal_train/',
)


### Import Train + Test Data

In [2]:
# Build one TFRecordDataset per split from every *.tfrecord shard found
# directly inside that split's embedding directory.
# The shard path lists are handed straight to the dataset constructor, so no
# temporary list names are left behind in the namespace.
tf_test_dataset = tf.data.TFRecordDataset(glob.glob(eval_path+"*.tfrecord"))
tf_train_dataset = tf.data.TFRecordDataset(glob.glob(bal_train+"*.tfrecord"))

In [3]:
# Import Labels class indices
class_labels_all = pd.read_csv("./data/class_labels_indices.csv")
class_labels_train = pd.read_csv("./data/balanced_train_segments.csv",  sep=', ', engine='python')
class_labels_test = pd.read_csv("./data/eval_segments.csv", sep=', ', engine='python')

# Build the mid -> display_name lookup once instead of scanning
# class_labels_all for every label of every row.
# drop_duplicates keeps the first occurrence of each mid, which is the row a
# positional `.values[0]` lookup would pick.
mid_to_display_name = (
    class_labels_all.drop_duplicates(subset='mid')
    .set_index('mid')['display_name']
    .to_dict()
)


def _add_display_names(segments_df):
    """Normalise 'positive_labels' and add a 'display_name' column in place.

    'positive_labels' is stripped of double quotes and split on commas into a
    list of mids. 'display_name' is the comma-joined display names of those
    mids, in the same order.

    Raises:
        KeyError: if a mid is not present in class_labels_all.
    """
    segments_df['positive_labels'] = (
        segments_df['positive_labels'].str.replace('"','').str.split(',')
    )
    segments_df['display_name'] = segments_df['positive_labels'].map(
        lambda mids: ','.join(mid_to_display_name[mid] for mid in mids)
    )
    return segments_df


# Format 'positive_labels' and merge display names on the train and test df
class_labels_train = _add_display_names(class_labels_train)
class_labels_test = _add_display_names(class_labels_test)

# Keep only the segments whose display names mention "music"
# (case-insensitive substring match, so "Musical instrument" also qualifies).
music_labels_train = class_labels_train[class_labels_train['display_name'].str.contains('Music', case=False)]
music_labels_test = class_labels_test[class_labels_test['display_name'].str.contains('Music', case=False)]
del class_labels_test
del class_labels_train


### Convert *.tfrecord to json

In [4]:
# Extract *.tfrecord data
def extract_tfrecord_dictionary(tf_iterator,youtubeID_filter, NUM_SECONDS = 10):
    """Parse serialized SequenceExamples into a list of plain audio dicts.

    Args:
        tf_iterator: iterable of tensors whose `.numpy()` yields a serialized
            `tf.train.SequenceExample`.
        youtubeID_filter: iterable of video ids to keep; every other record
            is skipped.
        NUM_SECONDS: number of leading embedding frames to keep per record.

    Returns:
        A list of dicts with keys 'label' (list of ints), 'video_id' (str),
        'start_time', 'end_time' and 'data' (flat list of byte values of
        length 128 * NUM_SECONDS). Records whose flattened embedding does not
        have exactly that length are dropped.
    """
    # Build the allow-list once: set membership is O(1) per record, whereas
    # testing against an array/list rescans it for every record.
    allowed_ids = set(youtubeID_filter)
    expected_len = 128 * NUM_SECONDS

    dataset_audios = []
    for raw_file in tf_iterator:
        # Extract File
        extracted_file = tf.train.SequenceExample()
        extracted_file.ParseFromString(raw_file.numpy())
        context = extracted_file.context.feature

        # Filter on the id first so unwanted records never pay for the
        # embedding decode below.
        video_id = context['video_id'].bytes_list.value[0].decode('utf-8')
        if video_id not in allowed_ids:
            continue

        # Get audio_embedding: flatten the first NUM_SECONDS frames into a
        # single list of byte values.
        frames = extracted_file.feature_lists.feature_list['audio_embedding'].feature
        data = [
            byte
            for frame in frames[:NUM_SECONDS]
            for byte in frame.bytes_list.value[0]
        ]

        # Making sure each data vector has the same 128 * NUM_SECONDS size
        if len(data) != expected_len:
            continue

        dataset_audios.append({
            # Copy into a plain list so the result does not keep a reference
            # into the parsed protobuf message.
            'label': list(context['labels'].int64_list.value),
            'video_id': video_id,
            'start_time': context['start_time_seconds'].float_list.value[0],
            'end_time': context['end_time_seconds'].float_list.value[0],
            'data': data,
        })

    return dataset_audios

NUM_SECONDS = 10
dataset_final_test = extract_tfrecord_dictionary(
    tf_test_dataset, music_labels_test['YTID'].values, NUM_SECONDS)
dataset_final_train = extract_tfrecord_dictionary(
    tf_train_dataset, music_labels_train['YTID'].values, NUM_SECONDS)


def _dump_as_repr(cache_path, records):
    """Write repr(records) to `cache_path` as a single JSON string."""
    # The payload is the Python repr wrapped in JSON, not a JSON array;
    # whoever reads the file back must JSON-decode and then literal_eval it.
    with open(cache_path, "w") as cache_file:
        json.dump(repr(records), cache_file)


# Cache both splits on disk so the tfrecords need not be parsed again
_dump_as_repr("audio_preprocessed_test.json", dataset_final_test)
_dump_as_repr("audio_preprocessed_train.json", dataset_final_train)

In [15]:
# Reload the cached datasets from disk (to avoid re-processing)
def _read_cached_dataset(cache_path):
    """Return the list of audio dicts stored in `cache_path`.

    The file content is JSON-decoded first and the decoded value is then
    parsed with literal_eval into a list of dicts.
    """
    with open(cache_path, "rb") as cache_file:
        return literal_eval(json.load(cache_file))


# Reset both names before loading so any previous in-memory copies can be freed
dataset_final_train = None
dataset_final_test = None
dataset_final_train = _read_cached_dataset("audio_preprocessed_train.json")
dataset_final_test = _read_cached_dataset("audio_preprocessed_test.json")

### ANNOY Training

In [12]:
# Each item is the flattened embedding: 128 values per kept second
AUDIO_DIM = 128*NUM_SECONDS

# Index using the angular metric over AUDIO_DIM-dimensional vectors
annoyModel= AnnoyIndex(AUDIO_DIM, 'angular')

# Register every training clip. The Annoy item id is the clip's position in
# dataset_final_train, so a neighbour id can be used directly as a list index.
for item_id, train_record in enumerate(dataset_final_train):
    annoyModel.add_item(item_id, train_record['data'])

# Build the forest, then persist the index to disk
annoyModel.build(n_trees=5000)
annoyModel.save('nearest_neighbor_graph.ann')

True

In [14]:
# Sample Usage
# Get 5 Nearest Neighbors of test data sample

def _youtube_url(video_id, start_time, end_time):
    """Return a YouTube watch URL carrying the clip's start/end seconds."""
    # A query string has a single '?'; every parameter after `v` is joined
    # with '&' so the video id value stays clean.
    return (f"http://www.youtube.com/watch?v={video_id}"
            f"&start={int(start_time)}&end={int(end_time)}")


# Get Random Element
sample_test = random.choice(dataset_final_test)
# Get Element audio Data
sample_vector_test = sample_test['data']

# Test Data
print("Test Vector")
labels_test = music_labels_test.loc[music_labels_test['YTID'] == sample_test['video_id'], 'display_name'].values
print(f"Labels: {sample_test['label']} {labels_test} - Video ID: {sample_test['video_id']}\n")
print(f"Youtube URL: {_youtube_url(sample_test['video_id'], sample_test['start_time'], sample_test['end_time'])}\n")
print("....................................................................................")

# Get Nearest Neighbors from Annoy Model
nn_annoy = annoyModel.get_nns_by_vector(sample_vector_test, 5)

# Get The Nearest Neighbors Information
print('SONG RECOMMENDATIONS\n')
for i in nn_annoy:
    # Annoy ids are positions in the training list, so index it directly
    sample = dataset_final_train[i]
    # Extract Classes
    music_labels_sample = music_labels_train.loc[music_labels_train['YTID'] == sample['video_id'], 'display_name'].values
    print(f"Annoy Index: {i} - Labels: {music_labels_sample} - Video ID: {sample['video_id']} - Start: {sample['start_time']} - End: {sample['end_time']}")
    print(f"Youtube URL: {_youtube_url(sample['video_id'], sample['start_time'], sample['end_time'])}\n")

Test Vector
Labels: [152, 154, 160] ['Electric piano,Harpsichord,Keyboard (musical)'] - Video ID: 2laUv3y7OfA

Youtube URL: http://www.youtube.com/watch?v=2laUv3y7OfA?&start=30&end=40

....................................................................................
SONG RECOMMENDATIONS

Annoy Index: 998 - Labels: ['Harpsichord,Keyboard (musical),Piano'] - Video ID: CeW3NLlstf4 - Start: 170.0 - End: 180.0
Youtube URL: http://www.youtube.com/watch?v=CeW3NLlstf4?&start=170&end=180

Annoy Index: 2273 - Labels: ['Cello,Music,Musical instrument,Orchestra,Violin, fiddle,Classical music,Bowed string instrument'] - Video ID: yrme-KRBvzk - Start: 50.0 - End: 60.0
Youtube URL: http://www.youtube.com/watch?v=yrme-KRBvzk?&start=50&end=60

Annoy Index: 1500 - Labels: ['Harpsichord,Keyboard (musical)'] - Video ID: L1n4TWq2ZO8 - Start: 30.0 - End: 40.0
Youtube URL: http://www.youtube.com/watch?v=L1n4TWq2ZO8?&start=30&end=40

Annoy Index: 793 - Labels: ['Harpsichord,Keyboard (musical)'] - Video ID: