In [1]:
import pickle
import pandas as pd
import numpy as np

In [2]:
# Load the song table and the precomputed energy-similarity lookup.
# Context managers make sure both file handles are closed as soon as the
# objects have been deserialised.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted source.
with open('df.pkl', 'rb') as df_file:
    df = pickle.load(df_file)
# Number of rows in the song table; used as the upper bound for positional indexes.
songs_count = df.shape[0]
with open('energy_similarity_mapping.pkl', 'rb') as mapping_file:
    energy_similarity_mapping = pickle.load(mapping_file)

In [3]:
def sort_by_popularity(songs, descending=True):
    """Return ``songs`` ordered by its 'Popularity' column.

    Args:
        songs: DataFrame that contains a 'Popularity' column.
        descending: When true (the default) the most popular rows come
            first; otherwise the least popular rows come first.

    Returns:
        A new DataFrame; ``songs`` itself is not reordered.
    """
    ascending_order = songs.sort_values(by=['Popularity'])
    # The descending view is the ascending sort read backwards.
    return ascending_order[::-1] if descending else ascending_order

In [4]:
def get_similar(track_index, count, comparison_matrix, select_smallest):
    """Return the rows of ``df`` whose scores rank closest to a track.

    Args:
        track_index: Positional index of the reference track; also used to
            look up its row in ``comparison_matrix``.
        count: Maximum number of rows to return.
        comparison_matrix: Indexable by ``track_index``; the selected entry
            is converted to a NumPy array and ranked.
            (presumably one score per song in ``df`` -- confirm with caller)
        select_smallest: If true, keep the entries with the smallest
            scores; otherwise keep the entries with the largest scores.

    Returns:
        A copy of the selected rows of the module-level ``df``, excluding
        the reference track itself.
    """
    scores = np.array(comparison_matrix[track_index])
    order = np.argsort(scores)  # positions sorted by ascending score
    # A track must never be recommended as similar to itself.
    self_hits = np.where(order == track_index)
    order = np.delete(order, self_hits)
    if select_smallest:
        chosen = order[:count]
    else:
        chosen = order[::-1][:count]
    return df.iloc[chosen].copy()

In [5]:
def recommendations_as_list(songs, include_fields):
    """Convert a DataFrame of songs into a list of plain dictionaries.

    Args:
        songs: DataFrame of recommended songs.
        include_fields: Column names to keep in each record.

    Returns:
        One dict per row holding the requested columns plus an 'index'
        key carrying the row's index label. ``songs`` is left untouched.
    """
    selected = songs[include_fields].copy()
    # Expose the row label so callers can refer back to the song.
    selected['index'] = selected.index
    records = selected.to_dict(orient='records')
    return records

In [6]:
def get_closest_n(track_index, count):
    """Return the ``count`` rows of ``df`` positioned nearest to a track.

    A window of ``count + 1`` consecutive positions is centred on
    ``track_index`` (``count // 2`` rows before it, the remainder after it)
    and shifted inwards when it would run past either end of the table.
    The track itself is then removed from the window.

    Args:
        track_index: Positional (``iloc``) index of the reference track.
        count: Number of neighbouring rows wanted.

    Returns:
        A DataFrame with the neighbouring rows in table order. It holds
        ``count`` rows whenever the table has at least ``count + 1`` rows,
        otherwise every row except the reference track.
    """
    window = count + 1
    # Clamp the window start so the window stays inside [0, songs_count).
    start = max(0, min(track_index - count // 2, songs_count - window))
    stop = min(songs_count, start + window)
    # Exclude the track by position (not by label) so the result does not
    # depend on df having a default 0..n-1 index.
    positions = [pos for pos in range(start, stop) if pos != track_index]
    return df.iloc[positions]

In [7]:
def get_metadata(track_index):
    """Return the track name and artist name(s) of one song as a dict.

    Args:
        track_index: Positional index of the song in the module-level ``df``.

    Returns:
        A dict with the keys 'Track Name' and 'Artist Name(s)'.
    """
    # NOTE(review): other cells of this notebook refer to 'track_name',
    # 'track_artist' and 'lyrics' -- confirm which column naming df really uses.
    metadata_columns = ['Track Name', 'Artist Name(s)']
    song_row = df.iloc[track_index]
    return song_row[metadata_columns].to_dict()

In [8]:
def get_by_same_artist(track_index, count):
    """Return up to ``count`` other songs with the same 'Artist Name(s)' value.

    Args:
        track_index: Positional index of the reference song in ``df``.
        count: Maximum number of rows to return.

    Returns:
        The first ``count`` matching rows of ``df`` in table order, without
        the reference song.
    """
    artist = df.iloc[track_index]['Artist Name(s)']
    same_artist = df[df['Artist Name(s)'] == artist]
    # drop() removes by index label, while the lookup above is positional;
    # assumes df labels equal row positions -- TODO confirm.
    others = same_artist.drop(track_index)
    return others[:count]

In [9]:
def get_energy_similar(track_index, count):
    """Return up to ``count`` songs listed in the energy-similarity lookup.

    Args:
        track_index: Key into ``energy_similarity_mapping`` for the
            reference song.
        count: Maximum number of rows to return.

    Returns:
        A copy of the rows of ``df`` at the positions taken from the lookup.
    """
    # presumably the lookup stores positions ordered from most to least
    # similar -- verify against the code that built the pickle.
    neighbours = energy_similarity_mapping[track_index]
    top_positions = neighbours[:count]
    return df.iloc[top_positions].copy()

In [10]:
def get_random(count):
    """Return ``count`` rows of ``df`` drawn at random.

    Args:
        count: Number of rows to sample.

    Returns:
        A DataFrame with the sampled rows. No random seed is passed, so the
        selection differs between calls.
    """
    return df.sample(n=count)

In [11]:
def get_released_around_same_time(track_index, count):
    """Return the rows positioned next to a track in ``df``.

    Thin wrapper around ``get_closest_n``.
    (presumably df is ordered by release date, which is what makes
    positional neighbours "contemporary" -- confirm)

    Args:
        track_index: Positional index of the reference song.
        count: Number of neighbouring rows wanted.
    """
    return get_closest_n(track_index=track_index, count=count)

In [12]:
def recommend_by_same_artist(track_index, count, prioritisePopular, include_fields):
    """Recommend up to ``count`` songs by the same artist as a track.

    Args:
        track_index: Positional index of the reference song.
        count: Maximum number of recommendations.
        prioritisePopular: Passed to ``sort_by_popularity`` as the
            descending flag; true puts the most popular songs first.
        include_fields: Columns to keep in each returned record.

    Returns:
        A list of dicts as produced by ``recommendations_as_list``.
    """
    # Fetch twice as many candidates as needed so the popularity ranking
    # has something to choose from.
    candidates = get_by_same_artist(track_index, count * 2)
    ranked = sort_by_popularity(candidates, prioritisePopular)
    return recommendations_as_list(ranked[:count], include_fields)

In [13]:
def recommend_energy_similar(track_index, count, prioritisePopular, include_fields):
    """Recommend up to ``count`` songs taken from the energy-similarity lookup.

    Args:
        track_index: Positional index of the reference song.
        count: Maximum number of recommendations.
        prioritisePopular: Passed to ``sort_by_popularity`` as the
            descending flag; true puts the most popular songs first.
        include_fields: Columns to keep in each returned record.

    Returns:
        A list of dicts as produced by ``recommendations_as_list``.
    """
    # Fetch twice as many candidates as needed so the popularity ranking
    # has something to choose from.
    candidates = get_energy_similar(track_index, count * 2)
    ranked = sort_by_popularity(candidates, prioritisePopular)
    return recommendations_as_list(ranked[:count], include_fields)

In [14]:
def recommend_released_around_same_time(track_index, count, prioritisePopular, include_fields):
    """Recommend up to ``count`` songs positioned next to a track in ``df``.

    Args:
        track_index: Positional index of the reference song.
        count: Maximum number of recommendations.
        prioritisePopular: Passed to ``sort_by_popularity`` as the
            descending flag; true puts the most popular songs first.
        include_fields: Columns to keep in each returned record.

    Returns:
        A list of dicts as produced by ``recommendations_as_list``.
    """
    # Fetch twice as many candidates as needed so the popularity ranking
    # has something to choose from.
    candidates = get_released_around_same_time(track_index, count * 2)
    ranked = sort_by_popularity(candidates, prioritisePopular)
    return recommendations_as_list(ranked[:count], include_fields)

In [15]:
def recommend_random(count, prioritisePopular, include_fields):
    """Recommend up to ``count`` randomly sampled songs.

    Args:
        count: Maximum number of recommendations.
        prioritisePopular: Passed to ``sort_by_popularity`` as the
            descending flag; true puts the most popular songs first.
        include_fields: Columns to keep in each returned record.

    Returns:
        A list of dicts as produced by ``recommendations_as_list``.
    """
    # Sample twice as many songs as needed so the popularity ranking
    # has something to choose from.
    candidates = get_random(count * 2)
    ranked = sort_by_popularity(candidates, prioritisePopular)
    return recommendations_as_list(ranked[:count], include_fields)

In [17]:
def hybrid_recommend(track_index, count=5, prioritisePopular=True):
    """Collect recommendations for a track from every available strategy.

    Args:
        track_index: Positional index of the reference song.
        count: Maximum number of songs per strategy (default 5).
        prioritisePopular: When true, each strategy ranks its most popular
            candidates first.

    Returns:
        A dict mapping a strategy label ('by same artist', 'similar energy',
        'released around same time', 'random') to a list of song records.
    """
    # NOTE(review): other cells of this notebook use the columns 'Track Name',
    # 'Artist Name(s)' and 'Popularity' -- confirm df also has the snake_case
    # columns requested here.
    include_fields = ['track_name', 'track_artist']
    return {
        'by same artist': recommend_by_same_artist(track_index, count, prioritisePopular, include_fields),
        'similar energy': recommend_energy_similar(track_index, count, prioritisePopular, include_fields),
        'released around same time': recommend_released_around_same_time(track_index, count, prioritisePopular, include_fields),
        'random': recommend_random(count, prioritisePopular, include_fields),
    }

In [19]:
from random import randint

# Positional index of one randomly chosen song, shared by the test functions below.
# No seed is set, so a different song may be picked on every run.
random_song_index = randint(0, songs_count-1)

# Check that no recommendation category returns more songs than requested.
def test_recommend_output():
    """Assert that every strategy of ``hybrid_recommend`` yields at most 5 songs."""
    recommendations = hybrid_recommend(random_song_index, 5, True)
    for category_songs in recommendations.values():
        # Fewer than 5 is acceptable; more than 5 is not.
        assert len(category_songs) <= 5

# Check that the metadata of a song exposes every required key.
def test_metadata_output():
    """Assert that ``get_metadata`` returns 'track_name', 'track_artist' and 'lyrics'."""
    # NOTE(review): get_metadata as defined in this notebook returns only the
    # keys 'Track Name' and 'Artist Name(s)', so this assertion cannot pass
    # until one of the two is aligned with the other.
    required_keys = {'track_name', 'track_artist', 'lyrics'}
    metadata = get_metadata(random_song_index)
    assert required_keys.issubset(metadata)