In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the song dataset. The path is relative, so the CSV must sit in the
# kernel's working directory.
song_df = pd.read_csv('spotify-data-hyper-rfc-final.csv')

In [3]:
# Number of rows (songs) in the dataset.
songs_count = len(song_df)
print(songs_count)

13726


In [4]:
# List every column with its dtype to see which features are numeric and
# which are stored as text (object).
print(song_df.dtypes)

track_id                     object
track_name                   object
track_artist                 object
lyrics                       object
track_popularity              int64
track_album_id               object
track_album_name             object
track_album_release_date     object
playlist_genre               object
playlist_subgenre            object
danceability                float64
energy                      float64
key                           int64
loudness                    float64
mode                          int64
speechiness                 float64
acousticness                float64
instrumentalness            float64
liveness                    float64
valence                     float64
tempo                       float64
duration_ms                   int64
predicted_mood               object
dtype: object


In [5]:
# Lyric text, one entry per song.
lyrics_data = song_df['lyrics']
# Energy as a single-column 2-D array (one row per song), the layout the
# pairwise distance computation is given later on.
energy_data = song_df['energy'].values[:, np.newaxis]

In [6]:
#Lyrically Similar
# TfidfVectorizer and cosine_similarity are already imported in the first cell,
# so they are not imported again here.
lyric_vectorizer = TfidfVectorizer(stop_words='english')

# Vectorise straight from song_df rather than from `lyrics_data`: this cell
# rebinds `lyrics_data` to the TF-IDF matrix, so feeding that name back into
# fit_transform made the cell fail when it was run a second time.
lyrics_data = lyric_vectorizer.fit_transform(song_df['lyrics'])

# Square matrix of cosine similarities between every pair of songs' lyrics;
# larger values mean more similar lyrics.
lyric_similarity_matrix = cosine_similarity(lyrics_data)

In [7]:
# Pairwise distance between songs on the single energy feature; smaller
# values mean closer energy.
energy_difference_matrix = euclidean_distances(energy_data)
# For each song (row), the positions of all songs ordered from the smallest
# energy distance to the largest.
similar_energy_songs = np.argsort(energy_difference_matrix)

In [8]:
# Encoder that maps each predicted mood label to an integer code.
label_encoder = LabelEncoder()

# Mood codes as a single-column 2-D array (one row per song).
encoded_mood_data = label_encoder.fit_transform(song_df['predicted_mood']).reshape(-1, 1)

# Pairwise distance between mood codes; 0 means the two songs share a mood.
# NOTE(review): the integer codes are just labels, so non-zero distances
# probably should not be read as "how different" two moods are. Confirm before
# relying on the ranking beyond same-mood matches.
mood_difference_matrix = euclidean_distances(encoded_mood_data)

In [9]:
def sort_by_popularity(songs, descending=True):
    """Order a frame of songs by its ``track_popularity`` column.

    Parameters
    ----------
    songs : pandas.DataFrame
        Songs to order; must contain a ``track_popularity`` column.
    descending : bool, default True
        When truthy, the most popular song comes first; otherwise the least
        popular song comes first.

    Returns
    -------
    pandas.DataFrame
        A sorted frame that keeps the original index labels.
    """
    least_popular_first = songs.sort_values(by=['track_popularity'])
    # The descending result is the ascending sort reversed (not a descending
    # sort), so rows with equal popularity come out in reversed order too.
    return least_popular_first[::-1] if descending else least_popular_first


def get_similar(track_index, count, comparison_matrix, select_smallest):
    """Pick the songs that rank closest to one track in a pairwise matrix.

    Parameters
    ----------
    track_index : int
        Position of the seed track, used both as the row of
        ``comparison_matrix`` and as a position in ``song_df``.
    count : int
        Maximum number of songs to return.
    comparison_matrix : array-like
        Pairwise scores between songs; only row ``track_index`` is read.
    select_smallest : bool
        Truthy to take the lowest scores (distance matrices), falsy to take
        the highest scores (similarity matrices).

    Returns
    -------
    pandas.DataFrame
        A copy of the selected ``song_df`` rows, best match first.
    """
    ranking = np.argsort(np.array(comparison_matrix[track_index]))
    # Drop the seed track itself so a song never recommends itself.
    ranking = ranking[ranking != track_index]
    if not select_smallest:
        # argsort is ascending; flip it so the largest scores come first.
        ranking = ranking[::-1]
    return song_df.iloc[ranking[:count]].copy()


def songs_as_dict(songs, include_fields):
    """Convert selected columns of a song frame into a nested dictionary.

    Parameters
    ----------
    songs : pandas.DataFrame
        Songs to convert.
    include_fields : list of str
        Column names to keep in the output.

    Returns
    -------
    dict
        ``{index_label: {field: value, ...}, ...}`` with one entry per row.
        pandas requires the index labels to be unique for this orientation.
    """
    selected_columns = songs[include_fields]
    return selected_columns.to_dict(orient='index')


def get_closest_n(track_index, count):
    """Return the ``count`` rows of ``song_df`` positioned nearest to a track.

    A window of ``count + 1`` consecutive rows is placed around
    ``track_index`` (shifted inwards when it would run past either end of the
    frame) and the track's own row is left out. For an odd ``count`` the extra
    row is taken from after the track. Earlier versions returned only
    ``count - 1`` rows for an odd ``count`` away from the edges; an even
    ``count`` gives the same rows as before.

    Parameters
    ----------
    track_index : int
        Position of the seed track in ``song_df``.
    count : int
        Number of neighbouring rows wanted. Fewer are returned only when the
        frame has fewer than ``count + 1`` rows.

    Returns
    -------
    pandas.DataFrame
        A new frame with the neighbouring rows in their original order.

    Raises
    ------
    KeyError
        If ``track_index`` is not a valid position in ``song_df``.
    """
    total = len(song_df)
    if not 0 <= track_index < total:
        # Out-of-range positions used to surface as a KeyError from ``drop``;
        # keep the same exception type for callers.
        raise KeyError(track_index)

    window = count + 1
    # Centre the window on the track, then clamp it so it stays inside the frame.
    start = max(0, min(track_index - count // 2, total - window))
    stop = min(total, start + window)

    # Slice by position on both sides of the track so its row is excluded
    # without relying on index labels matching positions.
    return pd.concat([song_df.iloc[start:track_index], song_df.iloc[track_index + 1:stop]])

In [10]:
def get_by_same_artist(track_index, count):
    """Return up to ``count`` other songs by the artist of the given track.

    Parameters
    ----------
    track_index : int
        Position of the seed track in ``song_df``.
    count : int
        Maximum number of songs to return.

    Returns
    -------
    pandas.DataFrame
        The first ``count`` matching rows in dataset order, seed track excluded.
    """
    artist = song_df.iloc[track_index]['track_artist']
    same_artist = song_df[song_df['track_artist'] == artist]
    # The seed row is looked up by position above but removed by index label
    # here, which assumes labels equal positions (default RangeIndex) — TODO confirm.
    others = same_artist.drop(track_index)
    return others[:count]


def get_lyrically_similar(track_index, count):
    """Return up to ``count`` songs whose lyrics are most similar to the track's.

    Ranks row ``track_index`` of ``lyric_similarity_matrix`` via ``get_similar``.
    """
    # Similarity scores: larger means closer, so take the largest values.
    return get_similar(
        track_index,
        count,
        comparison_matrix=lyric_similarity_matrix,
        select_smallest=False,
    )


def get_energy_similar(track_index, count):
    """Return up to ``count`` songs whose energy is closest to the track's.

    Ranks row ``track_index`` of ``energy_difference_matrix`` via ``get_similar``.
    """
    # Distances: smaller means closer, so take the smallest values.
    return get_similar(
        track_index,
        count,
        comparison_matrix=energy_difference_matrix,
        select_smallest=True,
    )


def get_mood_similar(track_index, count):
    """Return up to ``count`` songs whose mood code is closest to the track's.

    Ranks row ``track_index`` of ``mood_difference_matrix`` via ``get_similar``.
    """
    # Distances: smaller means closer, so take the smallest values.
    return get_similar(
        track_index,
        count,
        comparison_matrix=mood_difference_matrix,
        select_smallest=True,
    )


def get_released_around_same_time(track_index, count):
    """Return the songs stored next to the given track in ``song_df``.

    Delegates to ``get_closest_n``, which selects neighbouring rows by position.
    NOTE(review): this only means "released around the same time" if the
    dataset rows are ordered by release date — confirm.
    """
    return get_closest_n(track_index=track_index, count=count)

In [11]:
def recommend_by_same_artist(track_index, count, prioritisePopular):
    """Recommend other songs by the same artist, ordered by popularity.

    Parameters
    ----------
    track_index : int
        Position of the seed track in ``song_df``.
    count : int
        Maximum number of songs to recommend.
    prioritisePopular : bool
        Truthy to list the most popular songs first, falsy for least popular first.

    Returns
    -------
    pandas.DataFrame
        The recommended rows with a ``recommendation_type`` column set to
        ``'by same artist'``.
    """
    tagged = get_by_same_artist(track_index, count).assign(recommendation_type='by same artist')
    return sort_by_popularity(tagged, descending=prioritisePopular)


def recommend_lyrically_similar(track_index, count, prioritisePopular):
    """Recommend songs with similar lyrics, ordered by popularity.

    Parameters
    ----------
    track_index : int
        Position of the seed track in ``song_df``.
    count : int
        Maximum number of songs to recommend.
    prioritisePopular : bool
        Truthy to list the most popular songs first, falsy for least popular first.

    Returns
    -------
    pandas.DataFrame
        The recommended rows with a ``recommendation_type`` column set to
        ``'lyrically similar'``.
    """
    tagged = get_lyrically_similar(track_index, count).assign(recommendation_type='lyrically similar')
    return sort_by_popularity(tagged, descending=prioritisePopular)


def recommend_energy_similar(track_index, count, prioritisePopular):
    """Recommend songs with similar energy, ordered by popularity.

    Parameters
    ----------
    track_index : int
        Position of the seed track in ``song_df``.
    count : int
        Maximum number of songs to recommend.
    prioritisePopular : bool
        Truthy to list the most popular songs first, falsy for least popular first.

    Returns
    -------
    pandas.DataFrame
        The recommended rows with a ``recommendation_type`` column set to
        ``'similar energy'``.
    """
    tagged = get_energy_similar(track_index, count).assign(recommendation_type='similar energy')
    return sort_by_popularity(tagged, descending=prioritisePopular)


def recommend_mood_similar(track_index, count, prioritisePopular):
    """Recommend songs with a similar predicted mood, ordered by popularity.

    Parameters
    ----------
    track_index : int
        Position of the seed track in ``song_df``.
    count : int
        Maximum number of songs to recommend.
    prioritisePopular : bool
        Truthy to list the most popular songs first, falsy for least popular first.

    Returns
    -------
    pandas.DataFrame
        The recommended rows with a ``recommendation_type`` column set to
        ``'similar mood'``.
    """
    tagged = get_mood_similar(track_index, count).assign(recommendation_type='similar mood')
    return sort_by_popularity(tagged, descending=prioritisePopular)


def recommend_released_around_same_time(track_index, count, prioritisePopular):
    """Recommend the songs stored next to the track, ordered by popularity.

    Parameters
    ----------
    track_index : int
        Position of the seed track in ``song_df``.
    count : int
        Number of neighbouring songs requested.
    prioritisePopular : bool
        Truthy to list the most popular songs first, falsy for least popular first.

    Returns
    -------
    pandas.DataFrame
        The recommended rows with a ``recommendation_type`` column set to
        ``'released around same time'``.
    """
    tagged = get_released_around_same_time(track_index, count).assign(
        recommendation_type='released around same time'
    )
    return sort_by_popularity(tagged, descending=prioritisePopular)

In [12]:
def hybrid_recommend(track_index, count=6, prioritisePopular=True):
    """Combine every recommendation strategy into one dictionary of songs.

    Runs the same-artist, lyric, energy, mood and neighbouring-row
    recommenders for the track and merges their results.

    Parameters
    ----------
    track_index : int
        Position of the seed track in ``song_df``.
    count : int, default 6
        Number of songs requested from each strategy.
    prioritisePopular : bool, default True
        Passed to every strategy; truthy lists more popular songs first
        within each strategy's results.

    Returns
    -------
    dict
        ``{index_label: {'track_name': ..., 'track_artist': ...,
        'recommendation_type': ...}}`` with one entry per recommended song.
        A song chosen by several strategies is reported once, labelled with
        the first strategy (in the order listed above) that picked it.
    """
    strategies = (
        recommend_by_same_artist,
        recommend_lyrically_similar,
        recommend_energy_similar,
        recommend_mood_similar,
        recommend_released_around_same_time,
    )
    all_recommendations = pd.concat(
        [recommend(track_index, count, prioritisePopular) for recommend in strategies]
    ).drop_duplicates()

    # drop_duplicates() compares column values, so a song picked by two
    # strategies survives twice (its recommendation_type differs) under the
    # same index label. to_dict(orient='index') rejects a non-unique index,
    # so keep only the first occurrence of each label.
    first_occurrence = ~all_recommendations.index.duplicated(keep='first')
    all_recommendations = all_recommendations[first_occurrence]

    return songs_as_dict(all_recommendations, include_fields=['track_name', 'track_artist', 'recommendation_type'])

In [15]:
# Show every row by Adele; the index labels on the left are the values to
# pass as track_index to the recommenders.
song_df.loc[song_df['track_artist'] == 'Adele']

Unnamed: 0,track_id,track_name,track_artist,lyrics,track_popularity,track_album_id,track_album_name,track_album_release_date,playlist_genre,playlist_subgenre,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,predicted_mood
4535,7wnC64YPj2YH2oHYBmCzHt,Melt My Heart to Stone,Adele,Right under my feet is air made of bricks That...,57,2YO1F9DHVEzXPriA1JHoOQ,19,2008-01-28,r&b,neo soul,...,-7.091,0,0.0366,0.34,0.0,0.215,0.19,80.67,203907,romance
4537,59tg0OPhiHlbsVZ9GFqUk5,Chasing Pavements,Adele,I've made up my mind Don't need to think it ov...,7,6Pb3K1oPXdhsqFXtzKe3Z1,19,2008-01-28,r&b,neo soul,...,-6.073,0,0.0267,0.325,0.0,0.115,0.327,80.072,210507,romance
4538,4rtiQE5Yo2gC8kW91VCoXi,Tired,Adele,Hold my hand While you cut me down It'd only j...,0,6Pb3K1oPXdhsqFXtzKe3Z1,19,2008-01-28,pop,indie poptimism,...,-6.338,1,0.047,0.191,0.000646,0.0953,0.552,96.997,258853,romance
4539,1ARkKt39O6WQqE0QEpZntu,Right As Rain,Adele,NA Who wants to be right as rain? It's better ...,51,2YO1F9DHVEzXPriA1JHoOQ,19,2008-01-28,pop,indie poptimism,...,-5.876,1,0.0853,0.776,0.0,0.0891,0.918,137.196,197347,epic
5311,6QPKYGnAW9QozVz2dSWqRg,Someone Like You,Adele,I heard that you're settled down That you foun...,79,7n3QJc7TBOxXtlYh4Ssll8,21,2011-01-19,pop,dance pop,...,-8.251,1,0.028,0.893,0.0,0.0996,0.288,135.047,285240,sad
5312,5PKWUDfQFtc5qqo8cs1gQp,Set Fire to the Rain,Adele,"NA I let it fall, my heart And as it fell, you...",74,7n3QJc7TBOxXtlYh4Ssll8,21,2011-01-19,pop,dance pop,...,-3.882,0,0.0249,0.00408,2e-06,0.112,0.445,107.995,242974,upbeat
5313,5O6yk6U8L3rTbFsIwuqe8H,Turning Tables,Adele,Close enough to start a war All that I have is...,66,1azUkThwd2HfUDdeNeT147,21,2011-01-19,r&b,neo soul,...,-6.077,0,0.0293,0.952,1.7e-05,0.129,0.21,79.83,250133,sad
5314,4ThTvhGNae80mrLsMzs4Hb,He Won't Go,Adele,Some say I'll be better without you But they d...,56,1azUkThwd2HfUDdeNeT147,21,2011-01-19,r&b,neo soul,...,-7.044,0,0.0861,0.023,0.0,0.0731,0.71,159.824,277947,epic
5315,1CkvWZme3pRgbzaxZnTl5X,Rolling in the Deep,Adele,There's a fire starting in my heart Reaching a...,75,1azUkThwd2HfUDdeNeT147,21,2011-01-19,pop,electropop,...,-5.119,1,0.0294,0.131,0.0,0.0527,0.522,104.945,228293,upbeat
6780,2GblQ918RbkOs4Yo1Rpkcj,Rolling In The Deep,Adele,There's a fire starting in my heart Reaching a...,61,19xt2EkDfNcbAdQzOVUttd,Pay Close Attention : XL Recordings,2014-08-25,edm,pop edm,...,-5.114,1,0.0297,0.13,0.0,0.0584,0.54,104.941,228141,upbeat
