#Installing all dependent libraries

In [3]:
%pip install numpy
%pip install pandas
%pip install sklearn
%pip install seaborn

[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.
Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25ldone
[?25h  Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1304 sha256=2a43b384b11e993fe96c50865de6d79213366d9a685b5e2a002cb4bb77ee6f97
  Stored in directory: /home/gitpod/.cache/pip/wheels/22/0b/40/fd3f795caaa1fb4c6cb738bc1f56100be1e57da95849bfc897
Successfully built sklearn
Installing collected packages: sklearn
Successfully installed sklearn-0.0
[0mNote: you may need to restart the kernel to use updated packages.
Collecting seaborn
  Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m292.8/292.8 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hColl

In [4]:
#Importing all dependent libraries

In [5]:
import pandas as pd
import numpy as np
import seaborn as sb

In [6]:
# Read the Spotify songs CSV and preprocess it: keep English-language tracks, drop unused columns, remove duplicate tracks, and sort by album release date

In [7]:
# Build the working song table in a single chain so that no step mutates a
# filtered frame in place (in-place `drop` / column assignment on a filtered
# frame is what triggers pandas' SettingWithCopyWarning).
data = (
    pd.read_csv('spotify_songs.csv')
    # Keep only the rows whose `language` column is 'en'.
    .loc[lambda songs: songs['language'] == 'en']
    .drop(columns=['language', 'playlist_name', 'playlist_id'])
    # One row per (track name, artist) pair; the first occurrence is kept.
    .drop_duplicates(subset=['track_name', 'track_artist'])
    # NOTE(review): `infer_datetime_format` is deprecated in pandas >= 2.0;
    # it is kept here so parsing behaves as it did when the notebook was run.
    .assign(
        track_album_release_date=lambda songs: pd.to_datetime(
            songs['track_album_release_date'], infer_datetime_format=True
        )
    )
    .sort_values(by=['track_album_release_date'])
    # After this, each row's index label equals its position (0..n-1), which
    # the helpers below rely on when mixing `.iloc[...]` and `.drop(label)`.
    .reset_index(drop=True)
)
print(data.columns)
songs_count = data.shape[0]
print(songs_count)

Index(['track_id', 'track_name', 'track_artist', 'lyrics', 'track_popularity',
       'track_album_id', 'track_album_name', 'track_album_release_date',
       'playlist_genre', 'playlist_subgenre', 'danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'duration_ms'],
      dtype='object')
13726


In [8]:
#Sectioning off data for recommendation subsystem

In [9]:
# Split the song table into the inputs of each similarity measure:
# raw lyric text, the "energy" feature columns and the "mood" feature columns.
lyrics_data = data.loc[:, 'lyrics']
energy_data = data.loc[:, ['danceability', 'tempo', 'acousticness']]
mood_data = data.loc[:, ['mode', 'key', 'valence']]

In [10]:
# Use TF-IDF vectors and cosine similarity to make song lyrics comparable

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Represent every song's lyrics as a TF-IDF vector (English stop words removed).
lyric_vectorizer = TfidfVectorizer(stop_words='english')
# Keep the vectors under their own name instead of overwriting `lyrics_data`:
# the cell can then be re-run, because it always fits on the raw lyric text.
lyrics_tfidf = lyric_vectorizer.fit_transform(lyrics_data)
# Entry [i, j] is the cosine similarity between the lyrics of songs i and j
# (higher means more similar).
lyric_similarity_matrix = cosine_similarity(lyrics_tfidf)

In [12]:
# Use Euclidean distance to make the energy and mood features comparable

In [13]:
from sklearn.metrics.pairwise import euclidean_distances

def _min_max_scale(features):
    """Linearly rescale every column of ``features`` onto the range [0, 1].

    Euclidean distance adds up squared differences across columns, so a column
    with a large numeric range (tempo is in the tens to hundreds in the sample
    output, acousticness and valence are fractions below 1) would otherwise
    decide the distance almost on its own.

    Parameters:
        features: DataFrame of numeric feature columns.

    Returns:
        A new DataFrame with the same shape, index and column names.
    """
    lowest = features.min()
    # A constant column has zero range; divide by 1 so it maps to 0, not NaN.
    span = (features.max() - lowest).replace(0, 1)
    return (features - lowest) / span


# Entry [i, j] is the distance between songs i and j (smaller = more alike).
energy_difference_matrix = euclidean_distances(_min_max_scale(energy_data))
mood_difference_matrix = euclidean_distances(_min_max_scale(mood_data))

In [14]:
#Utility functions


In [15]:
def sort_by_popularity(songs, descending=True):
    """Return ``songs`` ordered by the ``track_popularity`` column.

    Parameters:
        songs: DataFrame containing a ``track_popularity`` column.
        descending: when truthy (default) the most popular songs come first,
            otherwise the least popular come first.

    Returns:
        A new, sorted DataFrame; ``songs`` itself is not modified.

    The sort direction is passed to pandas rather than sorting ascending and
    reversing the result: reversing also flips the relative order of songs
    with equal popularity. A stable sort keeps such ties in the order in
    which they were passed in.
    """
    return songs.sort_values(
        by=['track_popularity'],
        ascending=not descending,
        kind='mergesort',  # stable: ties keep their incoming order
    )


def get_similar(track_index, count, comparison_matrix, select_smallest):
    """Return the ``count`` songs ranked closest to ``track_index``.

    Parameters:
        track_index: row position of the reference song.
        count: maximum number of songs to return.
        comparison_matrix: square matrix whose row ``track_index`` holds the
            score of the reference song against every song.
        select_smallest: if true, the lowest scores are taken (distance-like
            matrices); otherwise the highest scores are taken (similarity-like
            matrices).

    Returns:
        A copy of the selected rows of the global ``data`` frame, best match
        first. The reference song itself is never included.
    """
    scores = np.array(comparison_matrix[track_index])
    ranking = np.argsort(scores)
    # Remove the reference song from its own ranking.
    ranking = ranking[ranking != track_index]
    if not select_smallest:
        ranking = ranking[::-1]
    return data.iloc[ranking[:count]].copy()


def songs_as_dict(songs, include_fields):
    """Convert selected columns of ``songs`` into a nested dictionary.

    Parameters:
        songs: DataFrame of songs.
        include_fields: list of column names to keep.

    Returns:
        ``{index_label: {column_name: value, ...}, ...}`` with one entry per
        row of ``songs``.
    """
    selected_columns = songs[include_fields]
    return selected_columns.to_dict(orient='index')


def get_closest_n(track_index, count):
    """Return the ``count`` rows of ``data`` positioned nearest to ``track_index``.

    A window of ``count + 1`` consecutive rows is placed around the reference
    row and the reference row is then removed. Near either end of the table
    the window is shifted inwards so that it still holds ``count + 1`` rows
    whenever the table is large enough.

    For an even ``count`` the result matches the previous three-branch
    implementation. For an odd ``count`` that implementation returned only
    ``count - 1`` rows away from the table ends; the window approach returns
    ``count`` rows (the extra one comes from after the reference row).

    Parameters:
        track_index: row position of the reference song. It is also used as
            the index label to drop, so ``data`` must have a 0..n-1 index.
        count: number of neighbouring rows wanted.

    Returns:
        A new DataFrame with the neighbouring rows, in table order.
    """
    total = len(data)
    window_size = count + 1  # the neighbours plus the reference row itself
    # Centre the window on the reference row, then clamp it inside the table.
    first = max(0, min(track_index - count // 2, total - window_size))
    return data.iloc[first:first + window_size].drop(track_index)

In [16]:
#Getters for recommendation subsystems

In [17]:
def get_by_same_artist(track_index, count):
    """Return up to ``count`` other songs by the artist of ``track_index``.

    Parameters:
        track_index: row position (and index label) of the reference song in
            the global ``data`` frame.
        count: maximum number of songs to return.

    Returns:
        A new DataFrame holding the first ``count`` matching rows in table
        order, with the reference song excluded.
    """
    artist = data.iloc[track_index]['track_artist']
    other_songs = data[data['track_artist'] == artist].drop(track_index)
    # Return an independent frame, as get_similar does: callers add a column
    # to the result, which on a plain slice can raise SettingWithCopyWarning.
    return other_songs[:count].copy()


def get_lyrically_similar(track_index, count):
    """Return the ``count`` songs with the highest lyric similarity to the track.

    Scores are read from ``lyric_similarity_matrix``; the largest values are
    selected.
    """
    return get_similar(
        track_index,
        count,
        comparison_matrix=lyric_similarity_matrix,
        select_smallest=False,
    )


def get_energy_similar(track_index, count):
    """Return the ``count`` songs with the smallest energy distance to the track.

    Distances are read from ``energy_difference_matrix``; the smallest values
    are selected.
    """
    return get_similar(
        track_index,
        count,
        comparison_matrix=energy_difference_matrix,
        select_smallest=True,
    )


def get_mood_similar(track_index, count):
    """Return the ``count`` songs with the smallest mood distance to the track.

    Distances are read from ``mood_difference_matrix``; the smallest values
    are selected.
    """
    return get_similar(
        track_index,
        count,
        comparison_matrix=mood_difference_matrix,
        select_smallest=True,
    )


def get_random(count, random_state=None):
    """Return ``count`` rows sampled at random from the global ``data`` frame.

    Parameters:
        count: number of rows to sample.
        random_state: optional seed (or random generator) forwarded to
            ``DataFrame.sample`` so a run can be reproduced. The default,
            ``None``, keeps the previous unseeded behaviour.

    Returns:
        A new DataFrame with the sampled rows.
    """
    return data.sample(count, random_state=random_state)


def get_released_around_same_time(track_index, count):
    """Return the ``count`` rows positioned nearest to the track in ``data``.

    ``data`` is sorted by album release date, so neighbouring rows are the
    songs released closest in time to the reference song.
    """
    neighbours = get_closest_n(track_index=track_index, count=count)
    return neighbours

In [18]:
# The recommend_* wrappers are defined once, in the "Recommendation Subsystem"
# cell below. This cell used to hold an identical second copy of them, which
# the later definitions silently replaced.

In [19]:
#Recommendation Subsystem

In [20]:
def _label_and_rank(songs, recommendation_type, prioritisePopular):
    """Tag ``songs`` with a recommendation type and order them by popularity.

    The label is attached with ``assign``, which returns a new frame, so the
    frame handed in by a getter is never modified in place.

    Parameters:
        songs: DataFrame of candidate songs.
        recommendation_type: text stored in the ``recommendation_type`` column.
        prioritisePopular: passed to ``sort_by_popularity`` as its
            ``descending`` flag.
    """
    labelled = songs.assign(recommendation_type=recommendation_type)
    return sort_by_popularity(labelled, prioritisePopular)


def recommend_by_same_artist(track_index, count, prioritisePopular):
    """Recommend up to ``count`` other songs by the same artist."""
    return _label_and_rank(
        get_by_same_artist(track_index, count), 'by same artist', prioritisePopular
    )


def recommend_lyrically_similar(track_index, count, prioritisePopular):
    """Recommend the ``count`` songs with the most similar lyrics."""
    return _label_and_rank(
        get_lyrically_similar(track_index, count), 'lyrically similar', prioritisePopular
    )


def recommend_energy_similar(track_index, count, prioritisePopular):
    """Recommend the ``count`` songs with the closest energy features."""
    return _label_and_rank(
        get_energy_similar(track_index, count), 'similar energy', prioritisePopular
    )


def recommend_mood_similar(track_index, count, prioritisePopular):
    """Recommend the ``count`` songs with the closest mood features."""
    return _label_and_rank(
        get_mood_similar(track_index, count), 'similar mood', prioritisePopular
    )


def recommend_released_around_same_time(track_index, count, prioritisePopular):
    """Recommend ``count`` songs adjacent to the track in release-date order."""
    return _label_and_rank(
        get_released_around_same_time(track_index, count),
        'released around same time',
        prioritisePopular,
    )


def recommend_random(count, prioritisePopular):
    """Recommend ``count`` randomly sampled songs."""
    return _label_and_rank(get_random(count), 'random', prioritisePopular)

In [21]:
def hybrid_recommend(track_index, count=6, prioritisePopular=True):
    """Combine every recommendation strategy for one song.

    Parameters:
        track_index: row position (and index label) of the reference song.
        count: number of songs requested from each strategy.
        prioritisePopular: if true, each strategy's songs are ordered most
            popular first; otherwise least popular first.

    Returns:
        ``{index_label: {'track_name', 'track_artist', 'recommendation_type'}}``
        with at most one entry per song. A song found by several strategies
        keeps the label of the first strategy in the order listed below.
    """
    recommendation_groups = [
        recommend_by_same_artist(track_index, count, prioritisePopular),
        recommend_lyrically_similar(track_index, count, prioritisePopular),
        recommend_energy_similar(track_index, count, prioritisePopular),
        recommend_mood_similar(track_index, count, prioritisePopular),
        recommend_random(count, prioritisePopular),
        recommend_released_around_same_time(track_index, count, prioritisePopular),
    ]
    all_recommendations = pd.concat(recommendation_groups)
    # The random sample is the only group that can contain the reference song.
    all_recommendations = all_recommendations.drop(index=track_index, errors='ignore')
    # De-duplicate on the index label, not on whole rows: the same song tagged
    # with two different recommendation types is not a duplicate row, and a
    # repeated label makes to_dict(orient='index') raise ValueError.
    is_repeat = all_recommendations.index.duplicated(keep='first')
    all_recommendations = all_recommendations[~is_repeat]
    return songs_as_dict(all_recommendations, include_fields=['track_name', 'track_artist', 'recommendation_type'])

hybrid_recommend(4982)

{5826: {'track_name': 'I Knew You Were Trouble.',
  'track_artist': 'Taylor Swift',
  'recommendation_type': 'by same artist'},
 5827: {'track_name': 'We Are Never Ever Getting Back Together',
  'track_artist': 'Taylor Swift',
  'recommendation_type': 'by same artist'},
 4136: {'track_name': 'Our Song',
  'track_artist': 'Taylor Swift',
  'recommendation_type': 'by same artist'},
 4686: {'track_name': 'You Belong With Me',
  'track_artist': 'Taylor Swift',
  'recommendation_type': 'by same artist'},
 5829: {'track_name': 'I Knew You Were Trouble',
  'track_artist': 'Taylor Swift',
  'recommendation_type': 'by same artist'},
 5556: {'track_name': '22',
  'track_artist': 'Taylor Swift',
  'recommendation_type': 'by same artist'},
 7816: {'track_name': 'What Do You Mean?',
  'track_artist': 'Justin Bieber',
  'recommendation_type': 'lyrically similar'},
 6697: {'track_name': 'I Mean It (feat. Remo)',
  'track_artist': 'G-Eazy',
  'recommendation_type': 'lyrically similar'},
 11972: {'trac

In [22]:
# Testing / visualization: show every row of `data` for a single artist.
data.loc[data['track_artist'] == 'Taylor Swift']

Unnamed: 0,track_id,track_name,track_artist,lyrics,track_popularity,track_album_id,track_album_name,track_album_release_date,playlist_genre,playlist_subgenre,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
4136,43H3q2NWT9LCuYS1B5IUvV,Our Song,Taylor Swift,I was riding shotgun with my hair undone In th...,61,2rU7u7C2v5i45MFVxx7xG1,Taylor Swift (Big Machine Radio Release Special),2006-10-23,pop,post-teen pop,...,2,-4.932,1,0.0298,0.113,0.0,0.325,0.575,89.128,203440
4686,7G6TDmGJXDoBaV6NGj3Yic,You Belong With Me,Taylor Swift,"You're on the phone with your girlfriend, she'...",52,08CWGiv27MVQhYpuTtvx83,Fearless (International Version),2008-11-11,latin,latin hip hop,...,6,-4.436,1,0.0381,0.151,2e-05,0.104,0.469,129.969,230507
4982,0KztjtD3ytzgYjUKIVfBnW,Mean,Taylor Swift,"You, with your words like knives And swords an...",62,6Ar2o9KCqcyYF9J0aQP3au,Speak Now,2010-01-01,pop,post-teen pop,...,4,-3.978,1,0.0426,0.445,0.0,0.219,0.808,164.004,237747
5556,1mQRQlbf1nXbazPEuSyij8,22,Taylor Swift,It feels like a perfect night To dress up like...,0,092fm0yKd9V4VGi6G3u51Y,22,2012-01-01,pop,post-teen pop,...,7,-6.548,1,0.0373,0.00288,0.00133,0.0631,0.676,104.011,231867
5826,6FB3v4YcR57y4tXFcdxI1E,I Knew You Were Trouble.,Taylor Swift,"NA Once upon a time, a few mistakes ago I was ...",76,4jTYApZPMapg56gRycOn0D,Red (Big Machine Radio Release Special),2012-10-21,pop,post-teen pop,...,3,-6.798,0,0.0363,0.00454,2e-06,0.0335,0.679,77.019,219720
5827,0VwNdo84DaVYLIkbVO86ND,We Are Never Ever Getting Back Together,Taylor Swift,I remember when we broke up the first time Say...,74,4jTYApZPMapg56gRycOn0D,Red (Big Machine Radio Release Special),2012-10-21,pop,post-teen pop,...,7,-5.911,1,0.0916,0.00957,2.7e-05,0.121,0.75,85.984,193147
5829,6RjKVXi31fui9m4dbiGRrL,I Knew You Were Trouble,Taylor Swift,"NA Once upon a time, a few mistakes ago I was ...",16,5FerdPFXSHSnCVq4OBy4Ey,Red,2012-10-22,latin,latin hip hop,...,6,-6.744,1,0.0461,0.00593,2e-06,0.0361,0.637,76.76,219720
6389,1u8c2t2Cy7UBoG4ArRcF5g,Blank Space,Taylor Swift,"Nice to meet you, where you been? I could show...",78,1yGbNOtRIgdIiGHOEBaZWf,1989 (Deluxe),2014-01-01,pop,post-teen pop,...,5,-5.412,1,0.054,0.103,0.0,0.0913,0.57,95.997,231827
6917,5WfhXulggG0c6WoVeMPA8N,Shake It Off,Taylor Swift,I stay out too late Got nothing in my brain Th...,18,6w36pmMA5bxECalu5rxQAw,1989,2014-10-27,pop,electropop,...,7,-5.414,1,0.165,0.0561,0.0,0.148,0.943,160.02,219200
6918,273dCMFseLcVsoSWx59IoE,Bad Blood,Taylor Swift,"'Cause baby, now we've got bad blood You know ...",62,2QJmrSgbdM35R67eoGQo4j,1989,2014-10-27,edm,pop edm,...,7,-6.114,1,0.181,0.0871,6e-06,0.148,0.295,170.157,211933
