In [1]:
# Import library
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob
import re

In [2]:
raw_data = pd.read_csv(".../Data/processed_data.csv")
print(raw_data.columns)

Index(['Unnamed: 0.1', 'Unnamed: 0', 'pos', 'artist_name', 'track_uri',
       'artist_uri', 'track_name', 'album_uri', 'duration_ms_x', 'album_name',
       'name', 'danceability', 'energy', 'key', 'loudness', 'mode',
       'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'type', 'id', 'uri', 'track_href', 'analysis_url',
       'duration_ms_y', 'time_signature', 'artist_pop', 'genres', 'track_pop'],
      dtype='object')


In [3]:
raw_data.drop(columns=["Unnamed: 0",'Unnamed: 0.1'], inplace = True)
raw_data.head()

Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms_x,album_name,name,danceability,...,type,id,uri,track_href,analysis_url,duration_ms_y,time_signature,artist_pop,genres,track_pop
0,0,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,Throwbacks,0.904,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
1,73,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,w o r k o u t,0.904,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
2,14,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,party playlist,0.904,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
3,42,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,Dance mix,0.904,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69
4,1,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,spin,0.904,...,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69


# Dropping the duplicate songs from the raw_data

In [4]:
def drop_duplicate_songs(df):
    
    df["artists_song"] = df.apply(lambda row: row["artist_name"]+row["track_name"],axis = 1)
    return df.drop_duplicates("artists_song")

song_data = drop_duplicate_songs(raw_data)
song_data.head()

Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms_x,album_name,name,danceability,...,id,uri,track_href,analysis_url,duration_ms_y,time_signature,artist_pop,genres,track_pop,artists_song
0,0,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,Throwbacks,0.904,...,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69,Missy ElliottLose Control (feat. Ciara & Fat M...
6,1,Britney Spears,6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone,Throwbacks,0.774,...,6I9VzXrHxO9rA9A5euc8Ak,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,https://api.spotify.com/v1/tracks/6I9VzXrHxO9r...,https://api.spotify.com/v1/audio-analysis/6I9V...,198800,4,84,dance_pop pop post-teen_pop,83,Britney SpearsToxic
19,2,Beyoncé,0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit),Throwbacks,0.664,...,0WqIKmW4BTrj3eJFmnCKMv,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,https://api.spotify.com/v1/tracks/0WqIKmW4BTrj...,https://api.spotify.com/v1/audio-analysis/0WqI...,235933,4,86,dance_pop pop r&b,25,BeyoncéCrazy In Love
46,3,Justin Timberlake,1AWQoqb9bSvzTjaLralEkT,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,267266,Justified,Throwbacks,0.892,...,1AWQoqb9bSvzTjaLralEkT,spotify:track:1AWQoqb9bSvzTjaLralEkT,https://api.spotify.com/v1/tracks/1AWQoqb9bSvz...,https://api.spotify.com/v1/audio-analysis/1AWQ...,267267,4,82,dance_pop pop,79,Justin TimberlakeRock Your Body
55,4,Shaggy,1lzr43nnXAijIGYnCT8M8H,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,227600,Hot Shot,Throwbacks,0.853,...,1lzr43nnXAijIGYnCT8M8H,spotify:track:1lzr43nnXAijIGYnCT8M8H,https://api.spotify.com/v1/tracks/1lzr43nnXAij...,https://api.spotify.com/v1/audio-analysis/1lzr...,227600,4,75,pop_rap reggae_fusion,2,ShaggyIt Wasn't Me


# Selecting the columns which will be usefull for analysis

In [5]:
print(song_data.columns)

Index(['pos', 'artist_name', 'track_uri', 'artist_uri', 'track_name',
       'album_uri', 'duration_ms_x', 'album_name', 'name', 'danceability',
       'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'type', 'id', 'uri',
       'track_href', 'analysis_url', 'duration_ms_y', 'time_signature',
       'artist_pop', 'genres', 'track_pop', 'artists_song'],
      dtype='object')


In [6]:
def select_features(df):
       
       return df[['artist_name','id','track_name', 'artist_pop', 'genres', 'track_pop', 'danceability', 'energy', 'key', 'loudness', 'mode',
       'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo','time_signature']]
song_data = select_features(song_data)
song_data.head()

Unnamed: 0,artist_name,id,track_name,artist_pop,genres,track_pop,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,Lose Control (feat. Ciara & Fat Man Scoop),74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69,0.904,0.813,4,-7.105,0,0.121,0.0311,0.00697,0.0471,0.81,125.461,4
6,Britney Spears,6I9VzXrHxO9rA9A5euc8Ak,Toxic,84,dance_pop pop post-teen_pop,83,0.774,0.838,5,-3.914,0,0.114,0.0249,0.025,0.242,0.924,143.04,4
19,Beyoncé,0WqIKmW4BTrj3eJFmnCKMv,Crazy In Love,86,dance_pop pop r&b,25,0.664,0.758,2,-6.583,0,0.21,0.00238,0.0,0.0598,0.701,99.259,4
46,Justin Timberlake,1AWQoqb9bSvzTjaLralEkT,Rock Your Body,82,dance_pop pop,79,0.892,0.714,4,-6.055,0,0.141,0.201,0.000234,0.0521,0.817,100.972,4
55,Shaggy,1lzr43nnXAijIGYnCT8M8H,It Wasn't Me,75,pop_rap reggae_fusion,2,0.853,0.606,0,-4.596,1,0.0713,0.0561,0.0,0.313,0.654,94.759,4



# Preprocessing the column genres into list to OHE later


In [7]:
def genre_preprocess(df):
    df['genres_list'] = df['genres'].apply(lambda x: x.split(" "))
    return df
song_data = genre_preprocess(song_data)
song_data['genres_list'].head()

0     [dance_pop, hip_hop, hip_pop, pop, pop_rap, r&...
6                       [dance_pop, pop, post-teen_pop]
19                                [dance_pop, pop, r&b]
46                                     [dance_pop, pop]
55                             [pop_rap, reggae_fusion]
Name: genres_list, dtype: object

# Pipeline for dataframe to this very step, function that summarises all the processes done above

In [8]:
def playlist_preprocess(df):
    
    df = drop_duplicate_songs(df)
    df = select_features(df)
    df = genre_preprocess(df)

    return df

# Feature generation, inlcuding:
    1. Sentiment anlysis
    2. TF-IDF
    3. OHE
    4. Normalization

# #Sentiment alaysis

In [9]:
def getSubjectivity(text):
  return TextBlob(text).sentiment.subjectivity

def getPolarity(text):
  return TextBlob(text).sentiment.polarity

def getAnalysis(score, task="polarity"):
  if task == "subjectivity":
    if score < 1/3:
      return "low"
    elif score > 1/3:
      return "high"
    else:
      return "medium"
  else:
    if score < 0:
      return 'Negative'
    elif score == 0:
      return 'Neutral'
    else:
      return 'Positive'

def sentiment_analysis(df, text_col):
  df['subjectivity'] = df[text_col].apply(getSubjectivity).apply(lambda x: getAnalysis(x,"subjectivity"))
  df['polarity'] = df[text_col].apply(getPolarity).apply(getAnalysis)
  return df

In [10]:
sentiment = sentiment_analysis(song_data, "track_name")
sentiment.head()

Unnamed: 0,artist_name,id,track_name,artist_pop,genres,track_pop,danceability,energy,key,loudness,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres_list,subjectivity,polarity
0,Missy Elliott,0UaMYEvWZi0ZqiDOoHU3YI,Lose Control (feat. Ciara & Fat Man Scoop),74,dance_pop hip_hop hip_pop pop pop_rap r&b rap ...,69,0.904,0.813,4,-7.105,...,0.121,0.0311,0.00697,0.0471,0.81,125.461,4,"[dance_pop, hip_hop, hip_pop, pop, pop_rap, r&...",low,Neutral
6,Britney Spears,6I9VzXrHxO9rA9A5euc8Ak,Toxic,84,dance_pop pop post-teen_pop,83,0.774,0.838,5,-3.914,...,0.114,0.0249,0.025,0.242,0.924,143.04,4,"[dance_pop, pop, post-teen_pop]",low,Neutral
19,Beyoncé,0WqIKmW4BTrj3eJFmnCKMv,Crazy In Love,86,dance_pop pop r&b,25,0.664,0.758,2,-6.583,...,0.21,0.00238,0.0,0.0598,0.701,99.259,4,"[dance_pop, pop, r&b]",high,Negative
46,Justin Timberlake,1AWQoqb9bSvzTjaLralEkT,Rock Your Body,82,dance_pop pop,79,0.892,0.714,4,-6.055,...,0.141,0.201,0.000234,0.0521,0.817,100.972,4,"[dance_pop, pop]",low,Neutral
55,Shaggy,1lzr43nnXAijIGYnCT8M8H,It Wasn't Me,75,pop_rap reggae_fusion,2,0.853,0.606,0,-4.596,...,0.0713,0.0561,0.0,0.313,0.654,94.759,4,"[pop_rap, reggae_fusion]",low,Neutral


# #One hot encoding

In [11]:
def ohe_prep(df, column, new_name): 
    
    tf_df = pd.get_dummies(df[column])
    feature_names = tf_df.columns
    tf_df.columns = [new_name + "_" + str(i) for i in feature_names]
    tf_df.reset_index(drop = True, inplace = True)    
    return tf_df

In [12]:
subject_ohe = ohe_prep(sentiment, 'subjectivity','subject')
subject_ohe.iloc[0]

subject_high      0
subject_low       1
subject_medium    0
Name: 0, dtype: uint8

# #TF-IDF vectorization

In [13]:
# TF-IDF implementation
tfidf = TfidfVectorizer()
tfidf_matrix =  tfidf.fit_transform(song_data['genres_list'].apply(lambda x: " ".join(x)))
genre_df = pd.DataFrame(tfidf_matrix.toarray())
genre_df.columns = ['genre' + "_" + i for i in tfidf.get_feature_names()]
genre_df.drop(columns='genre_unknown')
genre_df.reset_index(drop = True, inplace=True)
genre_df.iloc[0]



genre_21st_century_classical    0.0
genre_432hz                     0.0
genre__hip_hop                  0.0
genre__roll                     0.0
genre_a_cappella                0.0
                               ... 
genre_zambian_hip_hop           0.0
genre_zhongguo_feng             0.0
genre_zolo                      0.0
genre_zouk                      0.0
genre_zouk_riddim               0.0
Name: 0, Length: 2147, dtype: float64

# # Normalizing some features

In [14]:
print(song_data['artist_pop'].describe())

count    34247.000000
mean        61.916606
std         19.120147
min          0.000000
25%         51.000000
50%         65.000000
75%         76.000000
max        100.000000
Name: artist_pop, dtype: float64


In [15]:
std_popularity = song_data[["artist_pop"]].reset_index(drop = True)
scaler = MinMaxScaler()
std_popularity = pd.DataFrame(scaler.fit_transform(std_popularity), columns = std_popularity.columns)
std_popularity.head()

Unnamed: 0,artist_pop
0,0.74
1,0.84
2,0.86
3,0.82
4,0.75


# # Generating all feature of data

In [16]:
def create_feature_set(df, float_cols):
    '''
    Process spotify df to create a final set of features that will be used to generate recommendations
    ---
    Input: 
    df (pandas dataframe): Spotify Dataframe
    float_cols (list(str)): List of float columns that will be scaled
            
    Output: 
    final (pandas dataframe): Final set of features 
    '''
    
    # Tfidf genre lists
    tfidf = TfidfVectorizer()
    tfidf_matrix =  tfidf.fit_transform(df['genres_list'].apply(lambda x: " ".join(x)))
    genre_df = pd.DataFrame(tfidf_matrix.toarray())
    genre_df.columns = ['genre' + "_" + i for i in tfidf.get_feature_names()]
    genre_df.drop(columns='genre_unknown') # drop unknown genre
    genre_df.reset_index(drop = True, inplace=True)
    
    # Sentiment analysis
    df = sentiment_analysis(df, "track_name")

    # One-hot Encoding
    subject_ohe = ohe_prep(df, 'subjectivity','subject') * 0.3
    polar_ohe = ohe_prep(df, 'polarity','polar') * 0.5
    key_ohe = ohe_prep(df, 'key','key') * 0.5
    mode_ohe = ohe_prep(df, 'mode','mode') * 0.5

    # Normalization
    # Scale popularity columns
    pop = df[["artist_pop","track_pop"]].reset_index(drop = True)
    scaler = MinMaxScaler()
    pop_scaled = pd.DataFrame(scaler.fit_transform(pop), columns = pop.columns) 

    # Scale audio columns
    floats = df[float_cols].reset_index(drop = True)
    scaler = MinMaxScaler()
    floats_scaled = pd.DataFrame(scaler.fit_transform(floats), columns = floats.columns)
    
    # Scale 'time_signature'
    ts = df[['time_signature']].reset_index(drop = True)
    scaler = MinMaxScaler()
    ts_scaled = pd.DataFrame(scaler.fit_transform(ts), columns = ts.columns)

    # Concanenate all features
    final = pd.concat([genre_df, floats_scaled, ts_scaled, pop_scaled, subject_ohe, polar_ohe, key_ohe, mode_ohe], axis = 1)
    
    # Add song id
    final['id']=df['id'].values
    
    return final

In [17]:
float_cols = song_data.dtypes[song_data.dtypes == 'float64'].index.values
song_data.to_csv(".../Data/allsong_data_final.csv", index = False)

# Generate features
complete_feature_set = create_feature_set(song_data, float_cols=float_cols)
complete_feature_set.to_csv(".../Data/complete_feature_final.csv", index = False)
complete_feature_set.head()



Unnamed: 0,genre_21st_century_classical,genre_432hz,genre__hip_hop,genre__roll,genre_a_cappella,genre_abstract_beats,genre_abstract_hip_hop,genre_accordion,genre_acid_jazz,genre_acid_rock,...,key_5,key_6,key_7,key_8,key_9,key_10,key_11,mode_0,mode_1,id
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0UaMYEvWZi0ZqiDOoHU3YI
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,6I9VzXrHxO9rA9A5euc8Ak
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0WqIKmW4BTrj3eJFmnCKMv
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,1AWQoqb9bSvzTjaLralEkT
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1lzr43nnXAijIGYnCT8M8H


# # Testing on playlist

In [18]:
playlist_test = pd.read_csv(".../Data/test_playlist.csv")
playlist_test = playlist_preprocess(playlist_test)
print(playlist_test.shape)
playlist_test.head()


(74, 19)


Unnamed: 0,artist_name,id,track_name,artist_pop,genres,track_pop,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres_list
0,The Killers,7oK9VyNzrYvRFo7nQEYkWN,Mr. Brightside,80,alternative_rock dance_rock modern_rock perman...,78,0.356,0.924,1,-3.74,1,0.0808,0.00101,0.0,0.0953,0.232,148.017,4,"[alternative_rock, dance_rock, modern_rock, pe..."
1,Rihanna,6qn9YLKt13AGvpq9jfO8py,We Found Love,90,barbadian_pop dance_pop pop pop_rap urban_cont...,77,0.734,0.766,1,-4.485,1,0.0383,0.025,0.00138,0.108,0.6,127.986,4,"[barbadian_pop, dance_pop, pop, pop_rap, urban..."
2,American Authors,5j9iuo3tMmQIfnEEQOOjxh,Best Day Of My Life,70,indie_poptimism modern_alternative_rock modern...,0,0.67,0.905,2,-2.385,1,0.0339,0.0625,0.000151,0.0577,0.516,100.021,4,"[indie_poptimism, modern_alternative_rock, mod..."
3,Clean Bandit,5HuqzFfq2ulY1iBAW5CxLe,Rather Be (feat. Jess Glynne),80,dance_pop edm pop pop_dance tropical_house uk_...,53,0.799,0.586,11,-6.735,1,0.0377,0.162,2e-06,0.193,0.549,120.97,4,"[dance_pop, edm, pop, pop_dance, tropical_hous..."
4,Sia,4VrWlk8IQxevMvERoX08iC,Chandelier,89,australian_dance australian_pop pop,81,0.399,0.787,1,-2.88,1,0.0499,0.0197,6.1e-05,0.0685,0.572,117.089,5,"[australian_dance, australian_pop, pop]"


# # Recommendation 1st type
## This version of recommendation uses similarity matrix, which is created from the initial feature set and the new playlist.
## It recommends the best suited song for each song from playlist separately.

In [19]:
def generate_playlist_feature(complete_feature_set, playlist_test):
    
    complete_feature_set_playlist = complete_feature_set[complete_feature_set['id'].isin(playlist_test['id'].values)]
    
    
    complete_feature_set_nonplaylist = complete_feature_set[~complete_feature_set['id'].isin(playlist_test['id'].values)]
    #complete_feature_set_playlist_final = complete_feature_set_playlist.drop(columns = "id")

#   return complete_feature_set_playlist_final, complete_feature_set_nonplaylist
    return complete_feature_set_playlist, complete_feature_set_nonplaylist

In [20]:
# Generate the features
complete_feature_set_playlist, complete_feature_set_nonplaylist = generate_playlist_feature(complete_feature_set, playlist_test)
# Non-playlist features
print(complete_feature_set_nonplaylist.shape)
print(complete_feature_set_playlist.shape)
print(complete_feature_set.shape)


(34173, 2180)
(74, 2180)
(34247, 2180)


In [21]:
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


def make_recommendations(song_data, complete_feature_set_playlist, complete_feature_set_nonplaylist, n_components):
    pca = PCA(n_components=n_components)
    playlist_PCA = pca.fit_transform(complete_feature_set_playlist.drop(columns="id"))
    features_PCA = pca.transform(complete_feature_set_nonplaylist.drop(columns="id"))

    similarity_matrix = cosine_similarity(playlist_PCA, features_PCA)
    top_n = 10

    # Initialize an empty DataFrame to store recommendations
    recommendations_df = pd.DataFrame()

    # Iterate over each row (existing song) in the similarity matrix
    for i in range(len(playlist_PCA)):
        # Get the indices of the top N similar songs
        top_indices = np.argsort(similarity_matrix[i])[-top_n:]

        # Get the song information for the top recommended songs
        recommended_songs = complete_feature_set_nonplaylist.iloc[top_indices]

        # Append the recommendations to the DataFrame
        recommendations_df = recommendations_df.append(recommended_songs, ignore_index=True)
    
    recommend = recommendations_df.merge(song_data, on='id', how='inner')
    return recommend
    

In [22]:
recommendation1 = make_recommendations(song_data, complete_feature_set_playlist, complete_feature_set_nonplaylist, n_components = 10)
print(recommendation1[['artist_name','track_name', 'id']])

  recommendations_df = recommendations_df.append(recommended_songs, ignore_index=True)


              artist_name                      track_name  \
0     My Chemical Romance         Thank You For The Venom   
1                 Nirvana                Heart-Shaped Box   
2               blink-182                          Misery   
3    Sleeping With Sirens     Do It Now Remember It Later   
4         Pierce The Veil                  King For A Day   
..                    ...                             ...   
735           Joey Bada$$  Enter The Void (feat. Ab-Soul)   
736  Sleeping With Sirens                        F**K You   
737     Empire of the Sun                Tiger By My Side   
738  Bring Me The Horizon                   What You Need   
739           Def Leppard           Pour Some Sugar On Me   

                         id  
0    0BpKPyXhWsTaWkgXeOd1Sn  
1    11LmqTE2naFULdEP94AUBa  
2    1gofFAslBqWqeZ1F73GaJe  
3    3ikf8zLuO1MACbNFfDvJ12  
4    1IT0WQk5J8NsaeII8ktdlZ  
..                      ...  
735  6K7wB7VNOJMWFrRrfAhAxo  
736  6NpUCODxbUGkruC9WCno5I

# #  Recommendation 2nd type
## This version takes the entire playlist, for which the recommendation should me made, and converts it into a single vector, then uses cosine similarity to find appropriate tracks for it

In [23]:
def make_recommendations2(song_data, complete_feature_set_playlist, complete_feature_set_nonplaylist):
    playlist_mean = complete_feature_set_playlist.drop(columns="id").mean().values
    
    non_playlist_df = song_data[song_data['id'].isin(complete_feature_set_nonplaylist['id'].values)]
    non_playlist_df['sim'] = cosine_similarity(complete_feature_set_nonplaylist.drop('id', axis = 1).values, playlist_mean.reshape(1, -1))[:,0]
    non_playlist_df_top_40 = non_playlist_df.sort_values('sim',ascending = False).head(40)
    return non_playlist_df_top_40

In [25]:
recommendation2 = make_recommendations2(song_data, complete_feature_set_playlist, complete_feature_set_nonplaylist)
recommendation2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_playlist_df['sim'] = cosine_similarity(complete_feature_set_nonplaylist.drop('id', axis = 1).values, playlist_mean.reshape(1, -1))[:,0]


Unnamed: 0,artist_name,id,track_name,artist_pop,genres,track_pop,danceability,energy,key,loudness,...,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres_list,subjectivity,polarity,sim
51128,American Authors,1obisQNOcikRvTdStbW3pG,Go Big Or Go Home,70,indie_poptimism modern_alternative_rock modern...,63,0.665,0.875,1,-4.272,...,0.00939,0.0,0.0897,0.66,122.008,4,"[indie_poptimism, modern_alternative_rock, mod...",low,Neutral,0.922576
28834,American Authors,64ybTt8CKxPdeXBNnu08Op,Believer,70,indie_poptimism modern_alternative_rock modern...,55,0.583,0.968,1,-2.909,...,0.00141,0.0115,0.13,0.91,119.999,4,"[indie_poptimism, modern_alternative_rock, mod...",low,Neutral,0.922479
43254,The 1975,51cd3bzVmLAjlnsSZn4ecW,She's American,78,modern_alternative_rock modern_rock pop rock,55,0.647,0.857,1,-3.94,...,0.167,0.000437,0.0763,0.55,115.976,4,"[modern_alternative_rock, modern_rock, pop, rock]",low,Neutral,0.921655
53025,Bruno Mars,1KtU0WCq472KzqCXgMOxkS,That's What I Like - Alan Walker Remix,92,dance_pop pop,62,0.692,0.896,1,-4.017,...,0.0121,6.6e-05,0.199,0.798,134.056,4,"[dance_pop, pop]",low,Neutral,0.920234
54403,American Authors,4gHD93RNqEhEh2NkYzl3x6,Luck,70,indie_poptimism modern_alternative_rock modern...,54,0.554,0.806,0,-3.463,...,0.00177,0.0,0.165,0.646,144.923,4,"[indie_poptimism, modern_alternative_rock, mod...",low,Neutral,0.919301
43278,The 1975,3xrwXWG4O9uhtRyAd3MCou,Heart Out,78,modern_alternative_rock modern_rock pop rock,54,0.706,0.83,2,-4.918,...,0.00822,0.00112,0.0763,0.886,118.446,4,"[modern_alternative_rock, modern_rock, pop, rock]",low,Neutral,0.917319
59061,Selena Gomez,5VQ0SPGs7vdzQCIzsHTNUz,Body Heat,86,dance_pop pop post-teen_pop,40,0.74,0.974,1,-3.585,...,0.189,0.0,0.0849,0.766,120.041,4,"[dance_pop, pop, post-teen_pop]",low,Neutral,0.914403
28926,Neon Trees,0K1KOCeJBj3lpDYxEX9qP2,Sleeping With A Friend,71,modern_alternative_rock modern_rock pop pop_ro...,59,0.582,0.882,2,-4.256,...,0.00189,1e-05,0.32,0.507,107.034,4,"[modern_alternative_rock, modern_rock, pop, po...",low,Neutral,0.913492
14555,The 1975,6OPOa3qlKoDUzGpS8MrcLi,This Must Be My Dream,78,modern_alternative_rock modern_rock pop rock,52,0.592,0.84,9,-4.697,...,0.0193,0.000191,0.327,0.679,103.026,4,"[modern_alternative_rock, modern_rock, pop, rock]",low,Neutral,0.913332
33788,Bruno Mars,1I6pKIyaBp4OebTGLJpCCC,Perm,92,dance_pop pop,68,0.853,0.871,8,-3.715,...,0.0145,6.8e-05,0.198,0.863,124.021,4,"[dance_pop, pop]",low,Neutral,0.912762
