In [115]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from Secrets import CLIENT_ID, CLIENT_SECRET, REDIRECT_URL
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from dateutil.relativedelta import relativedelta
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [116]:
# Authenticated Spotify client; the credentials come from the local Secrets module.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        redirect_uri=REDIRECT_URL,
        scope="user-library-read",
    )
)

# Load the reference track dataset, discarding its first column together with
# the two columns that are not used as features.
df = (
    pd.read_csv('./dataset.csv')
    .iloc[:, 1:]
    .drop(columns=['time_signature', 'duration_ms'])
)

# Raise pandas' display limits so the very wide feature frames render in full.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 504)
pd.set_option('display.max_rows', 500)


## Feature Engineering

In [117]:
def feature_engineer(df):
    """Turn the raw track table into a numeric feature table.

    Steps, in order:
      * forward-fill missing ``track_genre`` values and encode the genre text
        with TF-IDF (one integer-named column per vocabulary term);
      * min-max scale the continuous audio features;
      * bucket ``popularity`` into bands of five points;
      * one-hot encode ``key``, ``explicit`` and the popularity band
        (first level dropped);
      * append the TF-IDF columns and drop the raw ``track_genre`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns named in ``features_to_scale`` and
        ``features_to_OHE`` below, plus ``track_genre``. The input frame is
        left untouched; all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and index as ``df``.
    """
    features_to_scale = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence']
    features_to_OHE = ['key', 'explicit', 'popularity']
    # NOTE(review): 'tempo' and 'mode' are neither scaled nor encoded, so they
    # reach the output on their raw scale - confirm this is intended for the
    # downstream similarity computation.

    # Work on a copy so the caller's frame is never modified in place.
    df = df.copy()

    # Rows without a genre inherit the genre of the row above them.
    df['track_genre'] = df['track_genre'].ffill()
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(df['track_genre'])
    # Reuse df's index so the column-wise concat below pairs rows correctly
    # even when df does not carry a default 0..n-1 index.
    genre_df = pd.DataFrame(tfidf_matrix.toarray(), index=df.index)

    scaler = MinMaxScaler()
    df[features_to_scale] = scaler.fit_transform(df[features_to_scale])

    # Collapse popularity into bands of five points before one-hot encoding.
    df['popularity'] = df['popularity'] // 5

    def perform_OHE(df, column):
        """Replace `column` with its one-hot columns (first level dropped)."""
        OHE = pd.get_dummies(df[column], drop_first=True, dtype=int, prefix=column)
        df = df.drop(columns=column)
        return pd.concat([df, OHE], axis=1)

    for feature in features_to_OHE:
        df = perform_OHE(df, feature)

    df = pd.concat([df, genre_df], axis=1)
    return df.drop(columns=['track_genre'])

In [118]:
#6meRpNHvKC1VcBl5MIbVxo - hehe
#1Ob3QykC4dzf3rQwWxE9LV - study
#2dxVgP67xqsTCTfaVed4E0 - test

playlist_id = '6meRpNHvKC1VcBl5MIbVxo'
# Total number of entries the API reports for the playlist (this can include
# entries that are skipped below because they carry no track id).
playlist_length = sp.playlist_items(playlist_id)['total']

track_ids = []    # unique track ids, in playlist order
date_added = {}   # track id -> 'added_at' timestamp of its first occurrence

# Page through the playlist 100 entries at a time (the page size requested).
# NOTE(review): Spotify documents `market` as an ISO 3166-1 alpha-2 code
# (e.g. 'CA'); confirm that 'CAN' is actually honoured by the API.
for offset in range(0, playlist_length, 100):
    playlist = sp.playlist_tracks(playlist_id, market = 'CAN',fields='items', limit=100, offset=offset)
    for item in playlist['items']:
        track = item['track']
        # Skip entries without a usable track object / id.
        if track is None or track['id'] is None:
            continue
        track_id = track['id']
        # A track that appears more than once is kept only once so that
        # track_ids and date_added always describe the same set of rows.
        if track_id in date_added:
            continue
        date_added[track_id] = item['added_at']
        track_ids.append(track_id)

In [119]:
playlist_audio_data = []
artist_genres = {}
count = 0

# Audio features for every playlist track, requested in batches of 100 ids.
playlist_track_audio_features = [
    features
    for start in range(0, len(track_ids), 100)
    for features in sp.audio_features(tracks=track_ids[start:start + 100])
]

# Track metadata (name, album, artists, popularity, ...), requested in
# batches of 50 ids.
playlist_track_info = [
    info
    for start in range(0, len(track_ids), 50)
    for info in sp.tracks(track_ids[start:start + 50])['tracks']
]

In [120]:
# Shallow working copies of the fetched audio features and track metadata.
track_audio_features = list(playlist_track_audio_features)
track_info = list(playlist_track_info)

# Id of the first-listed artist of every track, in playlist order.
track_artist_ids = [track['artists'][0]['id'] for track in playlist_track_info]

In [121]:
# Fetch the first listed genre of each main artist, 50 artists per request.
# Artists that appear on several tracks are requested only once; order of
# first appearance is preserved so the resulting mapping is unchanged.
unique_artist_ids = list(dict.fromkeys(track_artist_ids))
for start in range(0, len(unique_artist_ids), 50):
    results = sp.artists(unique_artist_ids[start:start + 50])
    for artist in results['artists']:
        genres = artist['genres']
        # Keyed by artist name; the first value stored for a name wins.
        # Artists without any genre are recorded as None.
        artist_genres.setdefault(artist['name'], genres[0] if genres else None)

In [122]:
# Build one dataset-shaped record per playlist track and append them to the
# dataset in a single concat (appending row by row reallocates the whole
# frame for every track).
audio_features_keys = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

playlist_rows = []
for index, track_id in enumerate(track_ids):
    info = track_info[index]
    features = track_audio_features[index]
    track_artists = [artist['name'] for artist in info['artists']]
    playlist_rows.append({
        'track_id': track_id,
        'artists': ';'.join(track_artists),
        'album_name': info['album']['name'],
        'track_name': info['name'],
        'popularity': info['popularity'],
        'explicit': info['explicit'],
        **{key: features[key] for key in audio_features_keys},
        # Genre of the first-listed artist (None when the artist has no genre).
        'track_genre': artist_genres[track_artists[0]],
    })

# Nothing to append for an empty playlist; otherwise the new rows continue
# the dataset's 0..n-1 index, exactly as label-based appends would.
if playlist_rows:
    df = pd.concat(
        [df, pd.DataFrame(playlist_rows, columns=df.columns)],
        ignore_index=True,
    )

In [123]:
# Rebind df to its engineered version. At this point df holds the dataset
# rows followed by the appended playlist rows, so both are transformed together.
df = feature_engineer(df)

In [124]:
# The playlist rows are the last len(track_ids) rows of df (one row was
# appended per collected track id). Using that count for both slices keeps
# them consistent; the previous `-playlist_length - 1` evaluated to
# -(playlist_length + 1) and trimmed extra dataset rows.
# NOTE: this cell is not idempotent - re-running it slices df again.
n_playlist_rows = len(track_ids)
track_data = df.tail(n_playlist_rows).copy()
# Slice by an explicit end position so an empty playlist leaves df intact
# (`df.iloc[:-0]` would return an empty frame).
df = df.iloc[:len(df) - n_playlist_rows]
# Look each date up by track id instead of relying on dict ordering/length.
track_data['date_added'] = [date_added[track_id] for track_id in track_ids]


In [125]:
# Expects track_data to hold the engineered playlist rows plus a
# 'date_added' column of timestamps.
track_data = pd.DataFrame(track_data)
track_data['date_added'] = pd.to_datetime(track_data['date_added']).dt.date
first_date = track_data.iloc[0]['date_added']
last_date = track_data.iloc[-1]['date_added']


def _whole_months_between(later, earlier):
    """Return the number of whole calendar months from `earlier` to `later`.

    relativedelta splits a span into years + months, so `.months` alone is
    only the 0-11 remainder; the years must be folded back in.
    """
    delta = relativedelta(later, earlier)
    return delta.years * 12 + delta.months


# Total span of the playlist in months (only used by the commented-out
# linear-descent formula below).
total_months = _whole_months_between(last_date, first_date) + 1

# Weight each track by 1 / (months since the first row + 1), rounded to 3 dp.
weights = []
for added_on in track_data['date_added']:
    # Clamp at 0: a track dated before the first row would otherwise give a
    # negative weight, or a division by zero at exactly -1 month.
    months_difference = max(_whole_months_between(added_on, first_date), 0)
    # Linear descent weighting
    # weight = (total_months - months_difference)/ total_months
    weights.append(round(1/(months_difference + 1), 3))

track_data['weight'] = weights
track_data = track_data.drop(columns=['date_added'])


In [126]:
# Preview the first rows of the playlist features, including the new 'weight' column.
track_data.head()

Unnamed: 0,track_id,artists,album_name,track_name,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,key_1,key_2,key_3,key_4,key_5,key_6,key_7,key_8,key_9,key_10,key_11,explicit_True,popularity_1,popularity_2,popularity_3,popularity_4,popularity_5,popularity_6,popularity_7,popularity_8,popularity_9,popularity_10,popularity_11,popularity_12,popularity_13,popularity_14,popularity_15,popularity_16,popularity_17,popularity_18,popularity_19,popularity_20,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,weight
114000,3F5CgOj3wFlRv51JsHbxhe,Drake;21 Savage,"Honestly, Nevermind",Jimmy Cooks (feat. 21 Savage),0.537056,0.673,0.829033,1,0.181347,0.000308,2e-06,0.093,0.367839,165.921,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.764099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.482767,0.0,0.427889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
114001,2BcMwX1MPV6ZHP4tUT9uq6,Drake;21 Savage;Project Pat,Certified Lover Boy,Knife Talk (with 21 Savage ft. Project Pat),0.861929,0.424,0.73899,0,0.335751,0.063755,0.0,0.0834,0.153769,145.887,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.764099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.482767,0.0,0.427889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
114002,3CA9pLiwRIGtUBiMjbZmRw,Drake,Scorpion,Nice For What,0.593909,0.909,0.796423,1,0.073264,0.089458,9.7e-05,0.119,0.761809,93.372,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.764099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.482767,0.0,0.427889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
114003,2tUL6dZf1mywCj5WvCPZw6,Drake,Certified Lover Boy,No Friends In The Industry,0.852792,0.744,0.828293,1,0.117098,0.018173,0.0,0.629,0.171859,83.737,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.764099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.482767,0.0,0.427889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
114004,6c5wQFfJApRMooKE7UQnlH,Baby Keem;Travis Scott,The Melodic Blue,durag activity (with Travis Scott),0.906599,0.759,0.837652,1,0.320207,0.004056,0.0,0.0865,0.500503,80.45,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.748361,0.0,0.663292,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [131]:
start_index = 0
end_index = 1
num_rows = len(track_data)
num_cols = len(track_data.columns)

# Weighted sum of every playlist track's feature vector.
# Columns 4..-2 are the features: the first four columns are skipped and the
# last column is the per-track weight.
feature_matrix = track_data.iloc[:, 4:-1].to_numpy(dtype=float)
row_weights = track_data.iloc[:, -1].to_numpy(dtype=float)

# One matrix-vector product replaces the nested Python loops; the result has
# num_cols - 5 entries and is kept as a plain list, as before. Values may
# differ from a sequential sum in the last floating-point bits only.
results_array = (row_weights @ feature_matrix).tolist()

In [128]:
# item_51203 = df.iloc[4131][4:].values.reshape(1, -1)

# # Reshape the results_array to a 2D array
# results_array_reshaped = np.array(results_array).reshape(1, -1)

# # Calculate cosine similarity
# similarity = cosine_similarity(item_51203, results_array_reshaped)

# print(similarity)

In [132]:

# Feature matrix of the dataset: every column from the fifth one onwards.
vectors = df.iloc[:, 4:].to_numpy()

# The playlist profile as a single-row matrix, compared against every dataset row.
input_vector = np.array(results_array)
similarity_matrix = cosine_similarity(input_vector[np.newaxis, :], vectors)

# One similarity score per dataset row.
similarity_array = similarity_matrix.ravel()

# Positions of the 40 highest scores, best match first.
ascending_order = np.argsort(similarity_array)
top_40_indices = ascending_order[-40:][::-1]

top_40_vectors = vectors.take(top_40_indices, axis=0)


In [133]:
# Print the recommended tracks, best match first.
recommended_names = df['track_name'].iloc[top_40_indices]
recommended_artists = df['artists'].iloc[top_40_indices]
for track_name, artist in zip(recommended_names, recommended_artists):
    print(f"track: {track_name} || artist: {artist}")

track: RAP DEVIL || artist: Machine Gun Kelly
track: Real Life || artist: Burna Boy;Stormzy
track: Your Turn || artist: Ty Dolla $ign;Musiq Soulchild;Tish Hyman;6LACK
track: Solid || artist: Burna Boy;Blxst;Kehlani
track: Pursuit Of Happiness - Extended Steve Aoki Remix || artist: Kid Cudi;MGMT;Ratatat;Steve Aoki
track: Usain Boo || artist: Kodak Black
track: Grrrls || artist: Lizzo
track: Grrrls || artist: Lizzo
track: Save You || artist: Kodak Black
track: Bezerk || artist: Big Sean;Hit-Boy;A$AP Ferg
track: Codeine Dreaming || artist: Kodak Black;Lil Wayne
track: WORKIN ME || artist: Quavo
track: Clout || artist: Offset;Cardi B
track: Polo Jeans || artist: Mac Miller;Earl Sweatshirt
track: Se Menea || artist: Don Omar;Nio Garcia
track: Me and Klik || artist: Kodak Black;John Wicks;Jackboy
track: Me and Klik || artist: Kodak Black;John Wicks;Jackboy
track: Ball If I Want To || artist: DaBaby
track: Banking on Me || artist: Gunna
track: Banking on Me || artist: Gunna
track: Banking on 