In [76]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import sys
import json
from dotenv import load_dotenv
import pandas as pd
import datetime
import pytz
from dateutil.relativedelta import relativedelta

# Dataset Setup 

In [87]:
all_tracks_df = pd.read_csv('tracks_features.csv')
all_tracks_df = all_tracks_df.drop(['album_id', 'track_number', 'disc_number', 'year', 'release_date'],axis=1)
all_tracks_df

Unnamed: 0,id,name,album,artists,artist_ids,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,7lmeHLHBe4nmXzuXc0HDjk,Testify,The Battle Of Los Angeles,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],False,0.470,0.978,7,-5.399,1,0.0727,0.02610,0.000011,0.3560,0.503,117.906,210133,4.0
1,1wsRitfRRtWyEapl0q22o8,Guerrilla Radio,The Battle Of Los Angeles,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],True,0.599,0.957,11,-5.764,1,0.1880,0.01290,0.000071,0.1550,0.489,103.680,206200,4.0
2,1hR0fIFK2qRG3f3RF70pb7,Calm Like a Bomb,The Battle Of Los Angeles,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],False,0.315,0.970,7,-5.424,1,0.4830,0.02340,0.000002,0.1220,0.370,149.749,298893,4.0
3,2lbASgTSoDO7MTuLAXlTW0,Mic Check,The Battle Of Los Angeles,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],True,0.440,0.967,11,-5.830,0,0.2370,0.16300,0.000004,0.1210,0.574,96.752,213640,4.0
4,1MQTmpYOZ6fcMQc56Hdo7T,Sleep Now In the Fire,The Battle Of Los Angeles,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],False,0.426,0.929,2,-6.729,1,0.0701,0.00162,0.105000,0.0789,0.539,127.059,205600,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1204020,0EsMifwUmMfJZxzoMPXJKZ,Gospel of Juke,Notch - EP,['FVLCRVM'],['7AjItKsRnEYRSiBt2OxK1y'],False,0.264,0.966,5,-6.970,0,0.0672,0.00935,0.002240,0.3370,0.415,159.586,276213,4.0
1204021,2WSc2TB1CSJgGE0PEzVeiu,Prism Visions,Notch - EP,['FVLCRVM'],['7AjItKsRnEYRSiBt2OxK1y'],False,0.796,0.701,11,-6.602,0,0.0883,0.10400,0.644000,0.0749,0.781,121.980,363179,4.0
1204022,6iProIgUe3ETpO6UT0v5Hg,Tokyo 360,Notch - EP,['FVLCRVM'],['7AjItKsRnEYRSiBt2OxK1y'],False,0.785,0.796,9,-5.960,0,0.0564,0.03040,0.918000,0.0664,0.467,121.996,385335,4.0
1204023,37B4SXC8uoBsUyKCWnhPfX,Yummy!,Notch - EP,['FVLCRVM'],['7AjItKsRnEYRSiBt2OxK1y'],False,0.665,0.856,6,-6.788,0,0.0409,0.00007,0.776000,0.1170,0.227,124.986,324455,4.0


In [None]:
# Create vectors for each track
def create_vectors():
    """
    Create Vectors for each track in 'spotify db'
    """
    pass

# Connecting to Spotify API

In [78]:
load_dotenv()
scope = "user-library-read"
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

In [79]:
playlists_res = sp.current_user_playlists()['items']
playlists = {}

for item in playlists_res:
  playlists[item['name']] = item['id']

playlists
id = playlists['RO tation'] # This is what the input to the model will be, playlist ID

# Constructing Input Data From Playlist

In [80]:
def tracks_from_playlist(id):
    """
    Given a playlist id, returns a pandas dataframe consisting of key elements of each song
    """
    playlist = sp.playlist(id)
    tracks = []
    for item in playlist['tracks']['items']:
        if item['track']['id'] is not None:
            track = item['track']
            track_id = track['id']
            track_info = {
                'track_id': track_id,
                'duration_ms': track['duration_ms'],
                'explicit': track['explicit'],
                'popularity': track['popularity'],
                'date_added': item['added_at']
            }
            tracks.append(track_info)

    tracks_df = pd.DataFrame(tracks)
    tracks_df['date_added'] = pd.to_datetime(tracks_df['date_added'], utc=True)
    now = datetime.datetime.now(pytz.utc)
    tracks_df['months_since_added'] = tracks_df['date_added'].apply(lambda x: relativedelta(now, x).months)
    tracks_df = tracks_df.drop(['date_added'], axis=1)

    return tracks_df

In [81]:
def extract_tracks_features(ids):
  """
  Given a list of track ids, returns a pandas dataframe of key audio features of each track
  """
  audio_features_list = []
  for track_id in ids:
    if track_id is not None:
      audio_features = sp.audio_features(track_id)[0]
      audio_features_list.append(audio_features)

  # convert list of dictionaries to Pandas DataFrame
  audio_features_df = pd.DataFrame.from_records(audio_features_list, columns=audio_features_list[0].keys())

  # add track ID column to DataFrame
  audio_features_df['track_id'] = ids

  # re-order columns to put track_id first
  cols = audio_features_df.columns.tolist()
  cols = cols[-1:] + cols[:-1]
  audio_features_df = audio_features_df[cols]
  audio_features_df = audio_features_df.drop(['type', 'id', 'uri', 'track_href', 'analysis_url', 'track_id'], axis=1)
  return audio_features_df

In [82]:
def create_playlist_df(id):
    """
    Returns a dataframe for the playlsit with the given id. Attributes include thoe returned by
    Spotify Web API's GET Audio Features endpoint and track meta data like name, artist, etc.
    """
    df1 = tracks_from_playlist(id)
    df2 = extract_tracks_features(df1['track_id'])
    playlist_df = pd.concat([df1,df2], axis=1)
    return playlist_df

In [83]:
playlist_df = create_playlist_df(id)
playlist_df

Unnamed: 0,track_id,duration_ms,explicit,popularity,months_since_added,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms.1,time_signature
0,1sFstGV1Z3Aw5TDFCiT7vK,185062,True,63,3,0.725,0.601,1,-9.030,1,0.377,0.0321,0.000000,0.0504,0.811,155.918,185062,4
1,0v9Wz8o0BT8DU38R4ddjeH,304606,True,67,3,0.652,0.795,11,-5.192,0,0.174,0.1560,0.000000,0.1230,0.788,135.018,304607,4
2,3ZLyt2ndLFBh148XRYjYYZ,203794,False,57,3,0.531,0.544,0,-8.615,0,0.462,0.3080,0.000002,0.0835,0.398,131.921,203794,4
3,1QBwk6GTCxVdC2hoSw9tlM,307640,True,64,3,0.417,0.887,5,-5.551,0,0.367,0.0786,0.000000,0.8340,0.303,167.579,307640,4
4,1ZM8toCOlnfBKJdvR8GqUq,233922,True,59,3,0.687,0.783,0,-4.573,1,0.191,0.0526,0.000000,0.0718,0.359,156.141,233923,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,52uU5uNt9lOstGCc6C7QEJ,251906,True,50,0,0.473,0.913,11,-3.372,0,0.444,0.0271,0.000000,0.0784,0.721,94.571,251907,4
95,564oa00vY05d1uYnTEAAmE,183760,True,61,0,0.885,0.595,7,-5.487,1,0.286,0.6560,0.000004,0.1050,0.754,148.034,183760,4
96,6igOR5QCn09zAVF2ikZjsV,184093,True,55,0,0.930,0.561,0,-8.053,1,0.250,0.1010,0.000000,0.0992,0.596,140.987,184093,4
97,1GeNui6m825V8jP4uKiIaH,149040,True,68,0,0.974,0.596,6,-8.888,1,0.184,0.0976,0.000000,0.1510,0.892,111.959,149040,4


# Generate Recommendations

In [84]:
def create_playlist_vector(playlist_df, weight_factor=1.1):
    """
    Generates a single vector desribing a playlist dataframe

    Parameters:
    - playlist_df
    - weight_factor: value representing bias of more recently added songs
    """
    playlist_df= playlist_df.drop(['track_id'],axis=1)

    # Normalize data:
    

    # Compute Weight for each song of playlist; given from their months_since_added
    # More recent the song was added, the more weight is is given
    playlist_df['weight'] = playlist_df['months_since_added'].apply(lambda x: weight_factor ** (-x))
    playlist_df.update(playlist_df.mul(playlist_df.weight,0))

    sum_vect = playlist_df.sum(axis = 0)

    return sum_vect

In [85]:
playlist_vect = create_playlist_vector(playlist_df)
playlist_vect

duration_ms           1.779317e+07
explicit              7.693689e+01
popularity            5.323493e+03
months_since_added    1.605560e+02
danceability          5.748580e+01
energy                5.491464e+01
key                   4.299053e+02
loudness             -5.115185e+02
mode                  3.780316e+01
speechiness           1.645393e+01
acousticness          1.300458e+01
instrumentalness      5.006180e-01
liveness              1.905147e+01
valence               4.110781e+01
tempo                 9.606052e+03
duration_ms           1.779321e+07
time_signature        3.242014e+02
weight                6.807629e+01
dtype: float64

In [86]:
from sklearn.metrics.pairwise import cosine_similarity

def generate_recommendations(spotify_df, playlist_vect):
    """
    Return Recommenmdations based on playlist.

    Parameters:
    - spotify_df : Dataframe of all songs in spotify (or in the used dataset)
    - playlist_vect: vector representing the playlist
    
    """