In [1]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import sys
import json
from dotenv import load_dotenv
import pandas as pd
import datetime
import pytz
from dateutil.relativedelta import relativedelta

# Connecting to Spotify API

In [2]:
# Load settings through python-dotenv before anything else runs.
load_dotenv()

# The first command-line argument is taken as the username; when none is
# given, show the usage line and stop.
# NOTE(review): inside a Jupyter kernel sys.argv usually carries the kernel's
# own launch arguments rather than a username -- confirm this check behaves
# as intended when the notebook is run interactively.
if len(sys.argv) <= 1:
    print("usage: python3 file.py [username]")
    sys.exit()
username = sys.argv[1]

In [3]:
# OAuth scope requested from Spotify for this session.
# NOTE(review): later cells list the current user's playlists -- confirm this
# scope is sufficient if private playlists are expected to show up.
scope = "user-library-read"

# Authenticated client used by every Spotify call in the notebook; only the
# scope is passed explicitly to SpotifyOAuth here.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(scope=scope),
)

In [4]:
# Collect every playlist of the current user.
# current_user_playlists() hands back one page of results at a time, so keep
# following the 'next' link until no further page is advertised; otherwise
# playlists beyond the first page would be silently missing from the lookup.
playlists_res = []
page = sp.current_user_playlists()
while page:
  playlists_res.extend(page['items'])
  page = sp.next(page) if page['next'] else None

# Map playlist name -> playlist id. Because the name is the key, playlists
# that share a name collapse to the last one seen.
playlists = {}
for item in playlists_res:
  playlists[item['name']] = item['id']

playlists

{'sum': '6URjLae4AnvscdYVVB9Tqq',
 'rihanna': '76QhOFRR5kyyrbupyT9R9I',
 'RO tation': '6eQTWnnnbFDm3QRy7lgSsr',
 'dj': '7BKQPJIKcp9mKV24LyLq61',
 'chill': '1XPdaFPovJmoL9OzDAYJ5Z',
 'old': '3Ehc9YycykfZ6ZvZ6GlIt2'}

# Constructing Input Data From Playlist

In [5]:
# Playlist ID that will be the input to the model; a later cell passes it to
# tracks_from_playlist(). NOTE: the name `id` shadows Python's builtin id().
id = playlists['RO tation']
def tracks_from_playlist(id):
    """
    Given a playlist id, returns a pandas dataframe consisting of key elements of each song.

    Every page of the playlist is read (``sp.playlist`` only carries the first
    page of tracks; the rest is reached through the ``next`` links). Entries
    without a usable track -- a missing ``track`` object or a track whose
    ``id`` is None -- are skipped.

    Parameters
    ----------
    id : str
        Playlist identifier, passed straight to ``sp.playlist``. (The name
        shadows the ``id`` builtin; it is kept so existing callers still work.)

    Returns
    -------
    pandas.DataFrame
        One row per kept track with the columns ``track_name``, ``track_id``,
        ``artists``, ``artist_ids``, ``album_name``, ``duration_ms``,
        ``explicit``, ``popularity`` and ``months_since_added``.
        ``artists`` / ``artist_ids`` are lists, or None when the track lists
        no artists. ``months_since_added`` is the number of whole months
        between the entry's ``added_at`` timestamp and the current UTC time
        (NaN when ``added_at`` is missing). A playlist with no usable tracks
        yields an empty frame with the same columns.
    """
    output_columns = [
        'track_name', 'track_id', 'artists', 'artist_ids', 'album_name',
        'duration_ms', 'explicit', 'popularity', 'months_since_added',
    ]

    tracks = []
    page = sp.playlist(id)['tracks']
    while page:
        for item in page['items']:
            track = item.get('track')
            # Skip entries that have no track object or no track id.
            if not track or track.get('id') is None:
                continue
            artists = track.get('artists') or []
            tracks.append({
                'track_name': track['name'],
                'track_id': track['id'],
                'artists': [artist['name'] for artist in artists] or None,
                'artist_ids': [artist['id'] for artist in artists] or None,
                'album_name': track['album']['name'],
                'duration_ms': track['duration_ms'],
                'explicit': track['explicit'],
                'popularity': track['popularity'],
                'date_added': item['added_at'],
            })
        # Follow the paging link; stop once no further page is advertised.
        page = sp.next(page) if page.get('next') else None

    if not tracks:
        # pd.DataFrame([]) would have no 'date_added' column, so the date
        # handling below would raise; return the empty result directly.
        return pd.DataFrame(columns=output_columns)

    tracks_df = pd.DataFrame(tracks)
    added_at = pd.to_datetime(tracks_df['date_added'], utc=True)
    # Timezone-aware "now" so it can be compared with the UTC timestamps above.
    now = datetime.datetime.now(datetime.timezone.utc)

    def _months_since(moment):
        """Whole months elapsed between `moment` and `now` (NaN if missing)."""
        if pd.isna(moment):
            return float('nan')
        elapsed = relativedelta(now, moment)
        # relativedelta splits the gap into years + months, so `.months`
        # alone wraps around at 12; fold the years back in.
        return elapsed.years * 12 + elapsed.months

    tracks_df['months_since_added'] = added_at.apply(_months_since)
    return tracks_df.drop(columns=['date_added'])

# rotation_df = tracks_from_playlist(id)
# rotation_df

In [6]:
def extract_tracks_features(ids):
  """
  Given a list of track ids, returns a pandas dataframe of key audio features of each track.

  The ids are requested from ``sp.audio_features`` in batches instead of one
  request per track. The result has exactly one row per input id, in input
  order and on a fresh 0..n-1 index, so it can be placed side by side with
  the frame the ids came from.

  Parameters
  ----------
  ids : iterable of str
      Track ids (a list or a pandas Series both work). Missing ids
      (None / NaN) are not sent to Spotify.

  Returns
  -------
  pandas.DataFrame
      The audio-feature fields returned by Spotify, minus the bookkeeping
      fields ``type``, ``id``, ``uri``, ``track_href`` and ``analysis_url``
      (and never a ``track_id`` column). Rows for a missing id, or for a
      track Spotify returned no features for, hold NaN in every column, which
      turns integer feature columns into floats. Empty input yields an empty
      frame.
  """
  # Spotify's audio-features endpoint accepts at most 100 ids per request.
  batch_size = 100
  unwanted_columns = ['type', 'id', 'uri', 'track_href', 'analysis_url', 'track_id']
  track_ids = list(ids)

  rows = []
  for start in range(0, len(track_ids), batch_size):
    batch = track_ids[start:start + batch_size]
    requested = [track_id for track_id in batch if not pd.isna(track_id)]
    # Relies on sp.audio_features returning one entry per requested id, in
    # request order, with None where no features are available.
    fetched = iter((sp.audio_features(requested) or []) if requested else [])
    for track_id in batch:
      features = None if pd.isna(track_id) else next(fetched, None)
      # An empty record keeps the row (all NaN) so positions stay aligned.
      rows.append(features or {})

  audio_features_df = pd.DataFrame(rows)
  # errors='ignore' because the bookkeeping fields are absent when no
  # features came back at all.
  return audio_features_df.drop(columns=unwanted_columns, errors='ignore')

# tracks_df = extract_tracks_features(tracks_from_playlist(id)['track_id'])
# tracks_df

In [7]:
# Track metadata for the chosen playlist.
tracks_df1 = tracks_from_playlist(id)

# Audio features requested for the track ids of that same frame.
tracks_df2 = extract_tracks_features(tracks_df1['track_id'])

# Put metadata and audio features side by side; rows are matched on index.
# NOTE(review): Spotify's audio-features payload also appears to carry a
# duration_ms field, so playlist_df likely ends up with two duration_ms
# columns -- confirm that later cells expect this.
playlist_df = pd.concat(objs=[tracks_df1, tracks_df2], axis='columns')
playlist_df

# Generate Recommendations