In [1]:
from tkinter import *
from tkinter import filedialog as fd
import json
import pandas as pd
import numpy as np
import sys
import json
import spotipy.util as util
import spotipy
from spotipy.oauth2 import SpotifyOAuth
!{sys.executable} -m pip install spotipy



## Constructing dataset from requested Spotify personal data

#### Helper Methods

In [2]:
def countTracks(row):
    minPlayLength = 20000
    
    x = songPlays.loc[
        (songPlays["trackName"] == row.trackName) & 
        (songPlays["artistName"] == row.artistName) &
        (songPlays["msPlayed"] >= minPlayLength)]
    row["numPlays"] = len(x)
    return row

In [3]:
def appendNumPlays(uniqueSongs):
    uniqueSongs = uniqueSongs.apply(countTracks, axis=1)
    return uniqueSongs

In [4]:
def removeSongsFromPlaylists(toRemove, uniqueSongs):
    for playlist in playlists:
        if playlist["name"] in toRemove:
            #remove songs from toRemove playlists
            for song in playlist["items"]:
                uniqueSongs = uniqueSongs.loc[
                    (uniqueSongs["trackName"] != song["track"]["trackName"]) &
                    (uniqueSongs["artistName"] != song["track"]["artistName"])
                ]
    return uniqueSongs

In [5]:
#Should come after API section

def add_song_features_to_song_plays(row):
    # song = uniqueSongs.loc[(uniqueSongs["trackName"].str.match(row["trackName"])) & (uniqueSongs["artistName"].str.match(row["artistName"]))]
    song = uniqueSongs.loc[(uniqueSongs["trackName"].str.contains(row["trackName"], regex=False)) & (uniqueSongs["artistName"].str.contains(row["artistName"], regex=False))]
    
    for feature in ['valence', 'tempo','acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'speechiness']:
        # v = float(song[feature])
        row[feature] = song[feature].iloc[0]
        
    
    return row

#### Implementation

In [6]:
playlistFile = fd.askopenfilename()
streamsFile1 = fd.askopenfilename()
streamsFile2 = fd.askopenfilename()

In [7]:
# Load Files

with open(playlistFile, "rt") as f:
    playlists = json.load(f)
    playlists = playlists["playlists"]

with open(streamsFile1, "rt") as f:
    streams1 = json.load(f)
with open(streamsFile2, "rt") as f:
    streams2 = json.load(f)

In [8]:
# Prepare song plays (do in main scope if converted to script)

songPlays = pd.DataFrame(columns = ['trackName', 'artistName', 'msPlayed', 'endTime'])

for play in streams1:
    songPlays = songPlays.append(play, ignore_index = True)
for play in streams2:
    songPlays = songPlays.append(play, ignore_index = True)


In [None]:
# Construct dataframe containing set of songs played with song metadata 
# (do in main scope if converted to script)

uniqueSongs = pd.concat([songPlays['trackName'], songPlays['artistName']], axis=1)
uniqueSongs = tmp.drop_duplicates()
uniqueSongs = removeSongsFromPlaylists(["ChillHop Study Sessions"], uniqueSongs) # Remove songs from my study playlist
uniqueSongs = appendNumPlays(uniqueSongs)
uniqueSongs = uniqueSongs.sort_values("numPlays", ascending=False)
uniqueSongs = uniqueSongs.reset_index(drop=True)
uniqueSongs = uniqueSongs.loc[(~ uniqueSongs["trackName"].str.match("Throwaway") & (~ uniqueSongs["artistName"].str.match("Chitra"))



## Using API to further annotate dataset
#### (Helper Methods)

In [8]:
# Works with individual requests. Does not work with pd.dataframe.apply()

def get_URI_beta(trackName, artistName):

    results = sp_search.search(q='artist:' + artistName, type='track', limit=50)

    for track in results["tracks"]["items"]:
        if track["name"] == trackName:
            song = track["name"]
            artist = track["artists"][0]["name"]
            uri = track["uri"]
    uniqueSongs.loc[
        ((uniqueSongs["trackName"] == trackName) &
        (uniqueSongs["artistName"] == artistName)), "URI"] = uri


In [9]:
#Used with pd.dataframe.apply()

def get_URI(row):
    
    success = False
    results = sp_search.search(q='artist:' + row["artistName"], type='track', limit=50)

    #search for track by artist (up to first 50 results)
    for track in results["tracks"]["items"]:
        if track["name"] == row["trackName"]:
            row["URI"] = track["uri"]
            success = True
    
    #search for track by artist (results 50 to 100)
    if (not success) and len(results["tracks"]["items"]) == 50:
        results = sp_search.search(q='artist:' + row["artistName"], type='track', limit=50, offset=50)
        for track in results["tracks"]["items"]:
            if track["name"] == row["trackName"]:
                row["URI"] = track["uri"]
                success = True
    
    #search for track by song name (up to first 50 results)
    if not success:
        results = sp_search.search(q='track:' + row["trackName"], type='track', limit=50)
        for track in results["tracks"]["items"]:
            if track["artists"][0]["name"] == row["artistName"]:
                row["URI"] = track["uri"]
    
    #search for track by song name (results 50 to 100)
    if (not success) and len(results["tracks"]["items"]) == 50:
        results = sp_search.search(q='artist:' + row["artistName"], type='track', limit=50, offset=50)
        for track in results["tracks"]["items"]:
            if track["artists"][0]["name"] == row["artistName"]:
                row["URI"] = track["uri"]
    
    return row

In [10]:
def get_audio_features(row):
    features = sp_search.audio_features(tracks=[row["URI"]])[0] #get audio features of track
    if not type(features) is dict:
        return row
    
    #remove unwanted features
    for feature in ["mode", "id", "uri", "track_href", "analysis_url", "time_signature", "type"]:
        try:
            features.pop(feature)
        except AttributeError:
            continue
    
    #add fetures to row (song) entry
    for feature, value in features.items():
        row[feature] = value
        
    return row

In [11]:
def get_playlist_URI(title):
    pLists = sp_playlist.user_playlists("dowcowrox")
    for p in pLists["items"]:
        if p["name"] == title:
            return p["uri"]

#### Implementation (data annotation)

In [12]:
#Authentication

scope1 = "user-read-private" #for searching for URIs
scope2 = "playlist-modify-public" #for creating and modifying playlists
client_id = "53e9bb1b8d3d498dac14ef2c8507f800"
client_secret = "690ac39a2c7b4b188f8bf641c6dd5066"
redirect_url = "http://localhost:5000"

sp_search = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id, client_secret= client_secret, redirect_uri=redirect_url, scope=scope1))
sp_playlist = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id, client_secret= client_secret, redirect_uri=redirect_url, scope=scope2))

  

In [25]:
#USED FOR TESTING individual track searches (or artist if modified)

results = sp_search.search(q='track:' + "Suttin", type='track', limit=50)
for s in results["tracks"]["items"]:
    print(s["name"])
    print(s["artists"][0]["name"])
# help(sp.search)
    

In [66]:
# Additional metadata added to uniqueSongs dataframe

uniqueSongs = uniqueSongs.apply(get_URI, axis=1)
uniqueSongs = uniqueSongs.dropna() 
uniqueSongs = uniqueSongs.reset_index(drop=True)
uniqueSongs = uniqueSongs.apply(get_audio_features, axis=1)
uniqueSongs = uniqueSongs.dropna() # Note: all podcasts dropped at this stage
#Reorder columns
uniqueSongs = uniqueSongs[['trackName','artistName','numPlays','URI','valence', 'tempo', 'acousticness',
 'danceability','duration_ms','energy','instrumentalness',
 'key','liveness','loudness','speechiness']]


In [96]:
# Preview fully annotated data set
uniqueSongs

Unnamed: 0,URI,acousticness,artistName,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,numPlays,speechiness,tempo,trackName,valence
0,spotify:track:3sjZJ0ZEGUOAg6bs6FnBip,0.677000,Chitra,0.437,215526.0,0.422,0.000001,6.0,0.0846,-10.567,186,0.0419,160.185,Throwaway,0.209
1,spotify:track:4HeM8rZit0bMegXEGJ1Bp4,0.398000,Tee Peters,0.792,169500.0,0.457,0.000000,8.0,0.1490,-12.039,41,0.1400,79.999,Suttin,0.741
2,spotify:track:4ZaRg5Sf4TKr0YcFRLh7QJ,0.129000,UMI,0.613,190960.0,0.488,0.000021,8.0,0.3350,-7.370,35,0.0964,172.076,Love Affair,0.183
3,spotify:track:02wNtFxlr9dLmZXKT5TIF5,0.860000,The Marías,0.684,182900.0,0.232,0.404000,0.0,0.1350,-13.249,35,0.0297,73.518,Over the Moon,0.387
4,spotify:track:1iT25vbTtfYuC6KrK4vSEx,0.002370,Billy Talent,0.441,159693.0,0.877,0.000000,6.0,0.3580,-9.484,32,0.0897,161.934,The Ex,0.588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3664,spotify:track:3prlbHPeEgLmUihLu5twTc,0.000231,Moon Hooch,0.545,195673.0,0.824,0.107000,5.0,0.0555,-5.228,0,0.0393,132.066,Acid Mountain,0.687
3665,spotify:track:1gFNm7cXfG1vSMcxPpSxec,0.232000,The Beatles,0.818,188960.0,0.728,0.064200,10.0,0.2510,-8.331,0,0.0314,113.059,"Ob-La-Di, Ob-La-Da - Remastered 2009",0.975
3666,spotify:track:6j67aNAPeQ31uw4qw4rpLa,0.425000,The Beatles,0.792,52973.0,0.763,0.627000,2.0,0.7890,-11.185,0,0.0506,89.900,Wild Honey Pie - Remastered 2009,0.152
3667,spotify:track:389QX9Q1eUOEZ19vtzzI9O,0.020500,The Beatles,0.453,285000.0,0.654,0.003950,4.0,0.1720,-8.855,0,0.0317,115.000,While My Guitar Gently Weeps - Remastered 2009,0.702


In [156]:
# Write to file
with open("uniqueSongs.csv", "wt") as f:
    uniqueSongs.to_csv(f)

In [189]:
# Creates dataframe of song streaming history. Includes metadata for each stream.

tracks = list(uniqueSongs["trackName"])
artists = list(uniqueSongs["artistName"])
songPlays = songPlays.loc[((songPlays["trackName"].isin(tracks)) & songPlays["artistName"].isin(artists))]
songPlays = songPlays[songPlays["msPlayed"] > 10000] #removes streams that are played for less than 10 seconds
songPlays = songPlays.reset_index(drop=True)
songPlays = songPlays.apply(add_song_features_to_song_plays, axis=1)

#Preview data set
songPlays


In [190]:
# Write to file
with open("songplays.csv", "wt") as f:
    songPlays.to_csv(f)

#### Making Playlists with API

In [148]:
# Specify desired characteristics of song
# This is created my "Low Key Bops" playlist

playlistSize = 45

customPlaylist = uniqueSongs.loc[(uniqueSongs["danceability"] > 0.70) & (uniqueSongs["tempo"] < 115) 
                & (uniqueSongs["energy"] < .6) & (uniqueSongs["valence"] < .6)].head(playlistSize)


In [160]:
# Create playlist with top 100 songs
top100Playlist = uniqueSongs.head(100)

In [161]:
#Get playlist URIs

uris1 = list()
uris2 = list()
for u in list(customPlaylist["URI"]):
    if type(u) == str:
        uris1.append(u)
for u in list(top100Playlist["URI"]):
    if type(u) == str:
        uris2.append(u)
        

In [166]:
# Create playlists via API

playlistID_1 = get_playlist_URI("Low Key Bops")
playlistID_2 = get_playlist_URI("Top 100 songs")

sp_playlist.user_playlist_add_tracks(user="dowcowrox", playlist_id=playlistID_1, tracks=uris1)
sp_playlist.user_playlist_add_tracks(user="dowcowrox", playlist_id=playlistID_2, tracks=uris2)
