In [1]:
import json
import pandas as pd
import numpy as np
import spotipy
import requests
import spotipy.util as util
from spotipy.oauth2 import SpotifyOAuth
import os
from collections import defaultdict


In [2]:
# Load the `dotenv` IPython extension and run its `%dotenv` line magic.
# Presumably this reads a local `.env` file into the process environment so the
# os.environ lookups in the next cell find the Spotify credentials — TODO confirm.
%load_ext dotenv
%dotenv

In [3]:

# --- Account / application settings, read from environment variables ---------
# NOTE(review): USERNAME is also defined by some operating systems (e.g. Windows);
# confirm the value picked up here is the Spotify username and not the OS login.
username = os.getenv("USERNAME")
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

# OAuth settings for the Spotify client.
redirect_uri = 'http://localhost:7777/callback'
scope = 'user-read-recently-played'

# Exported files are looked up as "<data_path>/<prefix><Name>" by the cells below.
data_path = f"./{username}_data"
prefix = f"{username}_"

# Build the auth manager first so the client construction reads as a single step.
auth_manager = SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
    scope=scope,
    username=username,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [4]:
def _read_export_file(name: str) -> str:
    """Return the raw text of ``{data_path}/{prefix}{name}.json``.

    The file is opened inside a ``with`` block so the handle is closed as soon as
    the text has been read (the previous bare ``open(...).read()`` calls left six
    handles open), and it is decoded as UTF-8, matching how ``get_streamings``
    reads the streaming-history files, instead of the platform default encoding.

    Raises:
        FileNotFoundError: if the export file does not exist.
    """
    with open(f"{data_path}/{prefix}{name}.json", "r", encoding="UTF-8") as export_file:
        return export_file.read()


# Each export is kept both as raw text (*_string) and as parsed JSON (*_json).
identifiers_string = _read_export_file("Identifiers")
identifiers_json = json.loads(identifiers_string)

inferences_string = _read_export_file("Inferences")
inferences_json = json.loads(inferences_string)

marquee_string = _read_export_file("Marquee")
marquee_json = json.loads(marquee_string)

playlist_string = _read_export_file("Playlist1")
playlist_json = json.loads(playlist_string)

search_string = _read_export_file("SearchQueries")
search_json = json.loads(search_string)

library_string = _read_export_file("YourLibrary")
library_json = json.loads(library_string)

In [5]:
import ast
from typing import List
from os import listdir
def get_streamings(path: str) -> List[dict]:
    """Load and concatenate every streaming-history file found in ``path``.

    A file is selected when the part of its name before the first ``.`` is
    ``prefix + 'StreamingHistory'`` followed by one or more decimal digits
    (``prefix`` is the notebook-level file-name prefix).

    Args:
        path: Directory that contains the streaming-history files.

    Returns:
        One flat list with the records of all matching files, taken in
        ascending order of the numeric file suffix. Empty if nothing matches.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        json.JSONDecodeError: if a matching file is not valid JSON.
    """
    base = prefix + 'StreamingHistory'

    # Collect (number, file name) pairs. Accepting any run of digits (not just
    # a single trailing character) keeps files numbered 10 and above.
    numbered_files = []
    for name in listdir(path):
        stem = name.split('.')[0]
        suffix = stem[len(base):]
        if stem.startswith(base) and suffix.isdecimal():
            numbered_files.append((int(suffix), name))

    all_streamings = []
    # listdir() order is arbitrary; sort numerically so the result is deterministic.
    for _, name in sorted(numbered_files):
        # Open relative to the directory that was listed (`path`), not a global.
        with open(f"{path}/{name}", 'r', encoding='UTF-8') as f:
            # The files are JSON, so parse them as JSON: ast.literal_eval rejects
            # null/true/false and treats some escape sequences differently.
            all_streamings.extend(json.loads(f.read()))
    return all_streamings

In [6]:
# from collections import defaultdict

# streamings = get_streamings(data_path)

# exceptions = 0
# tracks_to_ids = defaultdict()
# for i in range(len(streamings)):
#    track_name = streamings[i]['trackName']
#    if track_name in tracks_to_ids:
#       streamings[i]['id'] = tracks_to_ids[track_name]
#    else:
#       search = sp.search(q=f"track:{track_name}",type="track")
#       try:
#          id = search['tracks']['items'][0]['id']
#          streamings[i]['id'] = id
#       except:
#          tracks_to_ids[track_name] = None

# with open(f'{data_path}/{prefix}history_with_ids', 'w') as f:
#    f.write(json.dumps(streamings, indent=2))
#
# nones = 0
# for x, y in tracks_to_ids.items():
#      if y == None:
#         nones+=1

# print(nones)

In [17]:
# Load the streaming history that was saved with Spotify track ids attached.
# The `with` block closes the file handle once the JSON has been parsed.
with open(f'{data_path}/{prefix}history_with_ids', 'r') as tracks_file:
    tracks = json.load(tracks_file)

# drop any rows without an id
# Record which rows lack the 'id' key and print them for inspection.
indices_to_remove = [i for i, track in enumerate(tracks) if 'id' not in track]
for i in indices_to_remove:
    print(tracks[i])

# Keep every row that has an id. The earlier slice-stitching version
# (tracks[prev + 1: x] starting from prev = 0) always lost row 0, lost every row
# after the last id-less row, and produced an empty list when nothing needed
# removing; a direct filter has none of those edge cases.
tracks_result = [track for track in tracks if 'id' in track]
tracks_clean = tracks_result

19091

{'endTime': '2020-11-06 04:23', 'artistName': 'The Ringer NFL Show', 'trackName': 'Can the Steelers Go 16-0? Plus: Brees vs. Brady, and What Antonio Brown Brings to the Bucs. | The Ringer NFL Show', 'msPlayed': 1765461}
{'endTime': '2020-11-07 00:40', 'artistName': 'The Bill Simmons Podcast', 'trackName': 'Burrow vs. Herbert, Ravens Concerns, Seattle’s Ceiling, and Trump’s Big Meltdown With Mina Kimes and JackO', 'msPlayed': 4965992}
{'endTime': '2020-11-08 06:10', 'artistName': 'The Ringer NBA Show', 'trackName': 'NBA Tentatively Approves 2020-21 Season Plan. Plus: Draft Rumors With Jonathan Tjarks and J. Kyle Mann | The Mismatch', 'msPlayed': 401449}
{'endTime': '2020-11-08 07:49', 'artistName': 'Frank Sinatra', 'trackName': "I've Got A Crush On You - 2020 Mix", 'msPlayed': 134693}
{'endTime': '2020-11-08 08:13', 'artistName': 'Frank Sinatra', 'trackName': "Mam'selle - 2020 Mix", 'msPlayed': 167733}
{'endTime': '2020-11-09 07:36', 'artistName': 'The Ringer NFL Show', 'trackName': 'We

In [8]:
def get_features(track_ids: list) -> list:
    """Fetch Spotify audio features for ``track_ids`` via the notebook's ``sp`` client.

    The ids are sent in batches of at most 100 (the documented per-request limit
    of the audio-features endpoint) and the per-batch results are concatenated.

    Args:
        track_ids: Spotify track ids (or URIs/URLs accepted by ``sp.audio_features``).

    Returns:
        The concatenated results of ``sp.audio_features`` for each batch, in the
        same order as ``track_ids``. Whatever the client returns for an id is
        kept as-is so positions stay aligned with the input. An empty input
        yields an empty list without calling the API.
    """
    batch_size = 100
    features = []
    for start in range(0, len(track_ids), batch_size):
        features.extend(sp.audio_features(track_ids[start:start + batch_size]))
    # The original computed the features but never returned them.
    return features

In [15]:
# Build the history frame (rows with a null id are dropped) and look up the audio
# features of every distinct track id.
tracks_df = pd.DataFrame(tracks).dropna(subset=["id"], axis=0)
unique_ids = tracks_df['id'].unique()

features = []
# The audio-features endpoint takes at most 100 ids per request, so page through
# the ids in blocks of 100.
for block in range(0, len(unique_ids), 100):
    ids = unique_ids[block:block + 100]
    # Spotify returns a null entry for ids it has no audio features for; skip those
    # so that `features` holds only dicts and pd.DataFrame(features) can be built.
    features += [entry for entry in sp.audio_features(ids) if entry is not None]

In [18]:
# Final artefact: the listening history (with track ids) left-joined to the audio
# features of each track. Needs `tracks_df` and `features` from the fetch cell above.
features_df = pd.DataFrame(features)
history_with_features = tracks_df.merge(features_df, on='id', how='left')

# Persist the joined table, then read it back so the cell displays what was written.
history_with_features_path = f'{data_path}/{prefix}history_with_features'
history_with_features.to_csv(history_with_features_path, index=False)
pd.read_csv(history_with_features_path)


Unnamed: 0,endTime,artistName,trackName,msPlayed,id,danceability,energy,key,loudness,mode,...,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
0,2020-11-06 04:23,Wolf Gang,The King And All Of His Men,21056,6QU4xwSi2X8np8qo7JJb34,0.519,0.900,6,-4.844,1,...,0.000055,0.1030,0.594,120.001,audio_features,spotify:track:6QU4xwSi2X8np8qo7JJb34,https://api.spotify.com/v1/tracks/6QU4xwSi2X8n...,https://api.spotify.com/v1/audio-analysis/6QU4...,243773,4
1,2020-11-06 04:26,Christian French,superstars,199848,0TqaX16qLpX7YqFcAl9lDy,0.312,0.614,8,-6.073,1,...,0.000000,0.0898,0.356,205.379,audio_features,spotify:track:0TqaX16qLpX7YqFcAl9lDy,https://api.spotify.com/v1/tracks/0TqaX16qLpX7...,https://api.spotify.com/v1/audio-analysis/0Tqa...,199848,4
2,2020-11-06 04:29,Valley,sucks to see you doing better,125348,2LFprH1a4nNvmYha55xhj6,0.736,0.593,1,-8.405,1,...,0.000001,0.0562,0.612,121.996,audio_features,spotify:track:2LFprH1a4nNvmYha55xhj6,https://api.spotify.com/v1/tracks/2LFprH1a4nNv...,https://api.spotify.com/v1/audio-analysis/2LFp...,225137,4
3,2020-11-06 04:31,Quinn XCII,Stacy,168986,27L8sESb3KR79asDUBu8nW,0.774,0.750,11,-4.927,0,...,0.000018,0.0808,0.927,118.015,audio_features,spotify:track:27L8sESb3KR79asDUBu8nW,https://api.spotify.com/v1/tracks/27L8sESb3KR7...,https://api.spotify.com/v1/audio-analysis/27L8...,197987,4
4,2020-11-06 04:35,The Wldlfe,New Age Meds,210503,6C7KDGm5j5x1hrLf7H5X11,0.383,0.836,8,-6.086,1,...,0.000107,0.1180,0.469,107.030,audio_features,spotify:track:6C7KDGm5j5x1hrLf7H5X11,https://api.spotify.com/v1/tracks/6C7KDGm5j5x1...,https://api.spotify.com/v1/audio-analysis/6C7K...,210503,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16976,2021-11-06 19:49,Fall Out Boy,"Dance, Dance",180266,1zi7xx7UVEFkmKfv06H8x0,0.792,0.625,1,-5.609,1,...,0.001800,0.3290,0.370,103.967,audio_features,spotify:track:1zi7xx7UVEFkmKfv06H8x0,https://api.spotify.com/v1/tracks/1zi7xx7UVEFk...,https://api.spotify.com/v1/audio-analysis/1zi7...,173987,4
16977,2021-11-06 19:52,Panic! At The Disco,The Only Difference Between Martyrdom and Suic...,174866,4WdopfTdSd0nCLropjhsgH,0.501,0.933,6,-3.476,0,...,0.000000,0.3760,0.708,170.130,audio_features,spotify:track:4WdopfTdSd0nCLropjhsgH,https://api.spotify.com/v1/tracks/4WdopfTdSd0n...,https://api.spotify.com/v1/audio-analysis/4Wdo...,174867,4
16978,2021-11-06 19:52,All Time Low,Weightless,22017,0aZJGkkXR3DgaFqo5sB8ot,0.497,0.923,2,-4.512,1,...,0.000000,0.1580,0.663,181.074,audio_features,spotify:track:0aZJGkkXR3DgaFqo5sB8ot,https://api.spotify.com/v1/tracks/0aZJGkkXR3Dg...,https://api.spotify.com/v1/audio-analysis/0aZJ...,198000,4
16979,2021-11-06 20:53,Alien Ant Farm,Smooth Criminal,18250,6RfhBNU1FPWTG7VESlfgOl,0.882,0.662,0,-6.382,1,...,0.000000,0.1060,0.570,120.034,audio_features,spotify:track:6RfhBNU1FPWTG7VESlfgOl,https://api.spotify.com/v1/tracks/6RfhBNU1FPWT...,https://api.spotify.com/v1/audio-analysis/6Rfh...,106000,4


In [None]:
# Spot-check: request audio features for two hard-coded track ids and display the raw response.
sample_track_ids = [
    "6tJFtthY0rI1x06qb8NjK0",
    "6iMBQot4ZWbc1judI3x1eG",
]
sp.audio_features(sample_track_ids)

[{'danceability': 0.417,
  'energy': 0.408,
  'key': 5,
  'loudness': -13.343,
  'mode': 0,
  'speechiness': 0.0761,
  'acousticness': 0.535,
  'instrumentalness': 0.655,
  'liveness': 0.102,
  'valence': 0.233,
  'tempo': 165.799,
  'type': 'audio_features',
  'id': '6tJFtthY0rI1x06qb8NjK0',
  'uri': 'spotify:track:6tJFtthY0rI1x06qb8NjK0',
  'track_href': 'https://api.spotify.com/v1/tracks/6tJFtthY0rI1x06qb8NjK0',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/6tJFtthY0rI1x06qb8NjK0',
  'duration_ms': 198862,
  'time_signature': 4},
 {'danceability': 0.886,
  'energy': 0.568,
  'key': 9,
  'loudness': -7.841,
  'mode': 1,
  'speechiness': 0.0439,
  'acousticness': 0.0375,
  'instrumentalness': 0.00216,
  'liveness': 0.0858,
  'valence': 0.753,
  'tempo': 109.981,
  'type': 'audio_features',
  'id': '6iMBQot4ZWbc1judI3x1eG',
  'uri': 'spotify:track:6iMBQot4ZWbc1judI3x1eG',
  'track_href': 'https://api.spotify.com/v1/tracks/6iMBQot4ZWbc1judI3x1eG',
  'analysis_url': 'https