In [2]:
import json
import pandas as pd
import numpy as np
import spotipy
import requests
import spotipy.util as util
from spotipy.oauth2 import SpotifyOAuth
import os
from collections import defaultdict


In [3]:
# Load the dotenv IPython extension, then invoke its %dotenv line magic.
# NOTE(review): presumably this reads a local .env file into the process
# environment so the os.environ lookups in the next cell succeed — confirm.
%load_ext dotenv
%dotenv

In [4]:

# Account name and API credentials are read from the environment
# (os.getenv yields None for any variable that is not set).
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")
username = os.getenv("USERNAME")

# OAuth settings: the permission scope requested and the local callback URL.
scope = 'user-read-recently-played'
redirect_uri = 'http://localhost:7777/callback'

# Directory and filename prefix of the data files, both derived from the username.
prefix = f"{username}_"
data_path = f"./{username}_data"

# Spotify client authenticated through the OAuth flow configured above.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
        scope=scope,
        username=username,
    )
)

In [56]:
def _read_export_text(name: str) -> str:
    """Return the raw text of ``{data_path}/{prefix}{name}.json``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the text has been read, and it is decoded as UTF-8 (the same encoding
    ``get_streamings`` uses for the streaming-history files) rather than the
    platform default.

    Args:
        name: Export name without the username prefix or the ``.json``
            extension, e.g. ``"Identifiers"``.

    Returns:
        The full file contents as a string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(f"{data_path}/{prefix}{name}.json", "r", encoding="utf-8") as export_file:
        return export_file.read()


# Each export is kept both as raw text (*_string) and as parsed JSON (*_json).
identifiers_string = _read_export_text("Identifiers")
identifiers_json = json.loads(identifiers_string)

inferences_string = _read_export_text("Inferences")
inferences_json = json.loads(inferences_string)

marquee_string = _read_export_text("Marquee")
marquee_json = json.loads(marquee_string)

playlist_string = _read_export_text("Playlist1")
playlist_json = json.loads(playlist_string)

search_string = _read_export_text("SearchQueries")
search_json = json.loads(search_string)

library_string = _read_export_text("YourLibrary")
library_json = json.loads(library_string)

In [5]:
import ast
from typing import List
from os import listdir
def get_streamings(path: str, file_prefix=None) -> List[dict]:
    """Load and concatenate every ``<prefix>StreamingHistory<N>`` file in ``path``.

    A directory entry is selected when the part of its name before the first
    ``.`` is ``file_prefix + 'StreamingHistory'`` followed by one or more
    digits. Selected files are read in ascending numeric order of that suffix
    (``listdir`` itself returns entries in arbitrary order), parsed as JSON,
    and their entries appended to a single list.

    Args:
        path: Directory that contains the streaming-history files. Files are
            both listed from and opened in this directory.
        file_prefix: Text that precedes ``StreamingHistory`` in the file
            names. When ``None`` (the default) the notebook-level ``prefix``
            variable is used.

    Returns:
        One list holding the entries of all selected files, in file order.

    Raises:
        OSError: If ``path`` cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a selected file is not valid JSON.
    """
    if file_prefix is None:
        file_prefix = prefix
    base = file_prefix + 'StreamingHistory'

    # Collect (number, filename) pairs so multi-digit suffixes such as
    # "...StreamingHistory10" are accepted and sort after "...StreamingHistory9".
    numbered = []
    for name in listdir(path):
        stem = name.split('.')[0]
        suffix = stem[len(base):]
        if stem.startswith(base) and suffix.isdecimal():
            numbered.append((int(suffix), name))

    all_streamings = []
    for _, name in sorted(numbered):
        with open(f"{path}/{name}", 'r', encoding='UTF-8') as f:
            # The files are JSON, so use the JSON parser: it understands
            # true/false/null and \uXXXX escapes, which a Python-literal
            # parser does not handle correctly.
            all_streamings += json.load(f)
    return all_streamings

In [6]:
# Load the listening history that carries an 'id' field per entry.
# The with-block closes the file once it has been parsed.
with open(f'{data_path}/{prefix}history_with_ids', 'r', encoding='utf-8') as tracks_file:
    tracks = json.load(tracks_file)

# Positions of the entries that have no 'id' key (kept for inspection).
indices_to_remove = [i for i, track in enumerate(tracks) if 'id' not in track]

# Drop any rows without an id.
# A direct filter is used instead of stitching together slices between the
# removed positions: that slicing skipped tracks[0], discarded everything after
# the last removed position, and produced an empty list when nothing needed
# removing.
tracks_clean = [track for track in tracks if 'id' in track]

In [7]:
# Tabulate the history and discard rows whose id is missing.
tracks_df = pd.DataFrame(tracks).dropna(axis=0, subset=["id"])
unique_ids = tracks_df['id'].unique()

# Query audio features for the distinct ids, 100 ids per request
# (presumably the per-call cap of the endpoint — confirm against the API docs).
features = []
for block in range(0, len(unique_ids), 100):
    ids = unique_ids[block:block + 100]
    features.extend(sp.audio_features(ids))

In [8]:

# Remove the entries pd.isna flags as missing from the collected features
# (presumably None placeholders for ids with no audio features — confirm).
temp = np.array(features)
missing_mask = pd.isna(temp)
features = list(temp[~missing_mask])

In [9]:
# Final dataset: every history row joined with the audio features for its id.
# A left join keeps history rows even when no features were found for the id.
# The result is written to disk and then read back from that same file.
tracks_df.merge(
    pd.DataFrame(features),
    how='left',
    on='id',
).to_csv(f'{data_path}/{prefix}history_with_features', index=False)
dataset_df = pd.read_csv(f'{data_path}/{prefix}history_with_features')


In [63]:
# Display the history-with-features frame read back from disk as this cell's output.
dataset_df

Unnamed: 0,endTime,artistName,trackName,msPlayed,id,danceability,energy,key,loudness,mode,...,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
0,2021-08-17 22:43,Trippie Redd,LEADERS,160993,4D7NrSeqkTarBrJ80b2sBc,0.950,0.654,0.0,-5.895,0.0,...,0.000000,0.1380,0.886,132.936,audio_features,spotify:track:4D7NrSeqkTarBrJ80b2sBc,https://api.spotify.com/v1/tracks/4D7NrSeqkTar...,https://api.spotify.com/v1/audio-analysis/4D7N...,194844.0,4.0
1,2021-08-17 22:47,Trippie Redd,FROZEN OCEAN,196169,5ypQ8scn8g2l0341ErES0B,0.314,0.762,2.0,-3.136,1.0,...,0.000022,0.1160,0.310,134.582,audio_features,spotify:track:5ypQ8scn8g2l0341ErES0B,https://api.spotify.com/v1/tracks/5ypQ8scn8g2l...,https://api.spotify.com/v1/audio-analysis/5ypQ...,204233.0,4.0
2,2021-08-17 22:49,WILLOW,t r a n s p a r e n t s o u l feat. Travis Barker,168398,1QL7nSDZCwZMnbisV4KOXt,0.390,0.830,11.0,-5.201,0.0,...,0.000000,0.1560,0.191,90.021,audio_features,spotify:track:1QL7nSDZCwZMnbisV4KOXt,https://api.spotify.com/v1/tracks/1QL7nSDZCwZM...,https://api.spotify.com/v1/audio-analysis/1QL7...,168030.0,4.0
3,2021-08-17 22:52,Machine Gun Kelly,I Think I'm OKAY (with YUNGBLUD & Travis Barker),169397,2gTdDMpNxIRFSiu7HutMCg,0.628,0.744,7.0,-4.718,1.0,...,0.000000,0.3130,0.277,119.921,audio_features,spotify:track:2gTdDMpNxIRFSiu7HutMCg,https://api.spotify.com/v1/tracks/2gTdDMpNxIRF...,https://api.spotify.com/v1/audio-analysis/2gTd...,169397.0,4.0
4,2021-08-17 22:58,Trippie Redd,PILL BREAKER (feat. blackbear & Machine Gun Ke...,177058,7jjnn6K7p5MgOFuqLypAcM,0.527,0.790,2.0,-6.268,1.0,...,0.000000,0.3700,0.236,153.034,audio_features,spotify:track:7jjnn6K7p5MgOFuqLypAcM,https://api.spotify.com/v1/tracks/7jjnn6K7p5Mg...,https://api.spotify.com/v1/audio-analysis/7jjn...,177059.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12658,2021-08-17 22:29,Trippie Redd,RED SKY (feat. Machine Gun Kelly),152149,0yFfjXtSDkr81WVAPPS1Q4,0.472,0.769,7.0,-4.198,1.0,...,0.000000,0.2390,0.587,159.999,audio_features,spotify:track:0yFfjXtSDkr81WVAPPS1Q4,https://api.spotify.com/v1/tracks/0yFfjXtSDkr8...,https://api.spotify.com/v1/audio-analysis/0yFf...,152149.0,4.0
12659,2021-08-17 22:33,Trippie Redd,MEGLADON,194992,5Kq6LIFZsVD99rb7iT0QTG,0.453,0.739,7.0,-4.977,1.0,...,0.000000,0.6940,0.258,139.962,audio_features,spotify:track:5Kq6LIFZsVD99rb7iT0QTG,https://api.spotify.com/v1/tracks/5Kq6LIFZsVD9...,https://api.spotify.com/v1/audio-analysis/5Kq6...,194992.0,4.0
12660,2021-08-17 22:35,Trippie Redd,SAVE YOURSELF,160729,6FhB5ZyT4JsIJ0ozDlXCIp,0.503,0.921,8.0,-5.907,0.0,...,0.000003,0.2600,0.359,121.983,audio_features,spotify:track:6FhB5ZyT4JsIJ0ozDlXCIp,https://api.spotify.com/v1/tracks/6FhB5ZyT4JsI...,https://api.spotify.com/v1/audio-analysis/6FhB...,221160.0,4.0
12661,2021-08-17 22:38,Trippie Redd,DREAMER,170000,4yZEX3NsnXfobmZ0m8ZO4Y,0.723,0.751,9.0,-5.233,0.0,...,0.000000,0.0680,0.642,126.999,audio_features,spotify:track:4yZEX3NsnXfobmZ0m8ZO4Y,https://api.spotify.com/v1/tracks/4yZEX3NsnXfo...,https://api.spotify.com/v1/audio-analysis/4yZE...,190849.0,4.0
