In [2]:
import json
import pandas as pd
import numpy as np
import spotipy
import requests
import spotipy.util as util
from spotipy.oauth2 import SpotifyOAuth
import os
from collections import defaultdict


In [3]:
# Load the `dotenv` IPython extension and run its `%dotenv` magic before the
# configuration cell reads USERNAME / CLIENT_ID / CLIENT_SECRET from os.environ.
# NOTE(review): presumably this populates the environment from a local .env
# file — confirm against the python-dotenv documentation.
%load_ext dotenv
%dotenv

In [5]:

# --- Account / credential settings, all taken from environment variables ---
username = os.getenv("USERNAME")
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

# --- OAuth settings passed to SpotifyOAuth ---
redirect_uri = 'http://localhost:7777/callback'
scope = 'user-read-recently-played'

# --- Location and naming scheme of the exported data files ---
prefix = f"{username}_"
data_path = f"./{username}_data"

# Spotify client shared by the later cells of the notebook.
auth_manager = SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
    scope=scope,
    username=username,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [4]:
def _read_export(name: str) -> str:
    """Return the text of ``{data_path}/{prefix}{name}.json``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the content has been read, and it is decoded as UTF-8 (the same encoding
    the streaming-history loader in this notebook uses) instead of the
    platform default.
    """
    with open(f"{data_path}/{prefix}{name}.json", "r", encoding="UTF-8") as f:
        return f.read()


# Each export is kept both as raw text (*_string) and parsed (*_json).
identifiers_string = _read_export("Identifiers")
identifiers_json = json.loads(identifiers_string)

inferences_string = _read_export("Inferences")
inferences_json = json.loads(inferences_string)

marquee_string = _read_export("Marquee")
marquee_json = json.loads(marquee_string)

playlist_string = _read_export("Playlist1")
playlist_json = json.loads(playlist_string)

search_string = _read_export("SearchQueries")
search_json = json.loads(search_string)

library_string = _read_export("YourLibrary")
library_json = json.loads(library_string)

In [6]:
import ast
from typing import List
from os import listdir
def get_streamings(path: str) -> List[dict]:
    """Load every streaming-history export found in ``path``.

    A file is picked up when the part of its name before the first ``.`` is
    ``{prefix}StreamingHistory`` followed by one or more digits (``prefix`` is
    the notebook-level file-name prefix). Files are read in ascending numeric
    order so the result does not depend on the directory listing order.

    Args:
        path: Directory containing the export files.

    Returns:
        One flat list holding the entries of all matching files, concatenated.

    Raises:
        json.JSONDecodeError: If a matching file does not contain valid JSON.
    """
    stem_start = prefix + 'StreamingHistory'

    numbered_files = []
    for name in listdir(path):
        stem = name.split('.')[0]
        suffix = stem[len(stem_start):]
        # Accept multi-digit suffixes too (e.g. StreamingHistory10).
        if stem.startswith(stem_start) and suffix.isdigit():
            # Build the location from the `path` argument, not the global
            # data_path, so the function works for any directory it is given.
            numbered_files.append((int(suffix), f"{path}/{name}"))

    all_streamings = []
    for _, file in sorted(numbered_files):
        with open(file, 'r', encoding='UTF-8') as f:
            # The exports are JSON; json.load also understands true/false/null
            # and JSON escapes, which ast.literal_eval does not.
            all_streamings += json.load(f)
    return all_streamings

In [6]:
# from collections import defaultdict

# streamings = get_streamings(data_path)

# exceptions = 0
# tracks_to_ids = defaultdict()
# for i in range(len(streamings)):
#    track_name = streamings[i]['trackName']
#    if track_name in tracks_to_ids:
#       streamings[i]['id'] = tracks_to_ids[track_name]
#    else:
#       search = sp.search(q=f"track:{track_name}",type="track")
#       try:
#          id = search['tracks']['items'][0]['id']
#          streamings[i]['id'] = id
#       except:
#          tracks_to_ids[track_name] = None

# with open(f'{data_path}/{prefix}history_with_ids', 'w') as f:
#    f.write(json.dumps(streamings, indent=2))
#
# nones = 0
# for x, y in tracks_to_ids.items():
#      if y == None:
#         nones+=1

# print(nones)

In [7]:
# Load the streaming history that an earlier step saved with Spotify track ids.
# The `with` block closes the file once it has been parsed.
with open(f'{data_path}/{prefix}history_with_ids', 'r') as tracks_file:
    tracks = json.load(tracks_file)

# Positions of the entries that carry no 'id' key at all.
indices_to_remove = [i for i, track in enumerate(tracks) if 'id' not in track]

# Keep every entry that has an 'id' key. Filtering directly (instead of
# stitching slices between the removed positions) retains the first entry,
# the entries after the last removed position, and the whole list when
# nothing has to be removed.
tracks = [track for track in tracks if 'id' in track]

In [27]:
def get_features(track_ids: list, batch_size: int = 100) -> list:
    """Fetch audio features for ``track_ids`` through the notebook's ``sp`` client.

    The ids are sent to ``sp.audio_features`` in consecutive batches of at
    most ``batch_size`` ids (100 by default, the batch size used elsewhere in
    this notebook), and the per-batch results are concatenated in order.

    Args:
        track_ids: Spotify track ids to look up.
        batch_size: Maximum number of ids per request; must be positive.

    Returns:
        The concatenated results of all requests; an empty list when
        ``track_ids`` is empty (no request is made in that case).
        NOTE(review): individual entries may be ``None`` for ids the API has
        no features for — confirm against the spotipy documentation.

    Raises:
        ValueError: If ``batch_size`` is zero (raised by ``range``).
    """
    track_ids = list(track_ids)
    features = []
    for start in range(0, len(track_ids), batch_size):
        batch = sp.audio_features(track_ids[start:start + batch_size])
        # Guard against a request that yields no list at all.
        features += batch or []
    return features

In [8]:
# One row per history entry; rows whose 'id' is null are dropped.
tracks_df = pd.DataFrame(tracks).dropna(subset=["id"], axis=0)
unique_ids = tracks_df['id'].unique()

# Request the audio features in batches of 100 ids, one request per batch.
features = []
for block in range(0, len(unique_ids), 100):
    ids = list(unique_ids[block:block + 100])
    batch = sp.audio_features(ids) or []
    # Skip missing entries: a None inside `features` would break the later
    # pd.DataFrame(features) / merge on 'id'.
    features += [entry for entry in batch if entry is not None]

In [16]:
# Attach the audio features to every history row; the left join keeps rows
# for which no features were downloaded. The result is written without the
# DataFrame index.
features_df = pd.DataFrame(features)
history_with_features = pd.merge(tracks_df, features_df, on='id', how='left')
history_with_features.to_csv(f'{data_path}/{prefix}history_with_ids_features.csv', index=False)

In [None]:
# Write the `tracks` list as pretty-printed JSON (2-space indent).
history_path = f'{data_path}/{prefix}history_with_features'
with open(history_path, 'w') as out_file:
    serialized = json.dumps(tracks, indent=2)
    out_file.write(serialized)

In [15]:
# Spot-check: request the audio features of two hard-coded track ids and
# display the raw response as the cell output.
sample_track_ids = ["6tJFtthY0rI1x06qb8NjK0", "6iMBQot4ZWbc1judI3x1eG"]
sp.audio_features(sample_track_ids)

[{'danceability': 0.417,
  'energy': 0.408,
  'key': 5,
  'loudness': -13.343,
  'mode': 0,
  'speechiness': 0.0761,
  'acousticness': 0.535,
  'instrumentalness': 0.655,
  'liveness': 0.102,
  'valence': 0.233,
  'tempo': 165.799,
  'type': 'audio_features',
  'id': '6tJFtthY0rI1x06qb8NjK0',
  'uri': 'spotify:track:6tJFtthY0rI1x06qb8NjK0',
  'track_href': 'https://api.spotify.com/v1/tracks/6tJFtthY0rI1x06qb8NjK0',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/6tJFtthY0rI1x06qb8NjK0',
  'duration_ms': 198862,
  'time_signature': 4},
 {'danceability': 0.886,
  'energy': 0.568,
  'key': 9,
  'loudness': -7.841,
  'mode': 1,
  'speechiness': 0.0439,
  'acousticness': 0.0375,
  'instrumentalness': 0.00216,
  'liveness': 0.0858,
  'valence': 0.753,
  'tempo': 109.981,
  'type': 'audio_features',
  'id': '6iMBQot4ZWbc1judI3x1eG',
  'uri': 'spotify:track:6iMBQot4ZWbc1judI3x1eG',
  'track_href': 'https://api.spotify.com/v1/tracks/6iMBQot4ZWbc1judI3x1eG',
  'analysis_url': 'https