In [1]:
%reload_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import json
import time
import pandas as pd
import numpy as np

with open("secrets.json", "r") as f:
    secrets = json.load(f)

<IPython.core.display.Javascript object>

In [3]:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.oauth2 as oauth2


CLIENT_ID = secrets["spotify_client_id"]
CLIENT_SECRET = secrets["spotify_client_secret"]

credentials = oauth2.SpotifyClientCredentials(
    client_id=CLIENT_ID, client_secret=CLIENT_SECRET
)

token = credentials.get_access_token()
sp = spotipy.Spotify(auth=token)



  


<IPython.core.display.Javascript object>

In [4]:
def get_vecs(res):
    """
    Returns two arrays, pitch vectors then timbre vectors,
    consisting of the middle 50% of the song
    """
    pitch_vecs = []
    timbre_vecs = []

    song_dur = res["track"]["duration"]
    middle_50 = (song_dur * 0.25, song_dur * 0.75)
    begin, end = middle_50
    for segment in res["segments"]:
        if begin < segment["start"] < end:
            pitch_vecs.append(segment["pitches"])
            timbre_vecs.append(segment["timbre"])

    return pitch_vecs, timbre_vecs

<IPython.core.display.Javascript object>

In [5]:
def mean_vector_cov_1d(vectors, vec_type):

    cov_dict = {}
    if vec_type == "pitch":
        var_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
    elif vec_type == "timbre":
        var_names = ["t" + str(x) for x in range(12)]

    vecs_df = pd.DataFrame(np.array(vectors), columns=var_names)
    mean_dict = dict(vecs_df.mean())

    cov_matrix = pd.DataFrame(
        np.cov(np.array(vectors).T), columns=var_names, index=var_names
    )
    tall_cov = pd.melt(cov_matrix, ignore_index=False).reset_index()
    tall_cov.columns = ["c0", "c1", "covariance"]
    # removing redundant info
    tall_cov = tall_cov[tall_cov["c0"] <= tall_cov["c1"]]
    cov_dict = {}
    for i in range(tall_cov.shape[0]):
        curr_row = tall_cov.iloc[i, :]
        c0 = curr_row["c0"]
        c1 = curr_row["c1"]
        cov = curr_row["covariance"]
        cov_dict[f"{c0}-{c1}"] = cov

    return {**mean_dict, **cov_dict}

<IPython.core.display.Javascript object>

In [44]:
df = pd.read_csv("data/gen_playlist_tracks_full.csv", index_col=0)
df.head()

Unnamed: 0,id,artist,title,album,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,4Oun2ylbjFKMPTiaSbbCih,Cardi B,WAP (feat. Megan Thee Stallion),WAP (feat. Megan Thee Stallion),92,0.935,0.454,1,-7.509,1,0.375,0.0194,0.0,0.0824,0.357,133.073,187541,4
1,2SAqBLGA283SUiwJ3xOUVI,Drake,Laugh Now Cry Later (feat. Lil Durk),Laugh Now Cry Later (feat. Lil Durk),83,0.761,0.518,0,-8.871,1,0.134,0.244,3.5e-05,0.107,0.522,133.976,261493,4
2,3H7ihDc1dqLriiWXwsc2po,Topic,Breaking Me,Breaking Me,96,0.789,0.72,8,-5.652,0,0.218,0.223,0.0,0.129,0.664,122.031,166794,4
3,6UelLqGlWMcVH1E5c4H7lY,Harry Styles,Watermelon Sugar,Fine Line,97,0.548,0.816,0,-4.209,1,0.0465,0.122,0.0,0.335,0.557,95.39,174000,4
4,7ytR5pFWmSjzHJIeQkgog4,DaBaby,ROCKSTAR (feat. Roddy Ricch),BLAME IT ON BABY,100,0.746,0.69,11,-7.956,1,0.164,0.247,0.0,0.101,0.497,89.977,181733,4


<IPython.core.display.Javascript object>

In [51]:
df.shape

(13071, 18)

<IPython.core.display.Javascript object>

In [53]:
df["id"].nunique()

13071

<IPython.core.display.Javascript object>

In [6]:
res = sp.audio_analysis("0VjIjW4GlUZAMYd2vXMi3b")  # blinding lights, wknd

<IPython.core.display.Javascript object>

In [14]:
res["segments"]

[{'start': 0.0,
  'duration': 0.08857,
  'confidence': 0.493,
  'loudness_start': -51.234,
  'loudness_max_time': 0.03755,
  'loudness_max': -47.34,
  'loudness_end': 0.0,
  'pitches': [0.78,
   0.325,
   0.3,
   0.255,
   0.173,
   0.516,
   0.404,
   0.385,
   0.948,
   0.95,
   1.0,
   0.683],
  'timbre': [11.9,
   -43.549,
   -90.342,
   -75.396,
   38.456,
   -34.028,
   -39.009,
   -2.476,
   -24.523,
   1.69,
   3.832,
   -16.869]},
 {'start': 0.08857,
  'duration': 0.08889,
  'confidence': 0.416,
  'loudness_start': -48.034,
  'loudness_max_time': 0.06236,
  'loudness_max': -45.367,
  'loudness_end': 0.0,
  'pitches': [0.434,
   0.929,
   0.859,
   0.859,
   0.989,
   1.0,
   0.48,
   0.484,
   0.467,
   0.468,
   0.077,
   0.023],
  'timbre': [14.325,
   -70.178,
   -101.389,
   -99.259,
   8.625,
   -40.509,
   -12.995,
   -9.161,
   -22.542,
   6.045,
   -0.508,
   -22.13]},
 {'start': 0.17746,
  'duration': 0.16154,
  'confidence': 0.428,
  'loudness_start': -44.634,
  'lou

<IPython.core.display.Javascript object>

In [7]:
p, t = get_vecs(res)

<IPython.core.display.Javascript object>

In [10]:
len(t)

398

<IPython.core.display.Javascript object>

In [15]:
var_names = ["t" + str(x) for x in range(12)]

# vecs_df = pd.DataFrame(np.array(vectors), columns=var_names)
# mean_dict = dict(vecs_df.mean())

cov_matrix = pd.DataFrame(np.cov(np.array(t).T), columns=var_names, index=var_names)

<IPython.core.display.Javascript object>

In [20]:
cov_matrix.round(2)

Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11
t0,10.14,36.83,50.65,-8.09,17.62,5.72,22.24,-10.42,-3.97,2.05,-6.45,-6.08
t1,36.83,1370.81,-106.4,120.9,-34.64,141.26,-274.61,-111.97,-150.81,-231.02,-91.9,-128.78
t2,50.65,-106.4,1349.81,-304.23,440.8,276.44,413.22,193.22,48.77,133.59,39.66,-68.86
t3,-8.09,120.9,-304.23,1626.59,21.83,-261.82,-92.17,-189.71,17.59,-85.38,-120.68,49.1
t4,17.62,-34.64,440.8,21.83,577.18,101.91,296.31,36.0,104.4,-6.43,-17.55,40.36
t5,5.72,141.26,276.44,-261.82,101.91,753.81,83.9,46.25,44.91,-49.37,-82.69,-38.24
t6,22.24,-274.61,413.22,-92.17,296.31,83.9,521.36,50.49,50.34,35.21,20.03,88.28
t7,-10.42,-111.97,193.22,-189.71,36.0,46.25,50.49,444.23,29.09,-30.43,47.78,10.86
t8,-3.97,-150.81,48.77,17.59,104.4,44.91,50.34,29.09,252.71,-35.98,-29.72,17.71
t9,2.05,-231.02,133.59,-85.38,-6.43,-49.37,35.21,-30.43,-35.98,290.43,43.55,-1.98


<IPython.core.display.Javascript object>

In [12]:
timbre_cov_1d = mean_vector_cov_1d(t, "timbre")

<IPython.core.display.Javascript object>

In [19]:
timbre_cov_1d

{'t0': 52.03070351758797,
 't1': 59.63849497487441,
 't2': -7.607188442211055,
 't3': -13.38495728643216,
 't4': 15.617072864321614,
 't5': -26.836228643216092,
 't6': 9.30303266331659,
 't7': -0.40024874371859254,
 't8': 9.660532663316586,
 't9': 4.237399497487439,
 't10': -13.48411055276381,
 't11': -0.8962613065326637,
 't0-t0': 10.135725317418334,
 't0-t1': 36.83087206147867,
 't1-t1': 1370.8144387392692,
 't0-t2': 50.647110505702294,
 't1-t2': -106.40429665712065,
 't2-t2': 1349.8121804001685,
 't10-t2': 39.65872606223813,
 't11-t2': -68.86433126850878,
 't0-t3': -8.085262067908792,
 't1-t3': 120.89528624832604,
 't2-t3': -304.22634156120023,
 't3-t3': 1626.587650388599,
 't10-t3': -120.68253326478741,
 't11-t3': 49.09608261068569,
 't0-t4': 17.624109263471006,
 't1-t4': -34.644675356056084,
 't2-t4': 440.79925614978544,
 't3-t4': 21.83172055607381,
 't4-t4': 577.1810505110504,
 't10-t4': -17.547371057415546,
 't11-t4': 40.362343656367436,
 't0-t5': 5.7178750428717855,
 't1-t5': 1

<IPython.core.display.Javascript object>

In [71]:
credentials = oauth2.SpotifyClientCredentials(
    client_id=CLIENT_ID, client_secret=CLIENT_SECRET
)
token = credentials.get_access_token()
sp = spotipy.Spotify(auth=token)
song_pitches = []
song_timbres = []
count = 0
load_bar = 20
for song_id in df["id"]:
    try:
        res = sp.audio_analysis(song_id)
    except ReadTimeout:
        time.sleep(5.1)
        res = sp.audio_analysis(song_id)
    p, t = get_vecs(res)

    pitch_cov_1d = mean_vector_cov_1d(p, "pitch")
    timbre_cov_1d = mean_vector_cov_1d(t, "timbre")

    pitch_cov_1d["id"] = song_id
    timbre_cov_1d["id"] = song_id

    song_pitches.append(pitch_cov_1d)
    song_timbres.append(timbre_cov_1d)

    count += 1
    if count % load_bar == 0:
        print(f"{(count//load_bar)*load_bar} songs done")
    time.sleep(1)

  after removing the cwd from sys.path.


20 songs done
40 songs done
60 songs done
80 songs done
100 songs done
120 songs done
140 songs done
160 songs done
180 songs done
200 songs done
220 songs done
240 songs done
260 songs done


NameError: name 'ReadTimeout' is not defined

<IPython.core.display.Javascript object>

In [109]:
len(song_pitches)

13071

<IPython.core.display.Javascript object>

In [108]:
count = len(song_pitches)
load_bar = 20
for song_id in df["id"][len(song_pitches) :]:
    try:
        res = sp.audio_analysis(song_id)
    except:
        time.sleep(5.2)
        credentials = oauth2.SpotifyClientCredentials(
            client_id=CLIENT_ID, client_secret=CLIENT_SECRET
        )
        token = credentials.get_access_token()
        sp = spotipy.Spotify(auth=token)
        res = sp.audio_analysis(song_id)
    p, t = get_vecs(res)

    pitch_cov_1d = mean_vector_cov_1d(p, "pitch")
    timbre_cov_1d = mean_vector_cov_1d(t, "timbre")

    pitch_cov_1d["id"] = song_id
    timbre_cov_1d["id"] = song_id

    song_pitches.append(pitch_cov_1d)
    song_timbres.append(timbre_cov_1d)

    count += 1
    if count % load_bar == 0:
        print(f"{(count//load_bar)*load_bar} songs done")

7980 songs done
8000 songs done
8020 songs done
8040 songs done
8060 songs done
8080 songs done


  # This is added back by InteractiveShellApp.init_path()


8100 songs done
8120 songs done
8140 songs done
8160 songs done
8180 songs done
8200 songs done
8220 songs done
8240 songs done
8260 songs done
8280 songs done
8300 songs done
8320 songs done
8340 songs done
8360 songs done
8380 songs done
8400 songs done
8420 songs done
8440 songs done
8460 songs done
8480 songs done
8500 songs done
8520 songs done
8540 songs done
8560 songs done
8580 songs done
8600 songs done
8620 songs done
8640 songs done
8660 songs done
8680 songs done
8700 songs done
8720 songs done
8740 songs done
8760 songs done
8780 songs done
8800 songs done
8820 songs done
8840 songs done
8860 songs done
8880 songs done
8900 songs done
8920 songs done
8940 songs done
8960 songs done
8980 songs done
9000 songs done
9020 songs done
9040 songs done
9060 songs done
9080 songs done
9100 songs done
9120 songs done
9140 songs done
9160 songs done
9180 songs done
9200 songs done
9220 songs done
9240 songs done
9260 songs done
9280 songs done
9300 songs done
9320 songs done
9340 son

<IPython.core.display.Javascript object>

In [110]:
pitch_cov_df = pd.DataFrame(song_pitches)
timbre_cov_df = pd.DataFrame(song_timbres)

<IPython.core.display.Javascript object>

In [111]:
pitch_cov_df.to_csv("pitch_covariance.csv")
timbre_cov_df.to_csv("timbre_covariance.csv")

<IPython.core.display.Javascript object>

In [106]:
len(song_pitches)
7976 #as of going to bed

7976

<IPython.core.display.Javascript object>

In [7]:
p, t = get_vecs(res)

<IPython.core.display.Javascript object>

In [41]:
mean_vector_cov_1d(t, "timbre")

{'t0': 52.03070351758797,
 't1': 59.63849497487441,
 't2': -7.607188442211055,
 't3': -13.38495728643216,
 't4': 15.617072864321614,
 't5': -26.836228643216092,
 't6': 9.30303266331659,
 't7': -0.40024874371859254,
 't8': 9.660532663316586,
 't9': 4.237399497487439,
 't10': -13.48411055276381,
 't11': -0.8962613065326637,
 't0-t0': 10.135725317418334,
 't0-t1': 36.83087206147867,
 't1-t1': 1370.8144387392692,
 't0-t2': 50.647110505702294,
 't1-t2': -106.40429665712065,
 't2-t2': 1349.8121804001685,
 't10-t2': 39.65872606223813,
 't11-t2': -68.86433126850878,
 't0-t3': -8.085262067908792,
 't1-t3': 120.89528624832604,
 't2-t3': -304.22634156120023,
 't3-t3': 1626.587650388599,
 't10-t3': -120.68253326478741,
 't11-t3': 49.09608261068569,
 't0-t4': 17.624109263471006,
 't1-t4': -34.644675356056084,
 't2-t4': 440.79925614978544,
 't3-t4': 21.83172055607381,
 't4-t4': 577.1810505110504,
 't10-t4': -17.547371057415546,
 't11-t4': 40.362343656367436,
 't0-t5': 5.7178750428717855,
 't1-t5': 1

<IPython.core.display.Javascript object>

In [42]:
mean_vector_cov_1d(p, "pitch")

{'C': 0.6779824120603013,
 'C#': 0.6512889447236182,
 'D': 0.3382211055276382,
 'D#': 0.34702512562814053,
 'E': 0.301032663316583,
 'F': 0.4727160804020099,
 'F#': 0.29266331658291445,
 'G': 0.3262613065326634,
 'G#': 0.27354020100502524,
 'A': 0.24300000000000002,
 'A#': 0.28607286432160806,
 'B': 0.23455778894472365,
 'C-C': 0.1155442440223787,
 'A-C': 0.014035770780856424,
 'A#-C': 0.013557046624811712,
 'B-C': 0.029401657190233285,
 'C-C#': 0.010503435825221818,
 'C#-C#': 0.07468997423515564,
 'A-C#': 0.004847239294710326,
 'A#-C#': 0.00931655571940306,
 'B-C#': 0.010271082756351016,
 'C-D': -0.0062328953457463625,
 'C#-D': 0.02350007449084213,
 'D-D': 0.06187452025872436,
 'A-D': -0.003228604534005038,
 'A#-D': -0.011275293229371038,
 'B-D': 0.020099138323861114,
 'C-D#': -0.023497631798792447,
 'C#-D#': 0.02842679624824374,
 'D-D#': 0.051469546067870846,
 'D#-D#': 0.07245563160892625,
 'A-D#': -0.00811265743073048,
 'A#-D#': -0.012273334329076114,
 'B-D#': 0.011064985949900642,


<IPython.core.display.Javascript object>