In [1]:
import pandas as pd
import numpy as np
import requests

### Create streaming dataframe

In [2]:
# read your 1+ StreamingHistory files (depending on how extensive your streaming history is) into pandas dataframes
df_stream = pd.read_json('./MyData/StreamingHistory0.json')
#df_stream1 = pd.read_json('\MyData\streamingHistory1.json')

# merge streaming dataframes
#df_stream = pd.concat([df_stream0, df_stream1])

# create a 'UniqueID' for each song by combining the fields 'artistName' and 'trackName'
df_stream['UniqueID'] = df_stream['artistName'] + ":" + df_stream['trackName']

df_stream.head()

Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID
0,2021-03-15 14:04,Crocodiles,Groove Is in the Heart / California Girls,682,Crocodiles:Groove Is in the Heart / California...
1,2021-03-15 15:09,Global News Podcast,Myanmar: Protesters killed in clashes,5674,Global News Podcast:Myanmar: Protesters killed...
2,2021-03-15 15:09,Conor Oberst,A Little Uncanny,124778,Conor Oberst:A Little Uncanny
3,2021-03-15 15:50,Global News Podcast,Myanmar: Protesters killed in clashes,1622749,Global News Podcast:Myanmar: Protesters killed...
4,2021-03-15 16:21,Global News Podcast,Myanmar: Protesters killed in clashes,9813,Global News Podcast:Myanmar: Protesters killed...


In [3]:
import json
from pandas.io.json import json_normalize


### Create library Dataframe

In [4]:

with open('./MyData/YourLibrary.json') as json_data:
    data = json.load(json_data)

df_library = pd.DataFrame(data['tracks'])


In [5]:

# add UniqueID column (same as above)
df_library['UniqueID'] = df_library['artist'] + ":" + df_library['track']

# add column with track URI stripped of 'spotify:track:'
new = df_library["uri"].str.split(":", expand = True)
df_library['track_uri'] = new[2]

df_library.head()

Unnamed: 0,artist,album,track,uri,UniqueID,track_uri
0,Johannes Brahms,Brahms: Hungarian Dances Nos. 1-21,"21 Hungarian Dances, WoO 1 (version for orches...",spotify:track:1e7uZfAoweDFcfrYVuifyk,"Johannes Brahms:21 Hungarian Dances, WoO 1 (ve...",1e7uZfAoweDFcfrYVuifyk
1,Jason Isbell,Southeastern,Live Oak,spotify:track:51JJuDBT4n2bgVasYgkBjD,Jason Isbell:Live Oak,51JJuDBT4n2bgVasYgkBjD
2,Steve Poltz,Live at the Belly Up,Kickin It Old School (Live),spotify:track:0mqcMV0qClE0p5hY7HqlT6,Steve Poltz:Kickin It Old School (Live),0mqcMV0qClE0p5hY7HqlT6
3,Chris Smither,Call Me Lucky,Change Your Mind,spotify:track:4tuWzu4mEMA2p0PPekTCpH,Chris Smither:Change Your Mind,4tuWzu4mEMA2p0PPekTCpH
4,Melody Pool,Deep Dark Savage Heart,Southern Nightshade,spotify:track:3nsW6nibHc3BtTblWF5EE0,Melody Pool:Southern Nightshade,3nsW6nibHc3BtTblWF5EE0


In [6]:
# create final dict as a copy df_stream
df_tableau = df_stream.copy()

# add column checking if streamed song is in library
# not used in this project but could be helpful for cool visualizations
df_tableau['In Library'] = np.where(df_tableau['UniqueID'].isin(df_library['UniqueID'].tolist()),1,0)

# left join with df_library on UniqueID to bring in album and track_uri
df_tableau = pd.merge(df_tableau, df_library[['album','UniqueID','track_uri']],how='left',on=['UniqueID'])

df_tableau.head()

Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID,In Library,album,track_uri
0,2021-03-15 14:04,Crocodiles,Groove Is in the Heart / California Girls,682,Crocodiles:Groove Is in the Heart / California...,0,,
1,2021-03-15 15:09,Global News Podcast,Myanmar: Protesters killed in clashes,5674,Global News Podcast:Myanmar: Protesters killed...,0,,
2,2021-03-15 15:09,Conor Oberst,A Little Uncanny,124778,Conor Oberst:A Little Uncanny,0,,
3,2021-03-15 15:50,Global News Podcast,Myanmar: Protesters killed in clashes,1622749,Global News Podcast:Myanmar: Protesters killed...,0,,
4,2021-03-15 16:21,Global News Podcast,Myanmar: Protesters killed in clashes,9813,Global News Podcast:Myanmar: Protesters killed...,0,,


### Create Genre Dataframe

In [7]:
from env import CLIENT_ID, CLIENT_SECRET


In [8]:
# generate access token

# authentication URL
AUTH_URL = 'https://accounts.spotify.com/api/token'

# POST
auth_response = requests.post(AUTH_URL, {
    'grant_type': 'client_credentials',
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
})

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

In [9]:
# used for authenticating all API calls
headers = {'Authorization': 'Bearer {token}'.format(token=access_token)}

In [10]:
# base URL of all Spotify API endpoints
BASE_URL = 'https://api.spotify.com/v1/'

In [11]:
# create blank dictionary to store track URI, artist URI, and genres
dict_genre = {}

# convert track_uri column to an iterable list
track_uris = df_library['track_uri'].to_list()

# loop through track URIs and pull artist URI using the API,
# then use artist URI to pull genres associated with that artist
# store all these in a dictionary
for t_uri in track_uris:
    
    dict_genre[t_uri] = {'artist_uri': "", "genres":[]}
    
    r = requests.get(BASE_URL + 'tracks/' + t_uri, headers=headers)
    r = r.json()
    a_uri = r['artists'][0]['uri'].split(':')[2]
    dict_genre[t_uri]['artist_uri'] = a_uri
    
    s = requests.get(BASE_URL + 'artists/' + a_uri, headers=headers)
    s = s.json()
    dict_genre[t_uri]['genres'] = s['genres']

KeyboardInterrupt: 

In [None]:
# convert dictionary into dataframe with track_uri as the first column
df_genre = pd.DataFrame.from_dict(dict_genre, orient='index')
df_genre.insert(0, 'track_uri', df_genre.index)
df_genre.reset_index(inplace=True, drop=True)

df_genre.head()

In [None]:
df_genre_expanded = df_genre.explode('genres')
df_genre_expanded.head()

In [None]:
# save df_tableau and df_genre_expanded as csv files that we can load into Tableau
df_tableau.to_csv('MySpotifyDataTable.csv')
df_genre_expanded.to_csv('GenresExpandedTable.csv')

print('done')

---

In [13]:
pd.set_option('max_colwidth', 400)

In [12]:
#Bring in wranGgled data
df_tableau = pd.read_csv('MySpotifyDataTable.csv')
df_genre_expanded = pd.read_csv('GenresExpandedTable.csv')


In [31]:
df_tableau.tail()

Unnamed: 0.1,Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID,In Library,album,track_uri,minPlayed
8052,8052,2022-03-14 03:47,Miko Marks,Long Journey Home,187255,Miko Marks:Long Journey Home,0,,,3.120917
8053,8053,2022-03-14 03:47,Chicago Farmer,$13 Beers,10710,Chicago Farmer:$13 Beers,0,,,0.1785
8054,8054,2022-03-14 12:51,Global News Podcast,Russia strikes Ukraine army base near Polish border,4544,Global News Podcast:Russia strikes Ukraine army base near Polish border,0,,,0.075733
8055,8055,2022-03-14 13:21,Global News Podcast,Russia strikes Ukraine army base near Polish border,1761543,Global News Podcast:Russia strikes Ukraine army base near Polish border,0,,,29.35905
8056,8056,2022-03-14 13:25,Chicago Farmer,$13 Beers,198245,Chicago Farmer:$13 Beers,0,,,3.304083


In [18]:
df_genre_expanded.head(100)

Unnamed: 0.1,Unnamed: 0,track_uri,artist_uri,genres
0,0,1e7uZfAoweDFcfrYVuifyk,5wTAi7QkpP6kp8a54lmTOq,classical
1,0,1e7uZfAoweDFcfrYVuifyk,5wTAi7QkpP6kp8a54lmTOq,german romanticism
2,0,1e7uZfAoweDFcfrYVuifyk,5wTAi7QkpP6kp8a54lmTOq,late romantic era
3,1,51JJuDBT4n2bgVasYgkBjD,3Q8wgwyVVv0z4UEh1HB0KY,alternative country
4,1,51JJuDBT4n2bgVasYgkBjD,3Q8wgwyVVv0z4UEh1HB0KY,folk
...,...,...,...,...
95,17,7qbv4GNE6rFpYe78BIcu4v,3ZWab2LEVkNKiBPIClTwof,roots rock
96,17,7qbv4GNE6rFpYe78BIcu4v,3ZWab2LEVkNKiBPIClTwof,singer-songwriter
97,17,7qbv4GNE6rFpYe78BIcu4v,3ZWab2LEVkNKiBPIClTwof,texas country
98,18,2IB4JNfzBCtL4OL4PuHV88,0nJUwPwC9Ti4vvuJ0q3MfT,alternative country


---

### Get rid of podcasts 

When looking at the viz, podcasts are disproportionately represented by their length. Find a feature to filter them out and re-export

In [21]:
df_tableau[df_tableau['artistName'].str.contains('Podcast')]

Unnamed: 0.1,Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID,In Library,album,track_uri
1,1,2021-03-15 15:09,Global News Podcast,Myanmar: Protesters killed in clashes,5674,Global News Podcast:Myanmar: Protesters killed in clashes,0,,
3,3,2021-03-15 15:50,Global News Podcast,Myanmar: Protesters killed in clashes,1622749,Global News Podcast:Myanmar: Protesters killed in clashes,0,,
4,4,2021-03-15 16:21,Global News Podcast,Myanmar: Protesters killed in clashes,9813,Global News Podcast:Myanmar: Protesters killed in clashes,0,,
5,5,2021-03-15 16:26,Global News Podcast,Myanmar: Protesters killed in clashes,283847,Global News Podcast:Myanmar: Protesters killed in clashes,0,,
12,12,2021-03-16 16:08,Global News Podcast,Mozambique militants 'beheading children',25671,Global News Podcast:Mozambique militants 'beheading children',0,,
...,...,...,...,...,...,...,...,...,...
7897,7897,2022-03-09 15:30,Global News Podcast,"Zelensky vows to fight Russia in 'forests, fields and shores'",1038458,"Global News Podcast:Zelensky vows to fight Russia in 'forests, fields and shores'",0,,
7898,7898,2022-03-09 18:42,Global News Podcast,"Zelensky vows to fight Russia in 'forests, fields and shores'",907520,"Global News Podcast:Zelensky vows to fight Russia in 'forests, fields and shores'",0,,
7997,7997,2022-03-11 19:26,Global News Podcast,Ukraine: Mariupol 'running out of food and water',1804027,Global News Podcast:Ukraine: Mariupol 'running out of food and water',0,,
8054,8054,2022-03-14 12:51,Global News Podcast,Russia strikes Ukraine army base near Polish border,4544,Global News Podcast:Russia strikes Ukraine army base near Polish border,0,,


In [22]:
df_tableau['minPlayed']=df_tableau.msPlayed/60000

Try filtering by song length. >10 min for podcasts - steve poltz live filtered out. Changed to 11 - lost emma smith sad eyed lady of the lowlands... Changed to 12 - still losing some rimsky-kosakov, stravinski, and other classical, but it is an improvement

- No keywords in the track or artist name for easy filtering - was hoping a genre merge would help

no track_uri or album for podcast, but that could be the answer

In [32]:
pd.set_option('display.max_rows', 100)

In [33]:
df_tableau[df_tableau['minPlayed']>12]

Unnamed: 0.1,Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID,In Library,album,track_uri,minPlayed
3,3,2021-03-15 15:50,Global News Podcast,Myanmar: Protesters killed in clashes,1622749,Global News Podcast:Myanmar: Protesters killed in clashes,0,,,27.045817
15,15,2021-03-16 16:39,Global News Podcast,Mozambique militants 'beheading children',1814043,Global News Podcast:Mozambique militants 'beheading children',0,,,30.234050
16,16,2021-03-16 17:08,Global News Podcast,Myanmar military continues its suppression of protesters,1653390,Global News Podcast:Myanmar military continues its suppression of protesters,0,,,27.556500
17,17,2021-03-16 17:25,Global News Podcast,Minneapolis to pay George Floyd family $27m,1048608,Global News Podcast:Minneapolis to pay George Floyd family $27m,0,,,17.476800
21,21,2021-03-18 14:45,Global News Podcast,Russia recalls envoy after Biden remarks about Putin,1705039,Global News Podcast:Russia recalls envoy after Biden remarks about Putin,0,,,28.417317
...,...,...,...,...,...,...,...,...,...,...
7725,7725,2022-03-01 18:34,Up First,"Tuesday, March 1, 2022",761927,"Up First:Tuesday, March 1, 2022",0,,,12.698783
7897,7897,2022-03-09 15:30,Global News Podcast,"Zelensky vows to fight Russia in 'forests, fields and shores'",1038458,"Global News Podcast:Zelensky vows to fight Russia in 'forests, fields and shores'",0,,,17.307633
7898,7898,2022-03-09 18:42,Global News Podcast,"Zelensky vows to fight Russia in 'forests, fields and shores'",907520,"Global News Podcast:Zelensky vows to fight Russia in 'forests, fields and shores'",0,,,15.125333
7997,7997,2022-03-11 19:26,Global News Podcast,Ukraine: Mariupol 'running out of food and water',1804027,Global News Podcast:Ukraine: Mariupol 'running out of food and water',0,,,30.067117


In [36]:
df_tableau[df_tableau['track_uri'].isnull()]


Unnamed: 0.1,Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID,In Library,album,track_uri,minPlayed
0,0,2021-03-15 14:04,Crocodiles,Groove Is in the Heart / California Girls,682,Crocodiles:Groove Is in the Heart / California Girls,0,,,0.011367
1,1,2021-03-15 15:09,Global News Podcast,Myanmar: Protesters killed in clashes,5674,Global News Podcast:Myanmar: Protesters killed in clashes,0,,,0.094567
2,2,2021-03-15 15:09,Conor Oberst,A Little Uncanny,124778,Conor Oberst:A Little Uncanny,0,,,2.079633
3,3,2021-03-15 15:50,Global News Podcast,Myanmar: Protesters killed in clashes,1622749,Global News Podcast:Myanmar: Protesters killed in clashes,0,,,27.045817
4,4,2021-03-15 16:21,Global News Podcast,Myanmar: Protesters killed in clashes,9813,Global News Podcast:Myanmar: Protesters killed in clashes,0,,,0.163550
...,...,...,...,...,...,...,...,...,...,...
8052,8052,2022-03-14 03:47,Miko Marks,Long Journey Home,187255,Miko Marks:Long Journey Home,0,,,3.120917
8053,8053,2022-03-14 03:47,Chicago Farmer,$13 Beers,10710,Chicago Farmer:$13 Beers,0,,,0.178500
8054,8054,2022-03-14 12:51,Global News Podcast,Russia strikes Ukraine army base near Polish border,4544,Global News Podcast:Russia strikes Ukraine army base near Polish border,0,,,0.075733
8055,8055,2022-03-14 13:21,Global News Podcast,Russia strikes Ukraine army base near Polish border,1761543,Global News Podcast:Russia strikes Ukraine army base near Polish border,0,,,29.359050


Null search gives 6411 rows including lots of podcasts, but apparently also songs