# Preparing Spotify Data for Tableau

In [None]:
import pandas as pd
import numpy as np
import requests

# Creating Streaming/Library Dataframe

In [1]:
# Reading my 1+ StreamingHistory files
# (Depending on how extensive the streaming is)
# into pandas dataframes

df_streamAlpha = pd.read_json('StreamingHistory0.json')
df_streamBeta = pd.read_json('StreamingHistory1.json')

# Merging streaming dataframes.
# ignore_index=True renumbers the combined rows 0..n-1; without it each
# frame keeps its own row labels, so the same label can appear more than
# once in df_stream and break later label-based operations.
df_stream = pd.concat([df_streamAlpha, df_streamBeta], ignore_index=True)

# Creating a 'UniqueID' for each song by combining the fields 'artistName' and 'trackName'
df_stream['UniqueID'] = df_stream['artistName'] + ":" + df_stream['trackName']

df_stream.head()

In [None]:
# Load the edited Library json file and derive two helper columns in one chain:
#   UniqueID  - '<artist>:<track>', the same key format built for the streaming data
#   track_uri - third ':'-separated piece of 'uri', i.e. the URI stripped of 'spotify:track:'
df_library = pd.read_json('YourLibrary1.json').assign(
    UniqueID=lambda lib: lib['artist'] + ":" + lib['track'],
    track_uri=lambda lib: lib['uri'].str.split(":", expand=True)[2],
)

df_library.head()

In [None]:
# Creating the final dataframe as a copy of df_stream
df_tableau = df_stream.copy()

# Adding column checking if streamed song in library (1 = in library, 0 = not)
# not used in this project, but could be helpful for cool visualizations
df_tableau['In Library'] = np.where(df_tableau['UniqueID'].isin(df_library['UniqueID']), 1, 0)

# Left join with df_library on UniqueID to bring in album and track_uri.
# The library side is first reduced to one row per UniqueID: if the same
# artist/track pair occurs more than once in the library, a plain left join
# would repeat every matching stream row and inflate the play counts.
library_lookup = df_library[['album', 'UniqueID', 'track_uri']].drop_duplicates(subset='UniqueID')
df_tableau = pd.merge(df_tableau, library_lookup, how='left', on=['UniqueID'])

df_tableau.head()

# Creating Genre Dataframe

In [None]:
# Saving IDs from new project in Spotify Developer Dashboard.
# The environment variables SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET take
# precedence so real credentials can be kept out of this file; the literals
# below are only the fallback used when those variables are not set.
import os

CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', '****e6bb')
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', '****SECRET')

In [None]:
# Generating access token

# Authentication URL (Spotify Accounts service token endpoint)
AUTH_URL = 'https://accounts.spotify.com/api/token'

# Post the client-credentials grant as form data
auth_response = requests.post(AUTH_URL, {
    'grant_type': 'client_credentials',
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
})

# Fail with the HTTP error itself (e.g. bad client id/secret) instead of a
# less helpful KeyError on 'access_token' further down
auth_response.raise_for_status()

# Convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

In [None]:
# Authenticating all API calls: the bearer token must be sent in the
# 'Authorization' header (a misspelled header name is not treated as
# credentials, so the request would go out unauthenticated)
headers = {'Authorization': 'Bearer {token}'.format(token=access_token)}

In [None]:
# Common prefix shared by every Spotify API endpoint; resource paths
# such as 'tracks/<id>' are appended to it
BASE_URL = "https://api.spotify.com/v1/"

In [None]:
# A blank dictionary to store, per track URI, the artist URI and genres:
#   dict_genre[track_uri] = {'artist_uri': str, 'genres': list}
dict_genre = {}

# Genres already fetched, keyed by artist URI, so an artist with several
# library tracks is only requested from the API once
artist_genres = {}

# Converting track_uri column to an iterable list.
# Missing values are dropped (they cannot be concatenated into a URL) and
# repeats are removed (they would only overwrite the same dictionary key).
track_uris = df_library['track_uri'].dropna().unique().tolist()

# Looping through track URIs and pull artist URI using the API,
# then use artists URI to pull genres associated with that artist
# store all these in a dictionary
for t_uri in track_uris:
    track_response = requests.get(BASE_URL + 'tracks/' + t_uri, headers=headers)
    # Surface HTTP failures (expired token, rate limit, ...) directly rather
    # than as a KeyError on the error payload
    track_response.raise_for_status()
    # Only the first listed artist is used; its URI has the form
    # '<prefix>:<type>:<id>' and the third piece is kept
    a_uri = track_response.json()['artists'][0]['uri'].split(':')[2]

    if a_uri not in artist_genres:
        artist_response = requests.get(BASE_URL + 'artists/' + a_uri, headers=headers)
        artist_response.raise_for_status()
        artist_genres[a_uri] = artist_response.json()['genres']

    # Single 'genres' key (the previous version initialised 'genre' but
    # filled 'genres', leaving a stale empty 'genre' entry per track).
    # The list is copied so tracks of the same artist do not share one object.
    dict_genre[t_uri] = {
        'artist_uri': a_uri,
        'genres': list(artist_genres[a_uri]),
    }