In [1]:
import numpy as np
import ast

from spotify import *

# Load the listening history (getSpotifyHistory is presumably supplied by the
# star import from the local `spotify` module -- confirm).
history = getSpotifyHistory()

# Derive play time in hours and in minutes from the millisecond column:
# 3,600,000 ms per hour and 60,000 ms per minute.
history = history.assign(
    hrPlayed=lambda plays: plays['msPlayed'] / 3600000,
    mnPlayed=lambda plays: plays['msPlayed'] / 60000,
)

# Save the augmented raw history into the Tableau data folder.
history.to_csv('tableauData/rawSpotifyHistory.csv')

# Calling Spotify API to get details on tracks from history file
# tracks_df = None
# for index, row in history[['artistName', 'trackName']].drop_duplicates().iterrows():
#     raw_data = sp.search(
#             f'track:{row.trackName} artist:{row.artistName}',
#             type='track',
#             limit=1
#         )['tracks']
#     if not raw_data['items']:
#         continue
#     clean_data = cleanSpotifyData(raw_data)
#     try:
#         tracks_df = tracks_df.append(clean_data, ignore_index = True)
#     except AttributeError:
#         tracks_df = clean_data
        
# full_history = history.merge(
#     tracks_df,
#     'inner',
#     left_on = ['artistName', 'trackName'],
#     right_on = ['artists.name', 'name']
# )
# full_history.to_csv('data/fullSpotifyHistory.csv')

# Load the cached, track-enriched history instead of re-querying the API.
full_history = pd.read_csv('data/fullSpotifyHistory.csv')

# Tidy the columns: discard the saved index column and the artist fields
# that are not needed downstream, then normalise the artist/genre names.
full_history = full_history.drop(
    columns=[
        'Unnamed: 0',
        'artists.href',
        'artists.name',
        'artists.type',
        'artists.uri',
        'artists.external_urls.spotify',
        'artistName',
    ]
).rename(
    columns={
        'artists.id': 'artist.id',
        "genres": 'genre',
    }
)

# Distinct track ids and artist ids, in order of first appearance.
track_ids = full_history['id'].drop_duplicates().tolist()
artist_ids = full_history['artist.id'].drop_duplicates().tolist()

In [2]:
# artist_data = pd.DataFrame()
# for id in artist_ids:
#     raw_data = sp.artist(id)
#     clean_data = pd.json_normalize(raw_data, 'genres', 'id').rename(columns={0:'genres'})
#     # Appending the data to the DataFrame
#     artist_data = artist_data.append(clean_data)
# artist_data.to_csv('data/artistData.csv')

# Reading in the cached artist data (one row per artist).
artist_data = pd.read_csv('data/artistData.csv')

# The CSV stores each artist's genres as the text of a Python list literal;
# ast.literal_eval parses that text back into a real list without running
# arbitrary code.
artist_data['genre'] = artist_data['genres'].apply(ast.literal_eval)

# Expand to one row per (artist, genre) pair. pandas' explode yields NaN for an
# empty list, so artists without genres stay in the frame with a missing genre.
# Columns that are not used by the later cells are dropped.
artist_data = artist_data.explode('genre').drop(
    columns=[
        'Unnamed: 0', 'href', 'popularity',
        'type', 'uri', 'external_urls.spotify',
        'followers.href',
    ]
)

artist_data.to_csv('data/longArtistData.csv')

# Export the distinct individual genres. This must read the exploded `genre`
# column: `genres` still holds the unexploded list text, so de-duplicating it
# produced unique genre *lists* rather than unique genres. The NaN placeholder
# for genre-less artists is not a genre and is left out.
artist_data['genre'].dropna().drop_duplicates().to_csv('data/allGenres.csv')

In [3]:
# Genre popularity: number of (artist, genre) rows per genre, most common first.
# Only `id` is counted, which is the column that becomes `n`.
genre_popularity = (
    artist_data
    .groupby('genre')[['id']]
    .count()
    .rename(columns={'id': 'n'})
    .sort_values('n', ascending=False)
    .reset_index()
)

# Attach each genre's popularity to every distinct (artist name, genre) pair.
# The inner merge on `genre` also discards pairs whose genre is missing.
long_artist_data = (
    artist_data[['name', 'genre']]
    .drop_duplicates()
    .merge(genre_popularity, on='genre')
)

# Keep, for every artist name, the row of its most popular genre.
# idxmax returns index *labels*, so the rows are selected with .loc; the
# previous positional .iloc lookup was only correct while labels and
# positions happened to coincide.
artistGenres = long_artist_data.loc[
    long_artist_data.groupby('name')['n'].idxmax()
]

# Merging API data with artist genres: keeps only the (name, genre) row that
# matches each artist's chosen genre.
full_artist_data = artist_data.merge(
    artistGenres,
    how='inner',
    on=['name', 'genre'],
)

# Cleaning the artist data for necessary columns and valid records
full_artist_data = full_artist_data[full_artist_data['genre'].notna()].drop(
    columns=['images', 'followers.total', 'genres', 'n']
).rename(columns={
    'id': 'artist.id',
    'name': 'artist.name',
})

# Merging tracks history with artist data. The join key is left implicit
# (all shared column names) on purpose -- presumably just 'artist.id', but
# full_history may also carry a 'genre' column; confirm before pinning `on=`.
detailed_history = full_history.merge(
    full_artist_data[['artist.id', 'genre', 'artist.name']],
    how='inner',
).drop_duplicates()

# Writing data to csv. artistGenres gets its own file: it used to be written
# to 'data/cleanArtistData.csv' and was immediately overwritten by
# full_artist_data on the next line, so it was never actually saved.
genre_popularity.to_csv('data/genreData.csv')
artistGenres.to_csv('data/artistGenres.csv')
full_artist_data.to_csv('data/cleanArtistData.csv')
detailed_history.to_csv('tableauData/detailedSpotifyHistory.csv')

In [4]:
# Show the merged history frame as this cell's output for a visual sanity
# check; pandas truncates the rendering to the first and last rows.
detailed_history

Unnamed: 0,endTime,trackName,msPlayed,hrPlayed,mnPlayed,duration_ms,explicit,id,name,artist.id,album.id,album.images,album.name,album.release_date,album.total_tracks,genre,artist.name
0,2020-08-19 15:17,Rainy Day,46374,0.012882,0.772900,171177.0,False,5QSQLBGpNeTKqc75hQRCcu,Rainy Day,3MPp5u3D5zzyYaAT9qFpTV,6yJHxXpLynEDcPHcSnBUcS,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Window Seat,2020-08-12,12.0,lo-fi beats,tysu
1,2020-10-21 18:48,Rainy Day,171177,0.047549,2.852950,171177.0,False,5QSQLBGpNeTKqc75hQRCcu,Rainy Day,3MPp5u3D5zzyYaAT9qFpTV,6yJHxXpLynEDcPHcSnBUcS,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Window Seat,2020-08-12,12.0,lo-fi beats,tysu
2,2020-12-18 16:02,Rainy Day,171177,0.047549,2.852950,171177.0,False,5QSQLBGpNeTKqc75hQRCcu,Rainy Day,3MPp5u3D5zzyYaAT9qFpTV,6yJHxXpLynEDcPHcSnBUcS,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Window Seat,2020-08-12,12.0,lo-fi beats,tysu
3,2021-02-22 13:17,Rainy Day,171177,0.047549,2.852950,171177.0,False,5QSQLBGpNeTKqc75hQRCcu,Rainy Day,3MPp5u3D5zzyYaAT9qFpTV,6yJHxXpLynEDcPHcSnBUcS,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Window Seat,2020-08-12,12.0,lo-fi beats,tysu
4,2021-04-16 13:36,Rainy Day,171177,0.047549,2.852950,171177.0,False,5QSQLBGpNeTKqc75hQRCcu,Rainy Day,3MPp5u3D5zzyYaAT9qFpTV,6yJHxXpLynEDcPHcSnBUcS,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Window Seat,2020-08-12,12.0,lo-fi beats,tysu
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23660,2021-10-13 17:23,Little Sunflower,225293,0.062581,3.754883,225293.0,False,3A6e928yOFJ0MhlG7xrFDW,Little Sunflower,0dAZ2slrElfR0Y5flcoSPt,1XEPKavl3nlI2qVt8HuA5n,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Afro-Harping,1968-01-01,10.0,funk,Dorothy Ashby
23661,2021-10-13 17:27,Somewhere I Belong,231466,0.064296,3.857767,231466.0,False,5ViM2bwFjNqx6K7w1iFlOE,Somewhere I Belong,38CPjAAqYV8lTYi6clzbjG,1q2ErTOAW7U3cx1LqucD1a,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",1969,1969,11.0,spiritual jazz,Gábor Szabó
23662,2021-10-13 17:31,The Warnings Part II,267693,0.074359,4.461550,267693.0,False,0KlruldrjiQnw4p9zrz9kh,The Warnings Part II,4hCKF3RZSkFSMntkfCxO74,2AwseJZN1t6y25o4P4asqH,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Earth Rot,1970,8.0,jazz funk,David Axelrod
23663,2021-10-13 17:41,Space is the Place,587150,0.163097,9.785833,587150.0,False,1hpdVGEaZovrlcnY0Yqcc9,Space is the Place,0tIODqvzGUoEaK26rK4pvX,6YJt08xA1gemCdsFmHVT4u,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",The Other Side of the Sun,1979,5.0,art rock,Sun Ra


In [5]:
# Finding the top artists I listen to.
#
# Aggregate only the columns that are exported, by name. Summing every column
# with np.sum relied on pandas silently discarding non-numeric columns, which
# pandas >= 2.0 no longer does (text columns get concatenated or raise).
# Named aggregation also yields the play count in the same pass, so no second
# groupby and merge are needed.
artist_totals = detailed_history.groupby(
    ['artist.id', 'artist.name', 'genre']
).agg(
    hrPlayed=('hrPlayed', 'sum'),
    mnPlayed=('mnPlayed', 'sum'),
    # Number of history rows (with a non-null track id) per artist.
    timesPlayed=('id', 'count'),
)

# Play counts for every artist, kept under its original name.
timesPlayed = artist_totals['timesPlayed']

# The 50 artists with the most listening time, longest first.
groupedArtist = artist_totals.sort_values(
    'hrPlayed',
    ascending=False
).head(50)

groupedArtist.to_csv('tableauData/topSpotifyArtist.csv')

In [6]:
# Finding the top songs I listen to.
#
# Aggregate only the columns that are exported, by name. Summing every column
# with np.sum relied on pandas silently discarding non-numeric columns, which
# pandas >= 2.0 no longer does (text columns get concatenated or raise).
# Named aggregation also yields the play count in the same pass, so no second
# groupby and merge are needed.
track_totals = detailed_history.groupby(
    ['trackName', 'artist.name', 'genre']
).agg(
    msPlayed=('msPlayed', 'sum'),
    hrPlayed=('hrPlayed', 'sum'),
    mnPlayed=('mnPlayed', 'sum'),
    # Track length summed over every play of the track.
    duration_ms=('duration_ms', 'sum'),
    # Number of history rows (with a non-null track id) per track.
    timesPlayed=('id', 'count'),
)

# Play counts for every track, kept under its original name.
timesPlayed = track_totals['timesPlayed']

# The 60 tracks with the most listening time, longest first.
groupedTracks = track_totals.sort_values(
    'hrPlayed',
    ascending=False
).head(60)

groupedTracks.to_csv('tableauData/topSpotifyTracks.csv')

In [7]:
# Current top Spotify tracks (short-term listening window).
# Authenticate, then ask the API for up to 50 top tracks.
sp = spotify_authentication()
tracks = sp.current_user_top_tracks(time_range='short_term', limit=50)

# Flatten the API response with the project helper, then attach each
# artist's genre and name from the cleaned artist table (inner join on the
# columns the two frames share).
df = cleanSpotifyData(tracks)
df = df.merge(
    full_artist_data[['artist.id', 'genre', 'artist.name']],
    how='inner',
)

# Export for Tableau.
df.to_csv('tableauData/topSpotifyTracksNow.csv')

Max Retries reached


SpotifyException: http status: 429, code:-1 - /v1/me/top/tracks?time_range=short_term&limit=50&offset=0:
 Max Retries, reason: too many 503 error responses