![spotify_logo](images/spotify_logo2.1.png)

# Spotify API Scrape

## Project Goal
1. Pull most recently played tracks
2. Pull top listen to tracks

## Helpful Links:
- [Spotify Web API - Authorization Guide](https://developer.spotify.com/documentation/general/guides/authorization-guide/)
- [Spotify API References](https://developer.spotify.com/documentation/web-api/reference/)

In [31]:
import config
import os
import requests
import json
from json import JSONEncoder
import pandas as pd
from datetime import datetime,timezone, timedelta
from tqdm import tqdm
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import base64
from urllib.parse import urlencode

import pytz
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import xlsxwriter
from tzlocal import get_localzone
import dateutil

In [32]:
# Create the file name we will be exporting later
today = datetime.today().strftime('%Y%m%d')
file_name = f"Spotify_Export_{today}.xlsx"
file_name

'Spotify_Export_20200821.xlsx'

## Step 1: Get Access Token

In [33]:
client_id = config.client_id
client_secret = config.client_secret

In [34]:
username = config.username
client_id = config.client_id
client_secret = config.client_secret
redirect_uri = 'http://localhost:7777/callback'
scope = 'user-read-recently-played'

auth_token = util.prompt_for_user_token(username=username, 
                                   scope=scope, 
                                   client_id=client_id,   
                                   client_secret=client_secret,     
                                   redirect_uri=redirect_uri)

## Step 2: Pull Recently Played

In [35]:
base_url = 'https://api.spotify.com/v1/me/player/recently-played?'
#track_id = '6y0igZArWVi6Iz0rj35c1Y'

#2. Authentication
#3. Parameters -- would be stored with authentication
headers = {
    "Authorization": f"Bearer {auth_token}"
}

#4. Create an empty list
personal_data = [] #would be good explore how to capture data at different points in time
r = requests.get(base_url+"&limit=50", headers=headers)
personal_data.append(json.loads(r.text))

In [58]:
personal_data[0]['items'][3]['track']['artists'][0]['id']

'3NPpFNZtSTHheNBaWC82rB'

In [59]:
track_ids = []
album_ids = []
artist_ids = []
album_names = []
artist_names = []
track_names = []
popularity_ls = []
played_ats = []

for i in range(len(personal_data[0]['items'])):
    track_ids.append(personal_data[0]['items'][i]['track']['id']) # Track ID
    album_ids.append(personal_data[0]['items'][i]['track']['album']['id']) # Albumn ID
    artist_ids.append(personal_data[0]['items'][i]['track']['artists'][0]['id']) # Artist ID
    album_names.append(personal_data[0]['items'][i]['track']['album']['name']) # Album Name
    artist_names.append(personal_data[0]['items'][i]['track']['artists'][0]['name']) # Artist Name
    track_names.append(personal_data[0]['items'][i]['track']['name']) # Track Name
    popularity_ls.append(personal_data[0]['items'][i]['track']['popularity']) # Track Popularity
    played_ats.append(personal_data[0]['items'][i]['played_at'])

In [61]:
list_dic={'track_id':track_ids,
          'album_id':album_ids,
          'artist_id':artist_ids,
          'track_name':track_names,
          'artist_name':artist_names,
          'album_name':album_names,
          'track_popularity':popularity_ls,
          'time_palyed': played_ats
    }

In [62]:
df1=pd.DataFrame(list_dic)
df1

Unnamed: 0,track_id,album_id,artist_id,track_name,artist_name,album_name,track_popularity,time_palyed
0,2uBhGx3m5iJrtsql8Qk00j,621cXqrTSSJi1WqDMSLmbL,3YQKmKGau1PzlVlkL1iodx,Levitate,Twenty One Pilots,Trench,66,2020-08-21T23:58:03.790Z
1,30GOQvdgeclBPrSahiD8Mm,6jlDeEiUQJ6j6DD4BmJao6,0RpddSzUHfncUWNJXKOsjy,Everything Is Killing Me,Neon Trees,I Can Feel You Forgetting Me,45,2020-08-21T23:55:41.944Z
2,2wEYnSmSU44HfluSkHQUJ5,5vhp9OYD695wwVDpjUEpqx,1YYxodrdAUSYhO8ueGQtXs,Let It Cover Me Up,Lydia,Liquor,38,2020-08-21T23:52:34.589Z
3,54nwCwKlRSl29ndEBFVaGl,7inQoz3exeJ2mTCtFXlPqP,3NPpFNZtSTHheNBaWC82rB,Belong,X Ambassadors,Belong EP,60,2020-08-21T23:48:57.151Z
4,17Dh6pmfB7F8I9m4qRT1Hn,1jzBdtkH7s7m44i0NBYKX7,57AWTp15e5b5u0amQXAgSL,Miss Connection,The Keystones,Miss Connection,36,2020-08-21T23:45:22.679Z
5,2EcQY09CYgJ1qk6H1qZf8h,1c9Sx7XdXuMptGyfCB6hHs,74XFHRwlV6OrjEM0A2NCMF,26,Paramore,After Laughter,60,2020-08-21T23:42:26.748Z
6,578ovsDSe6X192I7UTdpXq,5hxH5SSwxf6oObJB1KzS7J,2eam0iDomRHGBypaDQLwWI,Rollercoaster - MTV Unplugged,Bleachers,MTV Unplugged,34,2020-08-21T23:38:49.531Z
7,0IBjwCuG1fEjk5OenvuL6B,0iFbPnngmZnbKKeOR9r18P,7iPH2BRBF9wKa6ljxvdext,More Than You Know - Recorded at Metropolis St...,Fenne Lily,Spotify Singles,49,2020-08-21T23:35:34.470Z
8,3Gw2X0ZJgxlggAIuG36rVF,6xE5ktkCULl1fLK66hHoqY,5tFRohaO5yEsuJxmMnlCO9,Hollow,Barns Courtney,Hollow,47,2020-08-21T23:31:58.998Z
9,2cViIXIe8Pbd1sOJExMJlK,3DuiGV3J09SUhvp8gqNx8h,7FBcuc1gsnv6Y1nwFtNRCb,Cemetery Drive,My Chemical Romance,Three Cheers for Sweet Revenge,60,2020-08-21T22:22:09.261Z


In [63]:
# Create empty column to append data to
df1['local_time'] = ''

for i in range(len(personal_data[0]['items'])):
    # Convert UTC to local time zone
    utc_time = dateutil.parser.parse(df1['time_palyed'].iloc[i]).astimezone(get_localzone())
    # Format date/time
    local_time= utc_time.strftime('%Y-%m-%d %H:%M:%S')
    local_time
    
    df1['local_time'].iloc[i] = local_time

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [64]:
df1.head()

Unnamed: 0,track_id,album_id,artist_id,track_name,artist_name,album_name,track_popularity,time_palyed,local_time
0,2uBhGx3m5iJrtsql8Qk00j,621cXqrTSSJi1WqDMSLmbL,3YQKmKGau1PzlVlkL1iodx,Levitate,Twenty One Pilots,Trench,66,2020-08-21T23:58:03.790Z,2020-08-21 18:58:03
1,30GOQvdgeclBPrSahiD8Mm,6jlDeEiUQJ6j6DD4BmJao6,0RpddSzUHfncUWNJXKOsjy,Everything Is Killing Me,Neon Trees,I Can Feel You Forgetting Me,45,2020-08-21T23:55:41.944Z,2020-08-21 18:55:41
2,2wEYnSmSU44HfluSkHQUJ5,5vhp9OYD695wwVDpjUEpqx,1YYxodrdAUSYhO8ueGQtXs,Let It Cover Me Up,Lydia,Liquor,38,2020-08-21T23:52:34.589Z,2020-08-21 18:52:34
3,54nwCwKlRSl29ndEBFVaGl,7inQoz3exeJ2mTCtFXlPqP,3NPpFNZtSTHheNBaWC82rB,Belong,X Ambassadors,Belong EP,60,2020-08-21T23:48:57.151Z,2020-08-21 18:48:57
4,17Dh6pmfB7F8I9m4qRT1Hn,1jzBdtkH7s7m44i0NBYKX7,57AWTp15e5b5u0amQXAgSL,Miss Connection,The Keystones,Miss Connection,36,2020-08-21T23:45:22.679Z,2020-08-21 18:45:22


In [14]:
writer = pd.ExcelWriter(file_name)
df1.to_excel(writer,'recently_played')

## Step 3: Pull User's Top Artists & Tracks

In [65]:
username = config.username
client_id = config.client_id
client_secret = config.client_secret
redirect_uri = 'http://localhost:7777/callback'
scope = 'user-top-read'

auth_token = util.prompt_for_user_token(username=username, 
                                   scope=scope, 
                                   client_id=client_id,   
                                   client_secret=client_secret,     
                                   redirect_uri=redirect_uri)

In [66]:
base_url = 'https://api.spotify.com/v1/me/top/tracks?'
#track_id = '6y0igZArWVi6Iz0rj35c1Y'

#2. Authentication
#3. Parameters -- would be stored with authentication
headers = {
    "Authorization": f"Bearer {auth_token}"
}

#4. Create an empty list
top_track_data = [] #would be good explore how to capture data at different points in time
r = requests.get(base_url+"time_range=medium_term"+"&&limit=50", headers=headers)
top_track_data.append(json.loads(r.text))

In [74]:
track_idss = []
album_idss = []
artist_idss = []
album_namess = []
album_relase_datess = []
artist_namess = []
popularity_lss = []
track_namess = []

for i in range(len(top_track_data[0]['items'])):
    track_idss.append(top_track_data[0]['items'][i]['id']) # Track ID
    album_idss.append(top_track_data[0]['items'][i]['album']['id']) # Album ID
    artist_idss.append(top_track_data[0]['items'][i]['album']['artists'][0]['id'])
    album_namess.append(top_track_data[0]['items'][i]['album']['name']) # Album Name
    album_relase_datess.append(top_track_data[0]['items'][i]['album']['release_date'])
    artist_namess.append(top_track_data[0]['items'][i]['album']['artists'][0]['name']) # Artist Name
    popularity_lss.append(top_track_data[0]['items'][i]['popularity'])
    track_namess.append(top_track_data[0]['items'][i]['name']) # Track Name

In [75]:
list_dic2={'track_id':track_idss,
           'album_id':album_idss,
           'artist_id':artist_idss,
           'track_name':track_namess,
           'album_name':album_namess,
           'artist_name':artist_namess,
           'track_popularity':popularity_lss,
           'album_relase_date':album_relase_datess,
    }

In [76]:
df2=pd.DataFrame(list_dic2)
df2

Unnamed: 0,track_id,album_id,artist_id,track_name,album_name,artist_name,track_popularity,album_relase_date
0,4pvb0WLRcMtbPGmtejJJ6y,2fenSS68JI1h4Fo296JfGr,06HL4z0CvFAxyc27GXpf02,exile (feat. Bon Iver),folklore,Taylor Swift,92,2020-07-24
1,6xZ4Q2k2ompmDppyeESIY8,4h3HXlnt6lryGzGbWmcFuY,3YQKmKGau1PzlVlkL1iodx,Level of Concern,Level of Concern,Twenty One Pilots,82,2020-04-09
2,1ci0BoqpvH73L2TJzHhw9y,3FallGXQGmURWo6JW3k1gM,3hSFS64223jyO9Ck66rLOf,Modern Chemistry,Tell All Your Friend,Okey Dokey,44,2019-03-22
3,4w2tfK0JA8KrVegKnxukf4,4XOMJHVuzJVWmqUdp4SYKP,5tFRohaO5yEsuJxmMnlCO9,The Kids Are Alright,404,Barns Courtney,38,2019-09-06
4,0VjIjW4GlUZAMYd2vXMi3b,4yP0hdKOZPNshxUOjY0cZj,1Xyo4u8uXC1ZmMpatF05PJ,Blinding Lights,After Hours,The Weeknd,99,2020-03-20
5,6xQpOC55lufcqXvSzp7GTb,7c2VhL8HYMCwWIs54xEX2o,5tFRohaO5yEsuJxmMnlCO9,Hard To Be Alone,Hard To Be Alone,Barns Courtney,41,2020-07-10
6,1uddOsj7TyRA13hnS2yDyk,2bMqAaEuEUDHBkdTNAUl1f,21MZoYDTcbPsC5crOQVqho,Can't Take My Eyes Off You,Can't Take My Eyes Off You / 3BadSoSad - Edit,Private Island,47,2019-11-15
7,2nUV1fiD45RN6cQZ85GDc1,0lvgNBfe5wm9gKkecWN5Hq,18qigp8zUYz9THL4t4vZV4,Salt (Nlmg),The Light Left Over,Ben Hon,39,2019-04-05
8,3FRJFImdfX5NSY3QH3jI4u,4cZhiGqIqqqlHxWOC9r7Jp,7FKTg75ADVMZgY3P9ZMRtH,Glistening,Something You Needed,Flipturn,37,2020-02-07
9,3flgdcFBWI84DPi4s1jhhd,4cZhiGqIqqqlHxWOC9r7Jp,7FKTg75ADVMZgY3P9ZMRtH,Savannah,Something You Needed,Flipturn,30,2020-02-07


## Step 4: Pull Track Info
- API Doc: https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-analysis/

In [20]:
# Create list of track_ids
track_id_ls = df2['track_id'].tolist()

In [21]:
track_data = []
for id in tqdm(track_id_ls):
    base_url = f'https://api.spotify.com/v1/audio-features/{id}?'

    #2. Authentication
    #3. Parameters -- would be stored with authentication
    headers = {
        "Authorization": f"Bearer {auth_token}"
    }

    r = requests.get(base_url, headers=headers)
    track_data.append(json.loads(r.text))

100%|██████████| 50/50 [00:04<00:00, 12.04it/s]


In [22]:
track_df = pd.json_normalize(track_data)
track_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.298,0.38,6,-8.426,1,0.0287,0.778,5.6e-05,0.11,0.152,75.602,audio_features,4pvb0WLRcMtbPGmtejJJ6y,spotify:track:4pvb0WLRcMtbPGmtejJJ6y,https://api.spotify.com/v1/tracks/4pvb0WLRcMtb...,https://api.spotify.com/v1/audio-analysis/4pvb...,285634,4
1,0.754,0.583,4,-7.34,0,0.0432,0.32,0.00015,0.144,0.77,122.012,audio_features,6xZ4Q2k2ompmDppyeESIY8,spotify:track:6xZ4Q2k2ompmDppyeESIY8,https://api.spotify.com/v1/tracks/6xZ4Q2k2ompm...,https://api.spotify.com/v1/audio-analysis/6xZ4...,220051,4
2,0.548,0.608,5,-7.506,1,0.0265,0.00415,2.5e-05,0.0665,0.436,89.997,audio_features,1ci0BoqpvH73L2TJzHhw9y,spotify:track:1ci0BoqpvH73L2TJzHhw9y,https://api.spotify.com/v1/tracks/1ci0BoqpvH73...,https://api.spotify.com/v1/audio-analysis/1ci0...,230213,4
3,0.547,0.771,3,-4.722,1,0.0284,0.0266,1e-06,0.0819,0.674,122.036,audio_features,4w2tfK0JA8KrVegKnxukf4,spotify:track:4w2tfK0JA8KrVegKnxukf4,https://api.spotify.com/v1/tracks/4w2tfK0JA8Kr...,https://api.spotify.com/v1/audio-analysis/4w2t...,235773,4
4,0.514,0.73,1,-5.934,1,0.0598,0.00146,9.5e-05,0.0897,0.334,171.005,audio_features,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4


## Step 5: Pull Artist Info
- [Artist Endpoint Documentation](https://developer.spotify.com/documentation/web-api/reference/artists/get-artist/)

In [79]:
# Creating a uniqe list artist ids
artist_id_ls = df2['artist_id'].tolist()
artist_id_ls = list(dict.fromkeys(artist_id_ls))

artist_id_ls

['06HL4z0CvFAxyc27GXpf02',
 '3YQKmKGau1PzlVlkL1iodx',
 '3hSFS64223jyO9Ck66rLOf',
 '5tFRohaO5yEsuJxmMnlCO9',
 '1Xyo4u8uXC1ZmMpatF05PJ',
 '21MZoYDTcbPsC5crOQVqho',
 '18qigp8zUYz9THL4t4vZV4',
 '7FKTg75ADVMZgY3P9ZMRtH',
 '7gRH7pJQJaAvrwvi8STYEl',
 '2D4FOOOtWycb3Aw9nY5n3c',
 '0sBxk1gyQM4VN1j1pS01GJ',
 '0epOFNiUfyON9EYx7Tpr6V',
 '6eJa3zG1QZLRB3xgRuyxbm',
 '5cVeSOiS002MF1uiUFOPV5',
 '4ug3P1K8BaCdJXROrqHqhu',
 '4YZ5ECfbM2xSTSQTJGBbO5',
 '7mnBLXK823vNxN3UWB7Gfz',
 '7gP3bB2nilZXLfPHJhMdvc',
 '2PKUc5LXsZNjqOpAZKcFgz',
 '4njdEjTnLfcGImKZu1iSrz',
 '3tiDO34wzfyJdv8elYqKxj']

In [80]:
artist_data = []

for a_id in tqdm(artist_id_ls):
    base_url = f'https://api.spotify.com/v1/artists/{a_id}?'
    # example artist_id = '06HL4z0CvFAxyc27GXpf02'

    #2. Authentication
    #3. Parameters -- would be stored with authentication
    headers = {
        "Authorization": f"Bearer {auth_token}"
    }

    #4. Create an empty list
    r = requests.get(base_url, headers=headers)
    artist_data.append(json.loads(r.text))

100%|██████████| 21/21 [00:01<00:00, 14.28it/s]


In [84]:
artist_data

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/06HL4z0CvFAxyc27GXpf02'},
  'followers': {'href': None, 'total': 31606289},
  'genres': ['pop'],
  'href': 'https://api.spotify.com/v1/artists/06HL4z0CvFAxyc27GXpf02',
  'id': '06HL4z0CvFAxyc27GXpf02',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/a37efbc7fd3f5f5df81b48ce9c6de53820b239c1',
    'width': 640},
   {'height': 320,
    'url': 'https://i.scdn.co/image/dc6f0ac18ad5db302e3c8a28b655b6ca7244ef08',
    'width': 320},
   {'height': 160,
    'url': 'https://i.scdn.co/image/b1627ca86690d2c5b6cc2fb1039a31014e96a22b',
    'width': 160}],
  'name': 'Taylor Swift',
  'popularity': 98,
  'type': 'artist',
  'uri': 'spotify:artist:06HL4z0CvFAxyc27GXpf02'},
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/3YQKmKGau1PzlVlkL1iodx'},
  'followers': {'href': None, 'total': 18003924},
  'genres': ['modern rock', 'rock'],
  'href': 'https://api.spotify.com/v1/artists/3YQKmKGau1PzlVlkL1iodx',
  'id':

In [111]:
artist_data[1]['genres']

['modern rock', 'rock']

In [122]:
artist_id_ls = []
artist_followers_ls = []
artist_genres_ls = []
artist_name_ls = []
artist_popularity_ls = []

for i in tqdm(range(len(artist_data))):
    artist_id_ls.append(artist_data[i]['id'])
    artist_followers_ls.append(artist_data[i]['followers']['total'])
    artist_genres_ls.append(artist_data[i]['genres'])
    artist_name_ls.append(artist_data[i]['name'])
    artist_popularity_ls.append(artist_data[i]['popularity'])

100%|██████████| 21/21 [00:00<00:00, 21026.59it/s]


In [123]:
list_dic3={'artist_id':artist_id_ls,
           'artist_followers':artist_followers_ls,
           'artist_genres':artist_genres_ls,
           'artist_name':artist_name_ls,
           'artist_popularity':artist_popularity_ls
    }

In [127]:
artist_df = pd.DataFrame(list_dic3)
artist_df

Unnamed: 0,artist_id,artist_followers,artist_genres,artist_name,artist_popularity
0,06HL4z0CvFAxyc27GXpf02,31606289,[pop],Taylor Swift,98
1,3YQKmKGau1PzlVlkL1iodx,18003924,"[modern rock, rock]",Twenty One Pilots,85
2,3hSFS64223jyO9Ck66rLOf,20776,"[indie garage rock, nashville indie]",Okey Dokey,47
3,5tFRohaO5yEsuJxmMnlCO9,322728,"[modern alternative rock, modern rock, rock]",Barns Courtney,66
4,1Xyo4u8uXC1ZmMpatF05PJ,23353421,"[canadian contemporary r&b, canadian pop, pop]",The Weeknd,95
5,21MZoYDTcbPsC5crOQVqho,24496,[vapor soul],Private Island,46
6,18qigp8zUYz9THL4t4vZV4,366,[],Ben Hon,25
7,7FKTg75ADVMZgY3P9ZMRtH,18803,[jacksonville indie],Flipturn,49
8,7gRH7pJQJaAvrwvi8STYEl,13289,"[lexington ky indie, retro soul]",Matt Duncan,45
9,2D4FOOOtWycb3Aw9nY5n3c,562915,"[indie pop, modern alternative rock, modern ro...",Declan McKenna,69


## Clean Up Track Data

## Update 'mode' to tell if track is major or minor

In [23]:
mod_dict = {0 : 'Minor',
            1: 'Major'}

In [24]:
track_df['mode'].replace(mod_dict, inplace=True)

## Update "key" to tell the actually key

In [25]:
music_dic = {
    0: 'C',
    1: 'C#/Db',
    2: 'D',
    3: 'D#/Eb',
    4: 'E',
    5: 'F',
    6: 'F#/Gb',
    7: 'G',
    8: 'G#/Ab',
    9: 'A',
    10: 'A#/Bb',
    11: 'B' 
}

In [26]:
track_df['key'].replace(music_dic, inplace=True)

## Add column duration in minutes/seconds

In [27]:
track_df['track_duration'] = ''

for i in range(len(track_df['id'])):
    millis=track_df['duration_ms'].iloc[i]
    track_df['track_duration'].iloc[i] = pd.to_datetime(millis, unit='ms').strftime('%H:%M:%S')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


## Rename track id column

In [28]:
track_df=track_df.rename(columns = {'id':'track_id'})
track_df.head()                     

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,track_id,uri,track_href,analysis_url,duration_ms,time_signature,track_duration
0,0.298,0.38,F#/Gb,-8.426,Major,0.0287,0.778,5.6e-05,0.11,0.152,75.602,audio_features,4pvb0WLRcMtbPGmtejJJ6y,spotify:track:4pvb0WLRcMtbPGmtejJJ6y,https://api.spotify.com/v1/tracks/4pvb0WLRcMtb...,https://api.spotify.com/v1/audio-analysis/4pvb...,285634,4,00:04:45
1,0.754,0.583,E,-7.34,Minor,0.0432,0.32,0.00015,0.144,0.77,122.012,audio_features,6xZ4Q2k2ompmDppyeESIY8,spotify:track:6xZ4Q2k2ompmDppyeESIY8,https://api.spotify.com/v1/tracks/6xZ4Q2k2ompm...,https://api.spotify.com/v1/audio-analysis/6xZ4...,220051,4,00:03:40
2,0.548,0.608,F,-7.506,Major,0.0265,0.00415,2.5e-05,0.0665,0.436,89.997,audio_features,1ci0BoqpvH73L2TJzHhw9y,spotify:track:1ci0BoqpvH73L2TJzHhw9y,https://api.spotify.com/v1/tracks/1ci0BoqpvH73...,https://api.spotify.com/v1/audio-analysis/1ci0...,230213,4,00:03:50
3,0.547,0.771,D#/Eb,-4.722,Major,0.0284,0.0266,1e-06,0.0819,0.674,122.036,audio_features,4w2tfK0JA8KrVegKnxukf4,spotify:track:4w2tfK0JA8KrVegKnxukf4,https://api.spotify.com/v1/tracks/4w2tfK0JA8Kr...,https://api.spotify.com/v1/audio-analysis/4w2t...,235773,4,00:03:55
4,0.514,0.73,C#/Db,-5.934,Major,0.0598,0.00146,9.5e-05,0.0897,0.334,171.005,audio_features,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4,00:03:20


# Create Master Dataframe

In [29]:
master_df = pd.merge(df2, track_df,
                       how='left', on=['track_id'])

In [30]:
master_df.head(10)

Unnamed: 0,track_id,track_name,album_name,artist_name,album_relase_date,danceability,energy,key,loudness,mode,...,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature,track_duration
0,4pvb0WLRcMtbPGmtejJJ6y,exile (feat. Bon Iver),folklore,Taylor Swift,2020-07-24,0.298,0.38,F#/Gb,-8.426,Major,...,0.11,0.152,75.602,audio_features,spotify:track:4pvb0WLRcMtbPGmtejJJ6y,https://api.spotify.com/v1/tracks/4pvb0WLRcMtb...,https://api.spotify.com/v1/audio-analysis/4pvb...,285634,4,00:04:45
1,6xZ4Q2k2ompmDppyeESIY8,Level of Concern,Level of Concern,Twenty One Pilots,2020-04-09,0.754,0.583,E,-7.34,Minor,...,0.144,0.77,122.012,audio_features,spotify:track:6xZ4Q2k2ompmDppyeESIY8,https://api.spotify.com/v1/tracks/6xZ4Q2k2ompm...,https://api.spotify.com/v1/audio-analysis/6xZ4...,220051,4,00:03:40
2,1ci0BoqpvH73L2TJzHhw9y,Modern Chemistry,Tell All Your Friend,Okey Dokey,2019-03-22,0.548,0.608,F,-7.506,Major,...,0.0665,0.436,89.997,audio_features,spotify:track:1ci0BoqpvH73L2TJzHhw9y,https://api.spotify.com/v1/tracks/1ci0BoqpvH73...,https://api.spotify.com/v1/audio-analysis/1ci0...,230213,4,00:03:50
3,4w2tfK0JA8KrVegKnxukf4,The Kids Are Alright,404,Barns Courtney,2019-09-06,0.547,0.771,D#/Eb,-4.722,Major,...,0.0819,0.674,122.036,audio_features,spotify:track:4w2tfK0JA8KrVegKnxukf4,https://api.spotify.com/v1/tracks/4w2tfK0JA8Kr...,https://api.spotify.com/v1/audio-analysis/4w2t...,235773,4,00:03:55
4,0VjIjW4GlUZAMYd2vXMi3b,Blinding Lights,After Hours,The Weeknd,2020-03-20,0.514,0.73,C#/Db,-5.934,Major,...,0.0897,0.334,171.005,audio_features,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4,00:03:20
5,6xQpOC55lufcqXvSzp7GTb,Hard To Be Alone,Hard To Be Alone,Barns Courtney,2020-07-10,0.414,0.476,D#/Eb,-6.52,Major,...,0.0863,0.24,94.087,audio_features,spotify:track:6xQpOC55lufcqXvSzp7GTb,https://api.spotify.com/v1/tracks/6xQpOC55lufc...,https://api.spotify.com/v1/audio-analysis/6xQp...,182053,4,00:03:02
6,1uddOsj7TyRA13hnS2yDyk,Can't Take My Eyes Off You,Can't Take My Eyes Off You / 3BadSoSad - Edit,Private Island,2019-11-15,0.614,0.609,F,-5.837,Minor,...,0.152,0.429,123.996,audio_features,spotify:track:1uddOsj7TyRA13hnS2yDyk,https://api.spotify.com/v1/tracks/1uddOsj7TyRA...,https://api.spotify.com/v1/audio-analysis/1udd...,209000,4,00:03:29
7,2nUV1fiD45RN6cQZ85GDc1,Salt (Nlmg),The Light Left Over,Ben Hon,2019-04-05,0.57,0.595,F#/Gb,-7.833,Major,...,0.142,0.336,136.588,audio_features,spotify:track:2nUV1fiD45RN6cQZ85GDc1,https://api.spotify.com/v1/tracks/2nUV1fiD45RN...,https://api.spotify.com/v1/audio-analysis/2nUV...,202840,4,00:03:22
8,3FRJFImdfX5NSY3QH3jI4u,Glistening,Something You Needed,Flipturn,2020-02-07,0.668,0.574,D#/Eb,-12.066,Major,...,0.115,0.248,127.011,audio_features,spotify:track:3FRJFImdfX5NSY3QH3jI4u,https://api.spotify.com/v1/tracks/3FRJFImdfX5N...,https://api.spotify.com/v1/audio-analysis/3FRJ...,247559,4,00:04:07
9,3flgdcFBWI84DPi4s1jhhd,Savannah,Something You Needed,Flipturn,2020-02-07,0.535,0.0755,E,-20.142,Major,...,0.238,0.409,104.443,audio_features,spotify:track:3flgdcFBWI84DPi4s1jhhd,https://api.spotify.com/v1/tracks/3flgdcFBWI84...,https://api.spotify.com/v1/audio-analysis/3flg...,69072,4,00:01:09


In [None]:
master_df.to_excel(writer,'top_tracks')

# Create Excel File

In [129]:
current_directory = os.path.abspath(os.getcwd())

In [130]:
export_file_path = os.path.join(current_directory, 'spotify_export_files', file_name)

In [133]:
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter(export_file_path, engine='xlsxwriter')

# Convert the dataframe to an XlsxWriter Excel object.
df1.to_excel(writer,'recently_played', index=False)
master_df.to_excel(writer,'top_tracks',index=False)

# Get the xlsxwriter workbook and worksheet objects.
workbook  = writer.book
worksheet1 = writer.sheets['recently_played']
worksheet2 = writer.sheets['top_tracks']


for i, col in enumerate(master_df.columns):
    # find length of column i
    column_len = master_df[col].astype(str).str.len().max()
    # Setting the length if the column header is larger
    # than the max column value length
    column_len = max(column_len, len(col)) + 2
    
    # set the column length
    worksheet1.set_column(i, i, column_len)
    
for i, col in enumerate(df1.columns):
    # find length of column i
    column_len = df1[col].astype(str).str.len().max()
    # Setting the length if the column header is larger
    # than the max column value length
    column_len = max(column_len, len(col)) + 2
    
    # set the column length
    worksheet2.set_column(i, i, column_len)

writer.save()

In [136]:
df1['track_href'].astype(str).str.len().max()

KeyError: 'track_href'