# Creating the Spotify Data File
Creation of three Pandas DataFrames:
 - Tracks
 - Audio Features
 - Final merged table
 
**POTENTIAL ACTIONS**
 - It may be worth cleaning the final DataFrame a bit, removing unwanted columns
 - Do we want to try & add some more info for analysis? (e.g. Can album info be extracted?)

In [1]:
# Import relevant libraries & Spotify Credentials
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotify_creds import cid
from spotify_creds import secret
import pandas as pd

# Load up Spotify credentials
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [2]:
# Loop for creating the track information
artist_name = []
track_name = []
popularity = []
track_id = []
for i in range(0,1000,50):
    track_results = sp.search(q='year:2021', type='track', limit=50,offset=i)
    for i, t in enumerate(track_results['tracks']['items']):
        artist_name.append(t['artists'][0]['name'])
        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])

In [3]:
# Create the track DataFrame
track_df = pd.DataFrame({'artist_name' : artist_name, 'track_name' : track_name, 'track_id' : track_id, 'popularity' : popularity})
track_df

Unnamed: 0,artist_name,track_name,track_id,popularity
0,Adele,Easy On Me,0gplL1WMoJ6iYaPgMCL0gX,100
1,Mr.Kitty,Years - 2021 Remaster,4S4ZY1yKo3WUtzsg3O6hcf,20
2,Taylor Swift,All Too Well (10 Minute Version) (Taylor's Ver...,5enxwA8aAbwZbf5qCHORXi,95
3,Elvis Costello & The Attractions,Pump It Up - 2021 Remaster,3oyc1mIdCBGaU55wX7otqM,63
4,Lil Nas X,INDUSTRY BABY (feat. Jack Harlow),27NovPIUIRrOZoCHxABJwK,97
...,...,...,...,...
995,Florence + The Machine,Dog Days Are Over,4tgYdBU8IEmZwU9mAs6du1,7
996,Shawn Mendes,It'll Be Okay,2KnLkZ3z7PO3kgVGHGqDpD,79
997,Tears For Fears,Everybody Wants To Rule The World,4qyMZZIKE7Qka9hrnE0ikK,8
998,Chase Matthew,County Line,1V8LJmUEAleAHmBUaD0ZFI,71


In [4]:
# Create the list of track features
features = []

for x in track_id:
    features.append(sp.audio_features(x))

In [5]:
# Create the DataFrame for track features
feat_df = pd.DataFrame()

for i in range(1000):
    feat_df = feat_df.append(features[i][0],ignore_index=True)
    
# Rename track id column for future merging with the other DataFrame
feat_df.rename(columns = {'id':'track_id'}, inplace = True)
feat_df

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,track_id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,0.578000,https://api.spotify.com/v1/audio-analysis/0gpl...,0.604,224695.0,0.366,0gplL1WMoJ6iYaPgMCL0gX,0.000000,5.0,0.1330,-7.519,1.0,0.0282,141.981,4.0,https://api.spotify.com/v1/tracks/0gplL1WMoJ6i...,audio_features,spotify:track:0gplL1WMoJ6iYaPgMCL0gX,0.1300
1,0.000672,https://api.spotify.com/v1/audio-analysis/4S4Z...,0.608,214159.0,0.841,4S4ZY1yKo3WUtzsg3O6hcf,0.000017,9.0,0.0704,-8.354,1.0,0.0293,129.994,4.0,https://api.spotify.com/v1/tracks/4S4ZY1yKo3WU...,audio_features,spotify:track:4S4ZY1yKo3WUtzsg3O6hcf,0.1850
2,0.274000,https://api.spotify.com/v1/audio-analysis/5enx...,0.631,613027.0,0.518,5enxwA8aAbwZbf5qCHORXi,0.000000,0.0,0.0880,-8.771,1.0,0.0303,93.023,4.0,https://api.spotify.com/v1/tracks/5enxwA8aAbwZ...,audio_features,spotify:track:5enxwA8aAbwZbf5qCHORXi,0.2050
3,0.009210,https://api.spotify.com/v1/audio-analysis/3oyc...,0.645,196680.0,0.809,3oyc1mIdCBGaU55wX7otqM,0.001080,11.0,0.1060,-6.120,1.0,0.0385,138.978,4.0,https://api.spotify.com/v1/tracks/3oyc1mIdCBGa...,audio_features,spotify:track:3oyc1mIdCBGaU55wX7otqM,0.9660
4,0.020300,https://api.spotify.com/v1/audio-analysis/27No...,0.736,212000.0,0.704,27NovPIUIRrOZoCHxABJwK,0.000000,3.0,0.0501,-7.409,0.0,0.0615,149.995,4.0,https://api.spotify.com/v1/tracks/27NovPIUIRrO...,audio_features,spotify:track:27NovPIUIRrOZoCHxABJwK,0.8940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
994,0.043800,https://api.spotify.com/v1/audio-analysis/4tgY...,0.499,252853.0,0.795,4tgYdBU8IEmZwU9mAs6du1,0.002400,7.0,0.1200,-5.285,1.0,0.0787,149.966,4.0,https://api.spotify.com/v1/tracks/4tgYdBU8IEmZ...,audio_features,spotify:track:4tgYdBU8IEmZwU9mAs6du1,0.3570
995,0.616000,https://api.spotify.com/v1/audio-analysis/2KnL...,0.398,222800.0,0.286,2KnLkZ3z7PO3kgVGHGqDpD,0.000146,7.0,0.0912,-11.881,1.0,0.0330,78.262,4.0,https://api.spotify.com/v1/tracks/2KnLkZ3z7PO3...,audio_features,spotify:track:2KnLkZ3z7PO3kgVGHGqDpD,0.0666
996,0.304000,https://api.spotify.com/v1/audio-analysis/4qyM...,0.641,253400.0,0.931,4qyMZZIKE7Qka9hrnE0ikK,0.001760,7.0,0.1260,-5.779,1.0,0.0701,111.989,4.0,https://api.spotify.com/v1/tracks/4qyMZZIKE7Qk...,audio_features,spotify:track:4qyMZZIKE7Qka9hrnE0ikK,0.5080
997,0.206000,https://api.spotify.com/v1/audio-analysis/1V8L...,0.536,208209.0,0.772,1V8LJmUEAleAHmBUaD0ZFI,0.000000,3.0,0.1630,-6.027,0.0,0.0339,134.019,4.0,https://api.spotify.com/v1/tracks/1V8LJmUEAleA...,audio_features,spotify:track:1V8LJmUEAleAHmBUaD0ZFI,0.4230


In [6]:
# Merge the two DataFrames together & remove any duplicate tracks to get final data file
spotify_df = track_df.merge(feat_df, how = "outer", on = "track_id")
spotify_df = spotify_df.drop_duplicates()
spotify_df

Unnamed: 0,artist_name,track_name,track_id,popularity,acousticness,analysis_url,danceability,duration_ms,energy,instrumentalness,...,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,Adele,Easy On Me,0gplL1WMoJ6iYaPgMCL0gX,100,0.578000,https://api.spotify.com/v1/audio-analysis/0gpl...,0.604,224695.0,0.366,0.000000,...,0.1330,-7.519,1.0,0.0282,141.981,4.0,https://api.spotify.com/v1/tracks/0gplL1WMoJ6i...,audio_features,spotify:track:0gplL1WMoJ6iYaPgMCL0gX,0.1300
1,Mr.Kitty,Years - 2021 Remaster,4S4ZY1yKo3WUtzsg3O6hcf,20,0.000672,https://api.spotify.com/v1/audio-analysis/4S4Z...,0.608,214159.0,0.841,0.000017,...,0.0704,-8.354,1.0,0.0293,129.994,4.0,https://api.spotify.com/v1/tracks/4S4ZY1yKo3WU...,audio_features,spotify:track:4S4ZY1yKo3WUtzsg3O6hcf,0.1850
2,Taylor Swift,All Too Well (10 Minute Version) (Taylor's Ver...,5enxwA8aAbwZbf5qCHORXi,95,0.274000,https://api.spotify.com/v1/audio-analysis/5enx...,0.631,613027.0,0.518,0.000000,...,0.0880,-8.771,1.0,0.0303,93.023,4.0,https://api.spotify.com/v1/tracks/5enxwA8aAbwZ...,audio_features,spotify:track:5enxwA8aAbwZbf5qCHORXi,0.2050
3,Elvis Costello & The Attractions,Pump It Up - 2021 Remaster,3oyc1mIdCBGaU55wX7otqM,63,0.009210,https://api.spotify.com/v1/audio-analysis/3oyc...,0.645,196680.0,0.809,0.001080,...,0.1060,-6.120,1.0,0.0385,138.978,4.0,https://api.spotify.com/v1/tracks/3oyc1mIdCBGa...,audio_features,spotify:track:3oyc1mIdCBGaU55wX7otqM,0.9660
4,Lil Nas X,INDUSTRY BABY (feat. Jack Harlow),27NovPIUIRrOZoCHxABJwK,97,0.020300,https://api.spotify.com/v1/audio-analysis/27No...,0.736,212000.0,0.704,0.000000,...,0.0501,-7.409,0.0,0.0615,149.995,4.0,https://api.spotify.com/v1/tracks/27NovPIUIRrO...,audio_features,spotify:track:27NovPIUIRrOZoCHxABJwK,0.8940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,Florence + The Machine,Dog Days Are Over,4tgYdBU8IEmZwU9mAs6du1,7,0.043800,https://api.spotify.com/v1/audio-analysis/4tgY...,0.499,252853.0,0.795,0.002400,...,0.1200,-5.285,1.0,0.0787,149.966,4.0,https://api.spotify.com/v1/tracks/4tgYdBU8IEmZ...,audio_features,spotify:track:4tgYdBU8IEmZwU9mAs6du1,0.3570
1002,Shawn Mendes,It'll Be Okay,2KnLkZ3z7PO3kgVGHGqDpD,79,0.616000,https://api.spotify.com/v1/audio-analysis/2KnL...,0.398,222800.0,0.286,0.000146,...,0.0912,-11.881,1.0,0.0330,78.262,4.0,https://api.spotify.com/v1/tracks/2KnLkZ3z7PO3...,audio_features,spotify:track:2KnLkZ3z7PO3kgVGHGqDpD,0.0666
1003,Tears For Fears,Everybody Wants To Rule The World,4qyMZZIKE7Qka9hrnE0ikK,8,0.304000,https://api.spotify.com/v1/audio-analysis/4qyM...,0.641,253400.0,0.931,0.001760,...,0.1260,-5.779,1.0,0.0701,111.989,4.0,https://api.spotify.com/v1/tracks/4qyMZZIKE7Qk...,audio_features,spotify:track:4qyMZZIKE7Qka9hrnE0ikK,0.5080
1004,Chase Matthew,County Line,1V8LJmUEAleAHmBUaD0ZFI,71,0.206000,https://api.spotify.com/v1/audio-analysis/1V8L...,0.536,208209.0,0.772,0.000000,...,0.1630,-6.027,0.0,0.0339,134.019,4.0,https://api.spotify.com/v1/tracks/1V8LJmUEAleA...,audio_features,spotify:track:1V8LJmUEAleAHmBUaD0ZFI,0.4230


In [7]:
# Export files to .csv (optional)

spotify_df.to_csv("merged.csv")
feat_df.to_csv("features.csv")
track_df.to_csv("tracks.csv")