## Analysis of Spotify data

In [20]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import timeit
import pandas as pd
import matplotlib as plt
import plotly.plotly as py
import plotly.graph_objs as go
import seaborn as sns

In [4]:
cid ="47e61e82d3044694b1735d8d566a159a" 
secret = "63c23dcc717745f389f658884059fc9c"

client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [9]:
start = timeit.default_timer()
artist_name, track_name, popularity, track_id = [], [], [], []

for i in range(0,1000,50):
    track_results = sp.search(q='year:2018', type='track', limit=50,offset=i)
    for i, t in enumerate(track_results['tracks']['items']):
        artist_name.append(t['artists'][0]['name'])
        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])
      
stop = timeit.default_timer()
print ('Time to run this code (in seconds):', stop - start)

Time to run this code (in seconds): 38.08862403635851


In [10]:
print('artist: ', len(artist_name))
print('\n track', len(track_name))
print('\n popularity', len(popularity))
print('\n track_id', len(track_id))

artist:  1000

 track 1000

 popularity 1000

 track_id 1000


In [32]:
dat = pd.DataFrame({'artist':artist_name,'track':track_name,'track_id':track_id,'popularity':popularity})

In [33]:
dat.head()

Unnamed: 0,artist,track,track_id,popularity
0,Post Malone,Sunflower - Spider-Man: Into the Spider-Verse,3KkXRkHbMCARz0aVfEt68P,99
1,Los Unidades,E-Lo (feat. Jozzy),3eydp9rHJAskzOevEBK267,70
2,Post Malone,Wow.,6MWtB6iiXyIwun0YzU6DFP,98
3,Meek Mill,Going Bad (feat. Drake),2IRZnDFmlqMuOrYOLnZZyc,95
4,Ariana Grande,"thank u, next",2rPE9A1vEgShuZxxzR2tZH,100


In [34]:
dat.describe()

Unnamed: 0,popularity
count,1000.0
mean,77.359
std,6.415706
min,67.0
25%,72.0
50%,76.0
75%,81.0
max,100.0


### Number of duplicate

In [35]:
grouped = dat.groupby(['artist','track'], as_index=True).size()
grouped[grouped > 1].count()

143

In [36]:
dat.drop_duplicates(subset=['artist','track'], inplace=True)

In [37]:
dat.shape

(852, 4)

In [38]:
start = timeit.default_timer()
rows = []
batchs = 100
# counter for None results
None_counter = 0

for i in range(0,len(dat['track_id']), batchs):
    batch = dat['track_id'][i:i+batchs]
    feature_results = sp.audio_features(batch)
    for i, t in enumerate(feature_results):
        if t == None:
            None_counter = None_counter + 1
        else:
            rows.append(t)
            
print('Number of tracks where no audio features were available:',None_counter)

stop = timeit.default_timer()
print ('Time to run this code (in seconds):',stop - start)

Number of tracks where no audio features were available: 0
Time to run this code (in seconds): 2.601179733817389


In [39]:
aud = pd.DataFrame.from_dict(rows,orient='columns')

In [40]:
aud.head()

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,0.556,https://api.spotify.com/v1/audio-analysis/3KkX...,0.76,158040,0.479,3KkXRkHbMCARz0aVfEt68P,0.0,2,0.0703,-5.574,1,0.0466,89.911,4,https://api.spotify.com/v1/tracks/3KkXRkHbMCAR...,audio_features,spotify:track:3KkXRkHbMCARz0aVfEt68P,0.913
1,0.469,https://api.spotify.com/v1/audio-analysis/3eyd...,0.698,214720,0.797,3eydp9rHJAskzOevEBK267,0.00118,3,0.152,-5.125,0,0.0615,101.969,4,https://api.spotify.com/v1/tracks/3eydp9rHJAsk...,audio_features,spotify:track:3eydp9rHJAskzOevEBK267,0.53
2,0.163,https://api.spotify.com/v1/audio-analysis/6MWt...,0.833,149520,0.539,6MWtB6iiXyIwun0YzU6DFP,2e-06,11,0.101,-7.399,0,0.178,99.947,4,https://api.spotify.com/v1/tracks/6MWtB6iiXyIw...,audio_features,spotify:track:6MWtB6iiXyIwun0YzU6DFP,0.385
3,0.259,https://api.spotify.com/v1/audio-analysis/2IRZ...,0.889,180522,0.496,2IRZnDFmlqMuOrYOLnZZyc,0.0,4,0.252,-6.365,0,0.0905,86.003,4,https://api.spotify.com/v1/tracks/2IRZnDFmlqMu...,audio_features,spotify:track:2IRZnDFmlqMuOrYOLnZZyc,0.544
4,0.28,https://api.spotify.com/v1/audio-analysis/2rPE...,0.724,207333,0.647,2rPE9A1vEgShuZxxzR2tZH,0.0,1,0.102,-5.642,1,0.0658,106.96,4,https://api.spotify.com/v1/tracks/2rPE9A1vEgSh...,audio_features,spotify:track:2rPE9A1vEgShuZxxzR2tZH,0.435


In [41]:
aud.describe()

Unnamed: 0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
count,852.0,852.0,852.0,852.0,852.0,852.0,852.0,852.0,852.0,852.0,852.0,852.0,852.0
mean,0.214583,0.7064,199821.591549,0.609954,0.019268,5.257042,0.169744,-6.611575,0.551643,0.151218,126.100088,3.976526,0.443541
std,0.230708,0.136874,43899.98065,0.161044,0.11747,3.682255,0.117765,2.759222,0.497618,0.125531,29.797661,0.264487,0.210092
min,3.9e-05,0.144,37640.0,0.0151,0.0,0.0,0.028,-32.452,0.0,0.0251,61.579,1.0,0.0371
25%,0.03925,0.622,174352.5,0.51175,0.0,1.0,0.102,-7.63,0.0,0.0515,100.012,4.0,0.284
50%,0.122,0.7215,198011.0,0.617,0.0,5.0,0.122,-6.118,1.0,0.09865,126.266,4.0,0.435
75%,0.32325,0.80225,222672.75,0.72075,1e-05,8.0,0.194,-4.93425,1.0,0.2295,148.075,4.0,0.591
max,0.984,0.968,417920.0,0.975,0.982,11.0,0.955,-1.352,1.0,0.74,203.911,5.0,0.963


In [42]:
fdt = pd.merge(dat, aud, on='track_id', how='inner')

KeyError: 'track_id'