In [594]:
import sys
import os
import configparser
import spotipy
import spotipy.util as util
import spotipy.oauth2 as oauth2
import requests
import json
import config
import time
import pandas as pd
import numpy as np
from json.decoder import JSONDecodeError
import matplotlib.pyplot as plt
from spotipy.oauth2 import SpotifyClientCredentials

In [343]:
client_credentials_manager = SpotifyClientCredentials(client_id=config.client_id, 
                                                          client_secret=
                                                          config.client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [185]:
# get song ids, given a playlist ID, username and the length of the playlist
def get_song_ids(playlistID, playlist_length):
    song_ids = []

    # need to paginate (only 100 ids given per request)
    for i in range(0, playlist_length, 100):
        song_ids.extend([x['track']['id'] for x in 
                       sp.user_playlist_tracks(config.username, 
                                               playlistID, offset=i, 
                                               fields='items(track(id))')['items']])
        time.sleep(.35)
    return song_ids

In [130]:
# get song features, given list of ids to get the features of
def get_features(ids):
    features = []
    i=0
    for track_id_list in ids:
    # can only get 50 at a time
        while i < len(ids):
            features.extend(sp.audio_features(tracks=track_id_list[i:i+50]))
            i += 50
            time.sleep(.35)

        # above will only run if len(ids) > 50
        if len(ids) <= 50:
            features.extend(sp.audio_features(tracks=track_id_list))

    return features

In [187]:
def get_track_objects(track_ids):
    tracks_info = []
    for track_id_list in track_ids:
        info = sp.tracks(track_id_list)
        tracks_info.append(info)
    return tracks_info

In [188]:
#Create a function to collect the tracks info for a given list of ids
def get_track_objects(track_ids):
    tracks_info = []
    for track_id_list in track_ids:
        info = sp.tracks(track_id_list)
        tracks_info.append(info)
    return tracks_info

In [186]:
#Use the artist and track to find a song id
def get_track_id(artist_name,track_name):
    results = sp.search("artist: " + artist_name + ", track: " + track_name)
    return results['tracks']['items'][0]['uri']

In [446]:
def get_artist_id(artist_name):
    results = sp.search("artist:"+artist_name)
    artist_id = results['tracks']['items'][0]['artists'][0]['uri']
    artist_id = artist_id.lstrip('spotify:artist:')
    return artist_id

In [467]:
def get_albums_id(artist_id, limit=50):
    albums = sp.artist_albums(artist_id,country="US", limit=limit)
    album_id = [album['uri'].strip('spotify:album:') for album in albums['items']]
    return album_id

In [174]:
mykle_lowry = '0JmShn6UeNuPx16P5CmRMQ'

In [451]:
lst = [fergie_baby, mykle_lowry, pop_smoke]

In [773]:
top_2019_artist = ['Drake','Pusha T', 'Kodak Black','XXXTentacion',
                    'Meek Mill','Tory Lanez','Offset','Lil Baby','Gucci Mane',
                    'Future','French Montana','21 Savage','A Boogie Wit da Hoodie',
                    'Jay Rock','Nippsy Hussle','Gunna','Travis Scott', 'Roddy Rich', 'Pop Smoke']

In [775]:
artist_2019 = []
for items in top_2019_artist:
    try:
        items_id = get_artist_id(items.lower())
        artist_2019.append(items_id)
    except:
        None

In [783]:
albums_ids = []
for artists in artist_2019:
    albums_ides = get_albums_id(artists, 50)
    albums_ids.extend(albums_ides)

all_tracks = []
for album_id in albums_ids:
    try: 
        tracks = sp.album_tracks(album_id, limit=50)
        all_tracks.append(tracks)
    except:
        None

track_ids = []
for tracks in all_tracks:
    album_tracks = []
    for track in tracks.get('items'):
        album_tracks.append(track.get('uri'))
    track_ids.append(album_tracks)
print(len(track_ids))

track_objects = []
for track_id_list in track_ids:
    tracks = sp.tracks(track_id_list)
    track_objects.append(tracks)
print(len(track_objects))

audio_feature_objects = []
for track_id_list in track_ids:
    features = sp.audio_features(track_id_list)
    audio_feature_objects.append(features)
print(len(audio_feature_objects))


retrying ...1secs
retrying ...3secs
669
retrying ...2secs
retrying ...3secs
669
retrying ...3secs
retrying ...2secs
retrying ...3secs
retrying ...1secs
retrying ...3secs
669


In [784]:
spotify_data = {
    "audio_features": audio_feature_objects,
    "tracks": track_objects
}

In [785]:
#Reformatting from a dictionary to a table
df = pd.DataFrame(columns=[
    'name',
    'duration_ms',
    'popularity',
    'num_markets',
    'album',
    'disc_number',
    'is_explicit',
    'track_number',
    'release_date',
    'artist',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'time_signature',])

df = []

for album_info, album_features in zip(spotify_data.get('tracks'), spotify_data.get('audio_features')):
    for track_info, track_features in zip(album_info.get('tracks'),album_features):
        y = {
            'name': track_info['name'],
            'duration_ms': track_info['duration_ms'],
            'popularity': track_info['popularity'],
            'num_markets': len(track_info['available_markets']),
            'album': track_info['album']['name'],
            'disc_number': track_info['disc_number'],
            'is_explicit': track_info['explicit'],
            'track_number': track_info['track_number'],
            'release_date': track_info['album']['release_date'],
            'artist': track_info['artists'][0]['name'],
            'danceability': track_features['danceability'],
            'energy': track_features['energy'],
            'key': track_features['key'],
            'loudness': track_features['loudness'],
            'mode': track_features['mode'],
            'speechiness': track_features['speechiness'],
            'acousticness': track_features['acousticness'],
            'instrumentalness': track_features['instrumentalness'],
            'liveness': track_features['liveness'],
            'valence': track_features['valence'],
            'tempo': track_features['tempo'],
            'time_signature': track_features['time_signature'],
        }
        df.append(y)

In [786]:
top_2019_artist = pd.DataFrame(df)

In [788]:
top_2019_artist[top_2019_artist['artist'] == 'Pop Smoke']

Unnamed: 0,name,duration_ms,popularity,num_markets,album,disc_number,is_explicit,track_number,release_date,artist,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
5655,Meet the Woo,121080,50,79,Meet The Woo (Deluxe Version),1,True,1,2019-09-13,Pop Smoke,...,1,-8.241,1,0.29,0.022,7.1e-05,0.0881,0.383,142.987,4
5656,Welcome To The Party,214960,51,79,Meet The Woo (Deluxe Version),1,True,2,2019-09-13,Pop Smoke,...,2,-5.547,1,0.091,0.0134,7.9e-05,0.196,0.228,143.013,4
5657,Hawk Em,118800,48,79,Meet The Woo (Deluxe Version),1,True,3,2019-09-13,Pop Smoke,...,2,-5.523,1,0.221,0.262,1.4e-05,0.354,0.506,141.99,4
5658,Better Have Your Gun,199826,47,79,Meet The Woo (Deluxe Version),1,True,4,2019-09-13,Pop Smoke,...,1,-4.951,1,0.418,0.157,0.0,0.5,0.564,141.548,4
5659,Scenario,241786,48,79,Meet The Woo (Deluxe Version),1,True,5,2019-09-13,Pop Smoke,...,6,-4.342,0,0.325,0.0959,0.000977,0.283,0.555,143.093,4
5660,Dior,216386,52,79,Meet The Woo (Deluxe Version),1,True,6,2019-09-13,Pop Smoke,...,7,-5.732,1,0.351,0.212,0.000389,0.408,0.648,142.094,4
5661,Feeling,161280,46,79,Meet The Woo (Deluxe Version),1,True,7,2019-09-13,Pop Smoke,...,7,-6.723,1,0.338,0.41,0.0,0.153,0.489,71.44,4
5662,PTSD,199826,48,79,Meet The Woo (Deluxe Version),1,True,8,2019-09-13,Pop Smoke,...,10,-4.313,0,0.264,0.124,0.00278,0.158,0.199,91.615,4
5663,Brother Man,182880,46,79,Meet The Woo (Deluxe Version),1,True,9,2019-09-13,Pop Smoke,...,7,-7.165,1,0.145,0.435,9e-06,0.298,0.364,141.619,4
5664,Welcome To The Party (feat. Nicki Minaj) - Remix,180720,53,79,Meet The Woo (Deluxe Version),1,True,10,2019-09-13,Pop Smoke,...,2,-6.001,1,0.253,0.0317,0.0,0.158,0.41,142.931,4


In [577]:
main_stream = pd.DataFrame(df)

In [755]:
main_stream.to_csv('main_stream.csv')

In [578]:
roddy_rich = roddy_rich

In [584]:
main_stream['artist'].unique()

array(['Fergie Baby', 'Groove Gods Unite', 'YK Dee', 'Yung Quando',
       'Kongo The Rockstar', 'Lowry', 'UP'], dtype=object)

In [590]:
fergie = main_stream[main_stream['artist'] == 'Fergie Baby']

In [592]:
fergie.columns

Index(['name', 'duration_ms', 'popularity', 'num_markets', 'album',
       'disc_number', 'is_explicit', 'track_number', 'release_date', 'artist',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'time_signature'],
      dtype='object')

### Data Analysis

In [603]:
import plotly.graph_objects as go
from matplotlib import style

In [717]:
ferg_tempo = fergie['tempo'].mean()
rr_tempo = roddy_rich['tempo'].mean()
arts = ['Ferg', 'Roody']
fig = go.Figure([go.Bar(x=arts, y=[ferg_tempo,rr_tempo], 
                        text=[ferg_tempo,rr_tempo], textposition='auto')])
fig.update_layout(title_text=f'{arts[0]} vs {arts[1]} - Tempo')
fig.show()

In [719]:
ferg_liveness = fergie['liveness'].mean()
rr_liveness = roddy_rich['liveness'].mean()
arts = ['Ferg', 'Roody']
fig = go.Figure([go.Bar(x=arts, y=[ferg_liveness,rr_liveness],
                       text=[ferg_liveness,rr_liveness], textposition='auto')])
fig.update_layout(title_text=f'{arts[0]} vs {arts[1]} - Liveness')
fig.show()

In [720]:
roddy_rich.columns

Index(['name', 'duration_ms', 'popularity', 'num_markets', 'album',
       'disc_number', 'is_explicit', 'track_number', 'release_date', 'artist',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'time_signature'],
      dtype='object')

In [738]:
roddy_rich['speechiness'].mean()

0.21022677419354846

In [737]:
fergie['speechiness'].mean()

0.42808000000000007

In [753]:
categories = ['danceability','energy',
              'acousticness', 'valence',
              'liveness', 'speechiness',
             'instrumentalness']

fig = go.Figure()

fig.add_trace(go.Scatterpolar(
      r=[fergie['danceability'].mean(),fergie['energy'].mean(), 
         fergie['acousticness'].mean(),fergie['valence'].mean(),
        fergie['liveness'].mean(), fergie['speechiness'].mean(),
         fergie['instrumentalness'].mean()],
      theta=categories,
      fill='toself',
      name='Fergie'
))
fig.add_trace(go.Scatterpolar(
      r=[roddy_rich['danceability'].mean(),roddy_rich['energy'].mean(), 
         roddy_rich['acousticness'].mean(), roddy_rich['valence'].mean(),
        roddy_rich['liveness'].mean(),roddy_rich['speechiness'].mean(),
        roddy_rich['instrumentalness'].mean()],
      theta=categories,
      fill='toself',
      name='Roddy'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
    range =[0.1,.9]
    )),
  showlegend=True
)
fig.show()