In [73]:
import os

import altair as alt
import numpy as np
import pandas as pd
import spotipy
import unidecode
from spotipy.oauth2 import SpotifyClientCredentials

In [74]:
def _strip_spotify_unfriendly(names):
    """Remove the characters that Spotify's search handles poorly.

    Strips every '//' sequence and every '¡' / '!' character.

    Parameters
    ----------
    names : pandas.Series
        Artist names (strings; missing values are allowed and stay missing).

    Returns
    -------
    pandas.Series
        A new Series with the same index and the characters removed.
    """
    # `regex` is spelled out on purpose: pandas >= 2.0 treats the pattern as a
    # literal string by default, which would leave the '[¡!]' class unmatched.
    return (
        names
        .str.replace('//', '', regex=False)
        .str.replace('[¡!]', '', regex=True)
    )


# Read in the processed FACTOR data
data = pd.read_csv("data/processed/final_recipients_edited.csv")

# A list of artists in their original names and spellings.
# Missing (NaN) artists are dropped here, so no manual index bookkeeping is
# needed and the cell also works when the column has no NaN at all.
artists_orig = data['artist'].dropna().unique().tolist()

# Spotify's API doesn't handle certain characters well so fixing a few special cases
data_copy = data.copy()
data_copy['artist'] = _strip_spotify_unfriendly(data_copy['artist'])

# A list of artists with their edited name (special characters removed).
# Derived element-by-element from artists_orig so that artists_edited[i] always
# corresponds to artists_orig[i], even if two different original names become
# identical after cleaning.
artists_edited = _strip_spotify_unfriendly(
    pd.Series(artists_orig, dtype=object)
).tolist()

In [75]:
# These values will change for every user, so they are read from the environment
# instead of being stored in the notebook. Set SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before starting the kernel.
# If either variable is unset the value is None, and spotipy is expected to
# reject it with an authentication error when the client is created.
# NOTE(review): the key pair that used to be hardcoded here has been exposed in
# the notebook history and should be revoked in the Spotify developer dashboard.
cid = os.environ.get('SPOTIPY_CLIENT_ID')
secret = os.environ.get('SPOTIPY_CLIENT_SECRET')

In [76]:
# Authenticate with the client-credentials flow (no user login) and build the
# Spotify client used for all searches below. The long request timeout (100)
# is passed through to the credentials manager.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
    requests_timeout=100,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [77]:
%%time

# Look up every artist through spotipy and record follower count and popularity.
# followers[i] and popularity[i] belong to artists_edited[i]; a zero is stored
# for both when the search returns nothing or no result has the same name.
followers = []
popularity = []

for a in artists_edited:
    results = sp.search(q='artist:' + a, type='artist')
    candidates = results['artists']['items']

    # Names are compared after replacing special characters with their
    # regular versions and lower-casing both sides.
    wanted = unidecode.unidecode(a).lower()

    # First search result whose normalised name equals the requested one
    hit = next(
        (c for c in candidates if unidecode.unidecode(c['name']).lower() == wanted),
        None,
    )

    if hit is None:
        # no search results, or none of them matched the artist name
        followers.append(0)
        popularity.append(0)
    else:
        followers.append(hit['followers']['total'])
        popularity.append(hit['popularity'])

Wall time: 3min 31s


In [78]:
# One row per unique artist with the Spotify metrics collected above
data_pop_fol = pd.DataFrame({'artist': artists_orig, 'followers': followers, 'popularity': popularity})

# Build the analysis table in a single chain so that every step returns a new
# frame (no in-place drop on a filtered frame, which can trigger
# SettingWithCopyWarning and makes the cell unsafe to re-run).
data_merged = (
    data
    # Combine follower/popularity data with the processed data. The join key is
    # named explicitly instead of relying on "all shared columns", and
    # validate= asserts that each artist appears only once in data_pop_fol.
    .merge(data_pop_fol, how='inner', on='artist', validate='many_to_one')
    # Remove the "Collective Initiative" grants which contain organizations
    .query("grant_type != 'Collective Initiatives'")
    # Keep only rows that actually have an offer amount
    .dropna(subset=['offer'])
    # Manually remove the outlier James Brown because it's not the famous James Brown
    .query("artist != 'James Brown'")
)

In [79]:
# Collapse the per-grant rows to one row per artist (followers and popularity
# are carried along as grouping keys) and add up the offers each received.
offers_by_artist = data_merged.groupby(
    by=['artist', 'followers', 'popularity'],
    as_index=False,
)
data_grouped = offers_by_artist['offer'].sum()

In [80]:
# Write both result tables to CSV, without the index column
for frame, csv_path in (
    (data_grouped, "data/processed/spotify_followers_popularity.csv"),
    (data_merged, "data/processed/spotify_with_grant_type.csv"),
):
    frame.to_csv(csv_path, index=False)

In [81]:
# Total offer per artist (x) against Spotify followers (y), one circle per artist
(
    alt.Chart(data_grouped)
    .mark_circle(size=49, opacity=0.5)
    .encode(
        x='offer',
        y='followers',
        tooltip=['artist', 'followers'],
    )
)

In [82]:
# Total offer per artist (x) against Spotify popularity (y), one circle per artist
(
    alt.Chart(data_grouped)
    .mark_circle(size=49, opacity=0.5)
    .encode(
        x='offer',
        y='popularity',
        tooltip=['artist', 'popularity'],
    )
)

In [83]:
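# Scratch check kept for reference: inspect the raw Spotify search results for a
# single artist (e.g. to see which "James Brown" the search actually returns).
# a = "James Brown"
# results = sp.search(q='artist:' + a, type='artist')
# results['artists']["items"]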

In [84]:
# Per-grant scatter of offer against followers, coloured by grant type.
# Clicking entries in the legend selects grant types: points of selected types
# are drawn fully opaque, all others fully transparent.
selection = alt.selection_multi(bind='legend', fields=['grant_type'])
legend_opacity = alt.condition(selection, alt.value(1), alt.value(0))

(
    alt.Chart(data_merged)
    .mark_circle(size=49, opacity=0.5)
    .encode(
        x='offer',
        y='followers',
        color='grant_type',
        opacity=legend_opacity,
        tooltip=['artist', 'followers'],
    )
    .add_selection(selection)
)

In [87]:
# Spot check: show every grant row recorded for one well-known artist
data_merged.loc[data_merged['artist'].eq("Daniel Caesar")]

Unnamed: 0,grant_type,year,applicant,artist,applicant_province,artist_province,offer,followers,popularity
1273,Comprehensive Artist,2020-2021,Golden Child Inc,Daniel Caesar,Ontario,Ontario,20000.0,2744439,88
1274,Comprehensive Artist,2019-2020,Golden Child Inc,Daniel Caesar,Ontario,Ontario,59000.0,2744439,88
