In [142]:
import requests
import requests_cache
from decouple import config
import json
import time

In [143]:
from IPython.core.display import clear_output

In [144]:
requests_cache.install_cache()

In [145]:
API_KEY = config('API_KEY')
SHARED_SECRET = config('SHARED_SECRET')
CALLBACK = config('CALLBACK')

In [146]:
USER_AGENT = 'SidSaxena'

In [147]:
def lastfm_get(payload):
    #define headers and URL
    headers = {'user-agent': USER_AGENT}
    url = 'http://ws.audioscrobbler.com/2.0/'

    #api key and format to the payload
    payload['api_key'] = API_KEY
    payload['format'] = 'json'

    response = requests.get(url=url, headers=headers, params=payload)
    return response

In [148]:
def jprint(obj):
    # create a formatted string from the json object
    text = json.dumps(obj, sort_keys=True, indent=4)
    print(text)

In [149]:
def getTopArtists():
        
    # empty list
    responses = []

    # inital page and high number of pages (wouldn't it result in an uneccesarily long loop?)
    page = 1
    total_pages = 99999

    while page < total_pages:
        payload = {
            'method': 'chart.gettopartists',
            'limit': 500,
            'page': page
        }

        #see status
        print('Requesting Page {}/{}'.format(page, total_pages))
        #clear output
        clear_output(wait=True)

        # make api call
        response = lastfm_get(payload)

        # if we get an error, print and halt
        if response.status_code != 200:
            print(response.text)
            break

        # extract pagination info
        page = int(response.json()['artists']['@attr']['page'])
        total_pages = int(response.json()['artists']['@attr']['totalPages'])

        # append response
        responses.append(response)

        # if not cached, sleep
        if not getattr(response, 'from_cache', False):
            time.sleep(0.25)

        page += 1

    return responses

In [150]:
responses = getTopArtists()

Requesting Page 7002/7003


In [151]:
import pandas as pd

In [152]:
r0 = responses[0]
r0_json = responses[0].json()
r0_artists = r0_json['artists']['artist']
r0_df = pd.DataFrame(r0_artists)
r0_df.head() 

Unnamed: 0,name,playcount,listeners,mbid,url,streamable,image
0,Taylor Swift,268125255,2494732,20244d07-534f-4eff-b4d4-930878889970,https://www.last.fm/music/Taylor+Swift,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
1,The Weeknd,125834230,1616048,c8b03190-306c-4120-bb0b-6f2ebfc06ea9,https://www.last.fm/music/The+Weeknd,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
2,Billie Eilish,68348860,866564,,https://www.last.fm/music/Billie+Eilish,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
3,Lady Gaga,341894974,4086444,650e7db6-b795-4eb5-a702-5ea2fc46c848,https://www.last.fm/music/Lady+Gaga,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
4,Kanye West,302868003,4648531,164f0d73-1234-4e2c-8743-d77bf2191051,https://www.last.fm/music/Kanye+West,0,[{'#text': 'https://lastfm.freetls.fastly.net/...


In [153]:
frames = [pd.DataFrame(r.json()['artists']['artist']) for r in responses]
artists = pd.concat(frames)
artists.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10100 entries, 0 to 999
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        10100 non-null  object
 1   playcount   10100 non-null  object
 2   listeners   10100 non-null  object
 3   mbid        10100 non-null  object
 4   url         10100 non-null  object
 5   streamable  10100 non-null  object
 6   image       10100 non-null  object
dtypes: object(7)
memory usage: 631.2+ KB


In [154]:
artists = artists.drop('image', axis=1)
artists.head()

Unnamed: 0,name,playcount,listeners,mbid,url,streamable
0,Taylor Swift,268125255,2494732,20244d07-534f-4eff-b4d4-930878889970,https://www.last.fm/music/Taylor+Swift,0
1,The Weeknd,125834230,1616048,c8b03190-306c-4120-bb0b-6f2ebfc06ea9,https://www.last.fm/music/The+Weeknd,0
2,Billie Eilish,68348860,866564,,https://www.last.fm/music/Billie+Eilish,0
3,Lady Gaga,341894974,4086444,650e7db6-b795-4eb5-a702-5ea2fc46c848,https://www.last.fm/music/Lady+Gaga,0
4,Kanye West,302868003,4648531,164f0d73-1234-4e2c-8743-d77bf2191051,https://www.last.fm/music/Kanye+West,0


In [155]:
artists.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10100 entries, 0 to 999
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        10100 non-null  object
 1   playcount   10100 non-null  object
 2   listeners   10100 non-null  object
 3   mbid        10100 non-null  object
 4   url         10100 non-null  object
 5   streamable  10100 non-null  object
dtypes: object(6)
memory usage: 552.3+ KB


In [156]:
artists.describe()

Unnamed: 0,name,playcount,listeners,mbid,url,streamable
count,10100,10100,10100,10100.0,10100,10100
unique,9600,9587,9485,6603.0,9600,1
top,K.Will,178189,32781,,https://www.last.fm/music/Scrim,0
freq,2,3,3,3161.0,2,10100


In [157]:
artist_counts = [len(r.json()['artists']['artist']) for r in responses]


In [158]:
pd.Series(artist_counts).value_counts()

0       6982
500       18
1000       1
100        1
dtype: int64

In [159]:
print(artist_counts[:50])

[500, 100, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [160]:
artists = artists.drop_duplicates().reset_index(drop=True)
artists.describe()

Unnamed: 0,name,playcount,listeners,mbid,url,streamable
count,9600,9600,9600,9600.0,9600,9600
unique,9600,9587,9485,6603.0,9600,1
top,Syd Barrett,286429,85737,,https://www.last.fm/music/The+Bad+Plus,0
freq,1,2,3,2975.0,1,9600


In [161]:
def lookup_tags(artist):
    
    response = lastfm_get({
        'method': 'artist.getTopTags',
        'artist': artist
    })

    # if there's an error, return nothing
    if response.status_code != 200:
        return None
    
    # extract the top three tags and turn them into a string
    tags =[t['name'] for t in response.json()['toptags']['tag'][:3]]
    tags_str = ', '.join(tags)

    # rate limiting
    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)
    
    return tags_str

In [162]:
from tqdm import tqdm

In [163]:
tqdm.pandas()

In [164]:
artists['tags'] = artists['name'].progress_apply(lookup_tags)

100%|██████████| 9600/9600 [01:36<00:00, 99.13it/s]


In [169]:
# converting listeners and playcounts to int type

artists[["playcount", "listeners"]] = artists[["playcount", "listeners"]].astype(int)

In [170]:
# sorting by number of listeners

artists = artists.sort_values('listeners', ascending= False)
artists.head()

Unnamed: 0,name,playcount,listeners,mbid,url,streamable,tags
24,Coldplay,378637547,5586348,cc197bad-dc9c-440d-a5b5-d52ba2e14234,https://www.last.fm/music/Coldplay,0,"rock, alternative, britpop"
17,Radiohead,531866332,4915128,a74b1b7f-71a5-4011-9441-d0b5e4122711,https://www.last.fm/music/Radiohead,0,"alternative, alternative rock, rock"
10,Rihanna,218380083,4798610,db36a76f-4cdf-43ac-8cd0-5e48092d2bae,https://www.last.fm/music/Rihanna,0,"pop, rnb, female vocalists"
34,Red Hot Chili Peppers,310038207,4787749,8bfac288-ccc5-448d-9573-c33ea2aa5c30,https://www.last.fm/music/Red+Hot+Chili+Peppers,0,"rock, alternative rock, alternative"
31,Eminem,222715169,4741473,b95ce3ff-3d05-4e87-9e01-c97b66af13d4,https://www.last.fm/music/Eminem,0,"rap, Hip-Hop, Eminem"


In [171]:
artists.to_csv('artists.csv', index=False)

In [172]:
artists

Unnamed: 0,name,playcount,listeners,mbid,url,streamable,tags
24,Coldplay,378637547,5586348,cc197bad-dc9c-440d-a5b5-d52ba2e14234,https://www.last.fm/music/Coldplay,0,"rock, alternative, britpop"
17,Radiohead,531866332,4915128,a74b1b7f-71a5-4011-9441-d0b5e4122711,https://www.last.fm/music/Radiohead,0,"alternative, alternative rock, rock"
10,Rihanna,218380083,4798610,db36a76f-4cdf-43ac-8cd0-5e48092d2bae,https://www.last.fm/music/Rihanna,0,"pop, rnb, female vocalists"
34,Red Hot Chili Peppers,310038207,4787749,8bfac288-ccc5-448d-9573-c33ea2aa5c30,https://www.last.fm/music/Red+Hot+Chili+Peppers,0,"rock, alternative rock, alternative"
31,Eminem,222715169,4741473,b95ce3ff-3d05-4e87-9e01-c97b66af13d4,https://www.last.fm/music/Eminem,0,"rap, Hip-Hop, Eminem"
...,...,...,...,...,...,...,...
7859,Jesse Draxler,7616,4478,,https://www.last.fm/music/Jesse+Draxler,0,USA
8401,HVME,18678,4214,,https://www.last.fm/music/HVME,0,
8104,I-Land,70309,4075,1ef11458-7f5b-4c58-a650-89e2ef6110bb,https://www.last.fm/music/I-Land,0,
8967,Universo,13222,4020,,https://www.last.fm/music/Universo,0,deep house
