In [11]:
from lyricsgenius.api import Genius
from config import genius_client_access_token, s3_bucket
import yaml
import json
from tqdm.notebook import tqdm
import boto3
import pandas as pd

In [3]:
# Genius API client built from the access token in config, with verbose=False.
genius_api = Genius(
    genius_client_access_token,
    verbose=False,
)

In [5]:
# Upper bound on the number of songs requested per artist.
MAX_SONGS = 3

# Read the artist names to look up from the local YAML file.
# NOTE(review): yaml.load with FullLoader is kept as-is; if artist_list.yaml could ever
# come from an untrusted source, switch to yaml.safe_load.
with open("artist_list.yaml", 'r', encoding="utf-8") as f:
    # An empty YAML document loads as None; fall back to an empty list so the loop is a no-op.
    artists_names = yaml.load(f, Loader=yaml.FullLoader) or []


# One dict per fetched song; consumed by the upload and DataFrame cells below.
songs = []
for artist_name in tqdm(artists_names):
    try:
        artist = genius_api.search_artist(artist_name, max_songs=MAX_SONGS)
    except Exception as e:
        # Best-effort: skip this artist but surface what went wrong.
        # KeyboardInterrupt/SystemExit are not Exception subclasses, so they still propagate.
        print(
            f"Could not fetch results for artist : {artist_name}. Continuing..."
            f" ({type(e).__name__}: {e})"
        )
        continue

    # No artist object came back (e.g. no match found): nothing to collect for this name.
    if artist is None:
        print(f"Could not fetch results for artist : {artist_name}. Continuing...")
        continue

    for song in artist.songs:
        songs.append({
            "artist_name": artist_name,          # name as written in artist_list.yaml
            "artist_name_genius": artist.name,   # name as returned by Genius
            "song": song.title,
            "lyrics": song.lyrics,
            # Keep only the first four characters of the year field when present.
            "year": song.year[:4] if song.year is not None else None,
            "featured_artists": song.featured_artists,
            "url": song._url
        })

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




In [8]:
# Handle on the S3 bucket named in config, obtained through the boto3 resource API.
bucket = (
    boto3
    .resource("s3")
    .Bucket(s3_bucket)
)

In [9]:
# Serialize the collected songs as indented JSON with sorted keys and store the result
# under the "lyrics_data.json" key of the bucket. The put() response is the cell output.
bucket.Object(key="lyrics_data.json").put(
    Body=json.dumps(
        songs,
        indent=4,
        sort_keys=True,
    ),
)

{'ResponseMetadata': {'RequestId': '46C24C0AE540D40F',
  'HostId': 'iWnxN3HvYx86vQlCM1oPxGKFMWi8ArsK18879Qyz9ouDf4kxIuMNAIRYRpODFGs8gs4J5vgakfQ=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'iWnxN3HvYx86vQlCM1oPxGKFMWi8ArsK18879Qyz9ouDf4kxIuMNAIRYRpODFGs8gs4J5vgakfQ=',
   'x-amz-request-id': '46C24C0AE540D40F',
   'date': 'Mon, 02 Dec 2019 10:52:24 GMT',
   'etag': '"8dbed6e7038755331c1258fde384055d"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"8dbed6e7038755331c1258fde384055d"'}

In [12]:
# Tabular view of the collected songs: one row per song dict.
df_songs = pd.DataFrame(songs)
# Preview up to 5 random rows. The sample size is clamped to the number of rows because
# sample() raises ValueError when asked for more rows than exist (without replacement),
# and random_state is fixed so the preview is the same on every Restart & Run All.
df_songs.sample(n=min(5, len(df_songs)), random_state=0)

Unnamed: 0,artist_name,artist_name_genius,song,lyrics,year,featured_artists,url
1,7 jaws,7 Jaws,3h30,"[Refrain]\nYuh, 3H30 la tequila s'ressent dans...",2016.0,[],https://genius.com/7-jaws-3h30-lyrics
3,a2h,A2H,Une dernière fois,"[Intro]\nPromis : demain, j'arrête cette merde...",2016.0,[],https://genius.com/A2h-une-derniere-fois-lyrics
8,akhenaton,Akhenaton,La Fin de leur Monde,Avant-propos :[Couplet 1 : Shurik'n]\nRegarde ...,2006.0,"[{'api_path': '/artists/1967', 'header_image_u...",https://genius.com/Akhenaton-la-fin-de-leur-mo...
7,akhenaton,Akhenaton,Bad boys de Marseille (Part 2),"Canaille, canaille, ah le mauvais garçon.(x3)\...",1996.0,"[{'api_path': '/artists/13333', 'header_image_...",https://genius.com/Akhenaton-bad-boys-de-marse...
6,akhenaton,Akhenaton,"Mon texte, le savon","[Couplet 1]\nC'que j'te livre, non rien de com...",,[],https://genius.com/Akhenaton-mon-texte-le-savo...


In [14]:
# Number of collected songs per artist (keyed on the name from the YAML list).
df_song_count = (
    df_songs
    .groupby("artist_name")
    .size()
    .reset_index(name="song_count")
)
# Display the counts in ascending order.
df_song_count.sort_values(by="song_count")

Unnamed: 0,artist_name,song_count
0,7 jaws,3
1,a2h,3
2,akhenaton,3
