A pipeline that takes the songs a user has liked that are not yet labelled, gets the lyrics for each song by parsing them from the web, and then passes them to the function that calls the model (if present).

Given: A list of artist names and a song name

Returns: Lyrics in a list line by line

Cases to manage:
  * Does song need to be translated?
  * Are there multiple artists being passed? (site automatically handles)

https://developer.spotify.com/documentation/web-api/reference/#/operations/get-track

In [1]:
import requests
from bs4 import BeautifulSoup
from bs4 import element

In [4]:
#Walks the children of a non-text node (<i>, <a>, <br>, etc.) and feeds their text into lyrics_list
def _lyricsHelper(html, lyrics_list):
    """Collect lyric text from the children of ``html`` into ``lyrics_list``.

    ``lyrics_list`` is modified in place; nothing is returned.

    For each direct child of ``html``:
      * a plain text node has its stripped text merged into the current
        (last) line via ``_handleLyricAppend``;
      * a ``<br>`` starts a new empty line, unless the current line is
        already empty;
      * anything else is recursed into, but only when ``html`` itself is
        an ``<a>`` tag.
    """
    for child in html.childGenerator():
        # Exact type comparison on purpose: NavigableString subclasses do not match here
        if type(child) == element.NavigableString:
            _handleLyricAppend(lyrics_list, child.text.strip())
            continue
        if child.name == 'br' and lyrics_list[-1] != '':
            lyrics_list.append('')
        # NOTE(review): this tests the parent (html), not the child, so tags nested
        # inside anything other than an <a> are skipped - confirm that is intended
        elif html.name == 'a':
            _lyricsHelper(child, lyrics_list)

#Finds the lyrics containers (if any) in the parsed page and builds the list of lyric lines from them
def _getLyricsFromHTML(html):
    """Return the lyric lines found in the parsed page ``html``.

    Every ``<div data-lyrics-container="true">`` is scanned in document
    order. Text that is a direct child of a container is merged into the
    current line, other nodes are handed to ``_lyricsHelper``, and a
    ``<br>`` directly under a container starts a new line when the
    current one is not empty.

    The list starts out as ``['']``, so that is what comes back when no
    container (or no text) is found.
    """
    collected = ['']
    containers = html.findAll("div", {"data-lyrics-container": "true"})
    # Lazily flatten "every child of every container" into a single stream
    nodes = (node for container in containers for node in container.childGenerator())
    for node in nodes:
        if type(node) == element.NavigableString:
            _handleLyricAppend(collected, node.strip())
        else:
            _lyricsHelper(node, collected)
        # Line breaks sitting directly under the container are handled here
        if node.name == 'br' and collected[-1] != '':
            collected.append('')
    return collected

#Helper function to handle appending and manipulating lyrics_list. A new line is generated only for </br> tags
def _handleLyricAppend(lyrics_list, lyric):
    """Merge a text fragment into the last line of ``lyrics_list`` in place.

    Args:
        lyrics_list: List of lyric lines built so far; its last element is
            the line currently being assembled.
        lyric: Text fragment to add. ``None`` and ``''`` are ignored.

    The fragment is joined to the current line with a single space, except
    that no space is inserted when the current line ends with an opening
    bracket or the fragment is exactly one closing bracket / punctuation
    mark. This function never starts a new line on its own, unless the
    list is empty, in which case the fragment becomes the first line.
    """
    # Nothing to add. Skipping '' matters: it used to append a stray space
    # (" " + '') to the current line.
    if not lyric:
        return
    # No line to extend yet, so the fragment becomes the first line
    if not lyrics_list:
        lyrics_list.append(lyric)
        return

    current = lyrics_list[-1]
    if current == '':
        lyrics_list[-1] = lyric
        return

    #Handle special cases (parenthesis and symbols stick with words for instance)
    opens_bracket = current[-1] in ('(', '[', '{', '<')
    is_closing_symbol = lyric in (')', ']', '}', '>', '!', '?', ',', '.')
    separator = '' if opens_bracket or is_closing_symbol else ' '
    lyrics_list[-1] = current + separator + lyric

#Determines whether a song will need to be translated (returns the link if it does, otherwise returns None)
def _getSongTranslationLink(html):
    """Return the URL of the song's English translation page, if one is linked.

    Args:
        html: Parsed page to search.

    Returns:
        The ``href`` of the first matching anchor whose link contains
        ``"english-translations"``, or ``None`` when there is no such
        anchor.
    """
    # NOTE(review): these class names look auto-generated and may change when the
    # site is redeployed - verify they still match the live markup
    translation_tags = html.find_all('a', class_='TextButton-sc-192nsqv-0 hVAZmF LyricsControls__TextButton-sghmdv-6 hXTHjZ')
    for tag in translation_tags:
        # .get() rather than ['href']: an anchor without an href must not raise KeyError
        href = tag.get('href')
        if href and "english-translations" in href:
            return href
    return None

#Determines whether a page exists
def _pageExists(html):
    """Return True when the parsed page has no ``<div class="render_404">``.

    Args:
        html: Parsed page to inspect.

    Returns:
        ``False`` if a ``div`` with class ``render_404`` is present,
        ``True`` otherwise.
    """
    # Identity check: find() returns None when nothing matches, and `is None`
    # cannot be affected by a custom __eq__ on the returned tag
    return html.find('div', class_='render_404') is None
        
#function to scrape lyrics from genius, takes an array of artists, and songname
def scrapeLyrics(artistnames, songname, timeout=10):
    """Scrape the English lyrics of a song from genius.com.

    Args:
        artistnames: Artist names to try, in order. The first one whose
            ``https://genius.com/<artist>-<song>-lyrics`` page exists is used.
        songname: Song title. Spaces in the artist and song names are
            replaced with ``-`` to build the URL; no other normalisation
            is done.
        timeout: Seconds to wait for each HTTP request before
            ``requests`` gives up. Without a timeout a stalled connection
            would block forever.

    Returns:
        The lyrics as a list of lines. The list is empty when no page
        exists for any of the artists, or when the page has no English
        translation link and is not marked as English.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors,
        timeouts) are not caught here and propagate to the caller.
    """
    song_slug = songname.replace(' ', '-')
    html = None
    found = False
    for artistname in artistnames:
        page_url = 'https://genius.com/' + artistname.replace(' ', '-') + '-' + song_slug + '-' + 'lyrics'
        page = requests.get(page_url, timeout=timeout)
        html = BeautifulSoup(page.text, 'html.parser')
        if _pageExists(html):
            found = True
            break
    if not found:
        return []

    #check if there is an english translation
    translation_url = _getSongTranslationLink(html)
    if translation_url is not None:
        page = requests.get(translation_url, timeout=timeout)
        html = BeautifulSoup(page.text, 'html.parser')
        return _getLyricsFromHTML(html)

    #If there isn't a translation, make sure it's in english in the first place.
    #The page's <script> blocks are searched for an escaped `language":"en` marker.
    english = any("language\\\":\\\"en" in str(script) for script in html.find_all('script'))
    return _getLyricsFromHTML(html) if english else []
    

#function to display lyrics, one line of the song per printed line
def printLyrics(lyrics):
    """Print every entry of ``lyrics`` on its own line, in order.

    Args:
        lyrics: Iterable of lyric lines, e.g. the list returned by
            ``scrapeLyrics``. Nothing is printed when it is empty.
    """
    for lyric_line in lyrics:
        print(lyric_line)

In [5]:
# Example: look up one song by a single artist (performs a live request to genius.com)
artistname = ["red hot chili peppers"]
songname = "in the snow"
scrapeLyrics(artistname, songname)

['[Verse 1]',
 'My mates have all gone married now',
 'Off living in a kindred cloud',
 'Not that kind',
 "They cuddle up with kitten's bone",
 'Puddle in their beds at home',
 "I still can't find",
 '[Chorus]',
 'Tell me what you wanna see',
 'Tell me what you want',
 "And I'll take my time",
 "And I'll move it forward, now",
 'Do you want to come with me?',
 'Do you want to come?',
 "And I'll take my time",
 "And I'll move it closer, now",
 '[Verse 2]',
 'I check my stupid phone again',
 "No matter that it's 4:00 am",
 'It burns my eyes',
 "A spotlight's born to shine at night",
 'Come what may, it always might',
 'Burn so bright',
 '[Chorus]',
 'Tell me what you wanna see',
 'Tell me what you want',
 "And I'll take my time",
 "And I'll move it forward, now",
 'Do you want to come with mе?',
 'Do you want to come?',
 "And I'll take my time",
 "And I'll movе it closer, now",
 '[Post-Chorus]',
 'Slow rodeo',
 'Roll over, roll over',
 'Slow rodeo',
 'In the snow',
 'Slow rodeo',
 'Hold 

In [6]:
# Example: the recorded output below is in English, so this lookup presumably
# went through the English-translation link - confirm against the live page
artistname = ["luis fonsi"]
songname = "despacito"
# artistname = ["the idan raichel project"]
# songname = "boee"
scrapeLyrics(artistname, songname)

['[Intro: Luis Fonsi & Daddy Yankee]',
 'Oh, Fonsi! DY!',
 'Ooh, oh, no, oh, no, oh',
 'Hey, yeah!',
 'Dididiri Daddy, go!',
 "Yes, you know I've I've spent a long time looking at you",
 'I must dance with you today (DY!)',
 'I saw that your gaze was calling me',
 'Show me the way that I will go',
 '[Verse 1: Luis Fonsi & Daddy Yankee]',
 'Oh!',
 "You, you are the magnet and I'm the metal",
 "I'm getting closer and forming a plan",
 'Just thinking about it accelerates my pulse (Oh, yeah!)',
 "Now, I'm already liking it more than usual",
 'All of my senses are asking for more',
 'We must take this slowly',
 '[Chorus: Luis Fonsi & Daddy Yankee]',
 'Slowly',
 'I want to breathe in your neck slowly',
 'Let me whisper things into your ear',
 "So that you remember if you'rе not with me",
 'Slowly',
 'I want to undress you with kisses slowly',
 'To sign thе walls of your labyrinth',
 'And to make a manuscript of your body',
 '(Turn it up, turn it up, turn it up; turn it up, turn it up)',
 '[P

In [7]:
# Example: a lookup that yields no lyrics (the recorded output is an empty list)
artistname = ["the idan raichel project"]
songname = "boee"
scrapeLyrics(artistname, songname)

[]

In [8]:
# Example: the song name is passed without its apostrophe ("thats"); scrapeLyrics
# only replaces spaces when building the URL
artistname = ["bruno mars"]
songname = "thats what i like"
scrapeLyrics(artistname, songname)

['[Verse 1]',
 'Ayy, ayy, ayy',
 'I got a condo in Manhattan',
 "Baby girl, what's happenin'?",
 'You and your ass invited',
 "So go on and get to clappin'",
 'So pop it for a player',
 'Pop, pop it for me',
 'Turn around and drop it for a player',
 'Drop, drop it for me',
 "I'll rent a beach house in Miami (-ami)",
 'Wake up with no jammies (Nope)',
 'Lobster tail for dinner',
 'Julio, serve that scampi (Julio!)',
 'You got it if you want it',
 'Got, got it if you want it',
 'Said, you got it if you want it',
 'Take my wallet if you want it now',
 '[Pre-Chorus]',
 "Jump in the Cadillac, girl, let's put some miles on it",
 'Anything you want, just to put a smile on it',
 'You deserve it, baby, you deserve it all',
 "And I'm gonna give it to you",
 '[Chorus]',
 "Gold jewellery shinin' so bright",
 'Strawberry champagne on ice',
 "Lucky for you, that's what I like, that's what I like",
 "Lucky for you, that's what I like, that's what I like",
 'Sex by the fire at night',
 'Silk sheets an

In [56]:
# artistname = "luis fonsi"
# songname = "despacito"
# artistname2 = str(artistname.replace(' ','-')) if ' ' in artistname else str(artistname)
# songname2 = str(songname.replace(' ','-')) if ' ' in songname else str(songname)
# page_url = 'https://genius.com/'+ artistname2 + '-' + songname2 + '-' + 'lyrics'
# page = requests.get(page_url)
# html = BeautifulSoup(page.text, 'html.parser') 