A pipeline from users liked songs that are not labelled should then get the lyrics for the songs, parse the lyrics from the web, and then input them into the function that calls the model (if present)

Given:

Returns: Lyrics in a list line by line

Cases to manage:
  * Does song need to be translated?
  * Are there multiple artists being passed? (site automatically handles)

https://developer.spotify.com/documentation/web-api/reference/#/operations/get-track

In [2]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from bs4 import element

In [222]:
#Helps parse miscellaneous tags <i>, </br>, etc,.
def _lyricsHelper(html, lyrics_list):
    for tag in html.childGenerator():
        if type(tag) == element.NavigableString:
            _handleLyricAppend(lyrics_list, tag.text.strip(), index)
        elif tag.name == 'br' and lyrics_list[len(lyrics_list) - 1] != '':
            lyrics_list.append('')

#Reads the HTML for lyrics dividers (if they exist) and appends the lyrics line by line to a list
def _getLyricsFromHTML(html):
    lyrics = html.findAll("div", {"data-lyrics-container" : "true"})
    lyrics = html.findAll("div", {"data-lyrics-container" : "true"})
    lyrics_list = ['']
    index = 0
    for segment in lyrics:
        for a in segment.childGenerator():
            lyric = None
            if type(a) == element.NavigableString:
                lyric = a.strip()
                _handleLyricAppend(lyrics_list, lyric, index)
            else:
                _lyricsHelper(a, lyrics_list)
            if a.name == 'br' and lyrics_list[len(lyrics_list) - 1] != '':
                lyrics_list.append('')
    return lyrics_list

#Helper function to handle appending and manipulating lyrics_list. A new line is generated only for </br> tags
def _handleLyricAppend(lyrics_list, lyric, last_index):
    if lyric is not None:
        last_index = len(lyrics_list) - 1
        #Handle special cases (parenthesis and symbols stick with words for instance)
        if lyrics_list[last_index] != '' and (lyrics_list[last_index][-1] in ['(','[','{','<'] or lyric in [')',']','}','>','!','?',',','.']):
            lyrics_list[last_index] += lyric
        else:
            lyrics_list[last_index] += " " + lyric if lyrics_list[last_index] != '' else lyric

#Determines whether a song will need to be translated (returns the link if it does, otherwise returns None)
def _getSongTranslationLink(html):
    translation_tags = html.findAll('a', class_='TextButton-sc-192nsqv-0 hVAZmF LyricsControls__TextButton-sghmdv-6 hXTHjZ')
    for tag in translation_tags:
        if "english-translations" in tag['href']:
            return tag['href']
    return None

#Determines whether a page exists
def _pageExists(html):
    return html.find('div', class_='render_404') == None
        
#function to scrape lyrics from genius, takes an array of artists, and songname
def scrapeLyrics(artistnames, songname):
    lyrics_list = []
    found = False
    i = 0
    html = None
    while i < len(artistnames) and not(found):
        artistname = artistnames[i]
        artistname2 = str(artistname.replace(' ','-')) if ' ' in artistname else str(artistname)
        songname2 = str(songname.replace(' ','-')) if ' ' in songname else str(songname)
        page_url = 'https://genius.com/'+ artistname2 + '-' + songname2 + '-' + 'lyrics'
        page = requests.get(page_url)
        html = BeautifulSoup(page.text, 'html.parser') 
        found = _pageExists(html)
        i += 1
    if found:
        translation_url = _getSongTranslationLink(html)
        if translation_url is not None:
            page = requests.get(translation_url)
            html = BeautifulSoup(page.text, 'html.parser') 
        lyrics_list = _getLyricsFromHTML(html)
    return lyrics_list
    

#function to display lyrics
def printLyrics(lyrics):
    for line in lyrics:
        print(line)


# function to attach lyrics onto data frame
# artist_name should be inserted as a string
# def lyrics_onto_frame(df1):
#     for i,entry in enumerate(df1.values):
#       test = scrape_lyrics(entry[0], entry[1])
#       df1.loc[i, 'lyrics'] = test
#     return df1

In [223]:
artistname = ["red hot chili peppers"]
songname = "in the snow"
scrapeLyrics(artistname, songname)

['[Verse 1]',
 'My mates have all gone married now',
 'Off living in a kindred cloud',
 'Not that kind',
 "They cuddle up with kitten's bone",
 'Puddle in their beds at home',
 "I still can't find",
 '[Chorus]',
 'Tell me what you wanna see',
 'Tell me what you want',
 "And I'll take my time",
 "And I'll move it forward, now",
 'Do you want to come with me?',
 'Do you want to come?',
 "And I'll take my time",
 "And I'll move it closer, now",
 '[Verse 2]',
 'I check my stupid phone again',
 "No matter that it's 4:00 am",
 'It burns my eyes',
 "A spotlight's born to shine at night",
 'Come what may, it always might',
 'Burn so bright',
 '[Chorus]',
 'Tell me what you wanna see',
 'Tell me what you want',
 "And I'll take my time",
 "And I'll move it forward, now",
 'Do you want to come with mе?',
 'Do you want to come?',
 "And I'll take my time",
 "And I'll movе it closer, now",
 '[Post-Chorus]',
 'Slow rodeo',
 'Roll over, roll over',
 'Slow rodeo',
 'In the snow',
 'Slow rodeo',
 'Hold 

In [221]:
artistname = ["luis fonsi"]
songname = "despacito"
scrapeLyrics(artistname, songname)

0
<i>
 Daddy Yankee
</i>

1
<br/>

2
<i>
 DY!
</i>

3
<br/>

4
<br/>

5
<br/>

6
<i>
 Dididiri Daddy, go!
</i>

7
<br/>

8
<br/>

9
<i>
 DY!
</i>

10
<br/>

11
<i>
 I saw that your gaze was calling me
 <br/>
 Show me the way that I will go
</i>

12
<br/>

13
<br/>

14
<i>
 Daddy Yankee
</i>

15
<br/>

16
<br/>

17
<br/>

18
<br/>

19
<i>
 Oh, yeah!
</i>

20
<br/>

21
<i>
 Now, I'm already liking it more than usual
 <br/>
 All of my senses are asking for more
 <br/>
 We must take this slowly
</i>

22
<br/>

23
<br/>

24
<i>
 Daddy Yankee
</i>

25
<br/>

26
<br/>

27
<br/>

28
<br/>

29
<br/>

30
<br/>

31
<br/>

32
<br/>

33
<br/>

34
<i>
 Turn it up, turn it up, turn it up; turn it up, turn it up
</i>

35
<br/>

36
<i>
 Daddy Yankee
</i>

37
<br/>

38
<br/>

39
<br/>

40
<i>
 Favorites, favorites, baby
</i>

41
<br/>

42
<br/>

43
<br/>

44
<br/>

45
<br/>

46
<br/>

47
<br/>

48
<br/>

49
<br/>

50
<br/>

51
<br/>

52
<br/>

53
<br/>

54
<br/>

55
<br/>

56
<br/>

57
<br/>

58
<br/>



['[Intro: Luis Fonsi & Daddy Yankee]',
 'Oh, Fonsi! DY!',
 'Ooh, oh, no, oh, no, oh',
 'Hey, yeah!',
 'Dididiri Daddy, go!',
 "Yes, you know I've I've spent a long time looking at you",
 'I must dance with you today (DY!)',
 'I saw that your gaze was calling me',
 'Show me the way that I will go',
 '[Verse 1: Luis Fonsi & Daddy Yankee]',
 'Oh!',
 "You, you are the magnet and I'm the metal",
 "I'm getting closer and forming a plan",
 'Just thinking about it accelerates my pulse (Oh, yeah!)',
 "Now, I'm already liking it more than usual",
 'All of my senses are asking for more',
 'We must take this slowly',
 '[Chorus: Luis Fonsi & Daddy Yankee]',
 'Slowly',
 'I want to breathe in your neck slowly',
 'Let me whisper things into your ear',
 "So that you remember if you'rе not with me",
 'Slowly',
 'I want to undress you with kisses slowly',
 'To sign thе walls of your labyrinth',
 'And to make a manuscript of your body',
 '(Turn it up, turn it up, turn it up; turn it up, turn it up)',
 '[P

In [25]:
data = [("bruno mars", "thats what i like")]
df1 = pd.DataFrame(data=data, columns=["Artist", "Song"])
lyrics_onto_frame(df1)

https://genius.com/bruno-mars-thats-what-i-like-lyrics
<!DOCTYPE html>
<html>
 <head>
  <title>
   Bruno Mars – That's What I Like Lyrics | Genius Lyrics
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width,initial-scale=1" name="viewport"/>
  <meta content="app-id=709482991" name="apple-itunes-app"/>
  <link href="https://assets.genius.com/images/apple-touch-icon.png?1677774882" rel="apple-touch-icon"/>
  <link href="https://assets.genius.com/images/apple-touch-icon.png?1677774882" rel="apple-touch-icon"/>
  <!-- Mobile IE allows us to activate ClearType technology for smoothing fonts for easy reading -->
  <meta content="on" http-equiv="cleartype"/>
  <meta content="f63347d284f184b0" name="y_key"/>
  <meta content="Genius" property="og:site_name">
   <meta content="265539304824" property="fb:app_id">
    <meta content="308252472676410" property="fb:pages">
     <link href="https://genius.com/opensearch.xml" rel="search"

AttributeError: 'NoneType' object has no attribute 'prettify'