In [1]:
# pprint is part of the Python standard library, so nothing has to be
# installed with pip before importing it (the PyPI project that happens to be
# called "pprint" is unrelated to this module).
from pprint import pprint



In [2]:
!pip3 install beautifulsoup4



In [3]:
!pip3 install --upgrade pip

Requirement already up-to-date: pip in /usr/local/lib/python3.7/site-packages (20.1)


In [4]:
!pip3 install requests



In [5]:
from six.moves.urllib.parse import quote as _quote
from bs4 import BeautifulSoup as _BeautifulSoup
import requests as _requests

__BASE_URL__ = 'https://lyrics.wikia.com'

In [6]:
class LyricsNotFound(Exception):
    """Error raised when no lyrics could be obtained for a song.

    Args:
        message: Optional description stored as the exception's only argument.
    """

    # Make the class report the same ``__module__`` as the built-in Exception
    # instead of the module (or notebook) it is actually defined in.
    __module__ = Exception.__module__

    def __init__(self, message=None):
        super().__init__(message)

In [7]:
def urlize(string):
    """Turn free text into a LyricWikia-style URL fragment.

    The text is title-cased, split on whitespace, re-joined with underscores
    and finally percent-encoded.
    """
    title_cased_words = string.title().split()
    page_name = '_'.join(title_cased_words)
    return _quote(page_name)

In [8]:
def create_url(artist, song, language):
    """Build the LyricWikia page URL for a song.

    Args:
        artist: Artist name; converted with ``urlize``.
        song: Song title; converted with ``urlize``.
        language: Language to append as a lower-cased sub-page; any falsy
            value (such as ``''``) yields the URL without a language suffix.

    Returns:
        The URL as a string.
    """
    song_path = '/wiki/{artist}:{song}'.format(artist=urlize(artist), song=urlize(song))
    if not language:
        return __BASE_URL__ + song_path
    language_path = '/{language}'.format(language=urlize(language).lower())
    return __BASE_URL__ + song_path + language_path

In [58]:
def get_lyrics_for_all_languages(artist, song, linesep='\n', timeout=None):
    """Retrieve the lyrics of the song in all languages available.

    The song's default page is downloaded to locate the
    ``banner banner-song`` table. Every link carrying an ``href`` inside that
    table is treated as a language variant: the last path segment of the link
    is used as the language and the link text as the key of the result.

    Args:
        artist: Artist name.
        song: Song title.
        linesep: String substituted for every ``<br>`` tag in the lyrics.
        timeout: Timeout forwarded to each HTTP request.

    Returns:
        dict: ``'default'`` mapped to the lyrics of the default page, plus one
        entry per link found in the banner table. When the page has no banner
        table, only the ``'default'`` entry is returned.

    Raises:
        LyricsNotFound: if one of the requested pages contains no lyrics.
    """
    url = create_url(artist, song, '')
    response = _requests.get(url, timeout=timeout)
    soup = _BeautifulSoup(response.content, "html.parser")
    banner = soup.find('table', {'class': 'banner banner-song'})

    # Forward the caller's linesep/timeout rather than hard-coded values so
    # the function's own parameters actually take effect.
    result = {
        'default': get_lyrics_by_language(artist, song, '', linesep=linesep, timeout=timeout),
    }

    # find() returns None when the page has no language banner; in that case
    # only the default lyrics exist.
    if banner is None:
        return result

    for link in banner.findAll('a', href=True):
        language = link['href'].split('/')[-1]
        result[link.getText()] = get_lyrics_by_language(
            artist, song, language, linesep=linesep, timeout=timeout)

    return result

In [21]:
def get_lyrics_by_language(artist, song, language, linesep='\n', timeout=None):
    """Fetch the lyrics of a song in one specific language.

    All versions found on the page are retrieved through ``get_all_lyrics``;
    only the first of them is returned.
    """
    versions = get_all_lyrics(artist, song, language, linesep, timeout)
    return versions[0]

In [17]:
def get_lyrics(artist, song, language='', linesep='\n', timeout=None):
    """Fetch the lyrics of a song, defaulting to the page without a language.

    When several versions are available on the page, the first one wins.
    """
    all_versions = get_all_lyrics(
        artist,
        song,
        language,
        linesep,
        timeout,
    )
    first_version = all_versions[0]
    return first_version

In [18]:

def get_all_lyrics(artist, song, language='', linesep=' \n ', timeout=None):
    """Download a song page and return every lyrics version found on it.

    Args:
        artist: Artist name.
        song: Song title.
        language: Optional language sub-page; ``''`` selects the default page.
        linesep: Text that replaces each ``<br>`` tag inside a lyric box.
        timeout: Timeout passed to the HTTP request.

    Returns:
        A list with the stripped text of each ``div.lyricbox`` element, in
        page order.

    Raises:
        LyricsNotFound: if the page contains no ``div.lyricbox`` element.
    """
    page = _requests.get(create_url(artist, song, language), timeout=timeout)
    document = _BeautifulSoup(page.content, "html.parser")
    boxes = document.findAll('div', {'class': 'lyricbox'})

    if not boxes:
        raise LyricsNotFound('Cannot download lyrics')

    # First pass: swap every <br> in every box for the requested separator.
    for box in boxes:
        for line_break in box.findAll('br'):
            line_break.replace_with(linesep)

    # Second pass: collect the cleaned-up text of each box.
    versions = []
    for box in boxes:
        versions.append(box.text.strip())
    return versions

In [11]:
class Song(object):
    """A single song identified by its artist and title.

    The lyrics are not stored on the instance; they are looked up through
    ``get_lyrics`` each time the ``lyrics`` property is read.
    """

    def __init__(self, artist, title):
        self.artist, self.title = artist, title

    @property
    def lyrics(self):
        """Lyrics of the song's default page, fetched on every access."""
        return get_lyrics(self.artist, self.title, language='')

    def __str__(self):
        fields = (self.artist, self.title)
        return "Song(artist='%s', title='%s')" % fields

    def __repr__(self):
        # The repr is deliberately the same text as str().
        return str(self)

In [12]:
class Album(object):
    """An album of one artist, built from a mapping of album data.

    Args:
        artist: Artist name, stored as-is and passed on to every ``Song``.
        album_data: Mapping providing the keys ``'album'`` (title),
            ``'year'`` and ``'songs'`` (iterable of song titles).
    """

    def __init__(self, artist, album_data):
        self.artist = artist
        self.title = album_data['album']
        self.year = album_data['year']
        # Wrap each listed song title in a Song bound to the same artist.
        tracks = []
        for song_title in album_data['songs']:
            tracks.append(Song(artist, song_title))
        self.songs = tracks

    def __str__(self):
        fields = (self.artist, self.title)
        return "Album(artist='%s', title='%s')" % fields

    def __repr__(self):
        # The repr is deliberately the same text as str().
        return str(self)

In [13]:

class Artist(object):
    """An Artist backed by the LyricWikia API.

    Creating an instance performs an HTTP request against the ``getArtist``
    endpoint and builds one ``Album`` per entry of the returned ``'albums'``
    list.

    Args:
        name: Artist name to look up; converted with ``urlize`` for the query.
        timeout: Optional timeout forwarded to the HTTP request. The default
            ``None`` keeps the previous behaviour of waiting indefinitely.

    Attributes are set from the JSON response: ``name`` comes from the
    ``'artist'`` key and ``albums`` from the ``'albums'`` key.
    """

    __API__ = __BASE_URL__ + '/api.php?fmt=json&func=getArtist&artist={artist}'

    def __init__(self, name, timeout=None):
        url = self.__API__.format(artist=urlize(name))
        data = _requests.get(url, timeout=timeout).json()
        self.name = data['artist']
        self.albums = [Album(self.name, album) for album in data['albums']]

    def __str__(self):
        # One-element tuple so the name is always formatted as a single value.
        return "Artist(name='%s')" % (self.name,)

    def __repr__(self):
        return str(self)

In [59]:
# Example run: fetch every available language version of one song.
lyrics = get_lyrics_for_all_languages(artist='Daisy×Daisy', song='Evidence')

In [60]:
# Pretty-print the mapping of language name to lyrics text.
pprint(lyrics)

{'English': 'I was always looking for the evidence that I was here\n'
            'When I realized it, it was in my right pocket\n'
            "Today's sky is blue and is clearing up brightly\n"
            'It was as it made the world transparent\n'
            '\n'
            "Even me who said it can't be found and gave up\n"
            'Even you who said it could be found and comforted me\n'
            "The meaning of life doesn't change at all\n"
            '\n'
            'Fragile melody\n'
            'If I can spin out just one sure tone\n'
            "Then it doesn't matter\n"
            'If everything that I believed in was a lie\n'
            '\n'
            "Even when I presented the evidence that wasn't here\n"
            'It was always in my right pocket\n'
            "Today's streets are dark and cutting the stagnation\n"
            'It was as if it was filling up the world\n'
            '\n'
            'Even me who kept on scarring you, saying sorry and ru