| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| #-*- coding: UTF-8 -*- | ||
| ''' | ||
| Scraper for https://lrclib.net/ | ||
| lrclib | ||
| https://github.com/rtcq/syncedlyrics | ||
| ''' | ||
|
|
||
| import requests | ||
| import difflib | ||
|
|
||
| import sys | ||
| from optparse import OptionParser | ||
| from common import * | ||
|
|
||
# Grabber metadata; buildVersion() reports these values in its XML output.
__author__ = "Paul Harrison and ronie"
__title__ = "LrcLib"
__description__ = "Search https://lrclib.net for lyrics"
# NOTE(review): presumably used by the caller to rank grabbers - confirm.
__priority__ = "110"
__version__ = "0.1"
# Emitted as the <syncronized> element by buildLyrics() and buildVersion().
__syncronized__ = True

# Passed as the first argument to utilities.log(); main() sets it to True
# when -d/--debug is given.
debug = False
|
|
||
class LyricsFetcher:
    """Look up lyrics through the lrclib.net JSON API."""

    # A candidate matches when both similarity ratios exceed this value.
    MATCH_RATIO = 0.8

    def __init__( self ):
        self.SEARCH_URL = 'https://lrclib.net/api/search?q=%s-%s'
        self.LYRIC_URL = 'https://lrclib.net/api/get/%i'

    def _search_url(self, artist, title):
        """Return the search URL with artist and title percent-encoded."""
        from urllib.parse import quote
        # Encode reserved characters ('&', '#', '?', ...) so they cannot
        # truncate or split the q= parameter.
        return self.SEARCH_URL % (quote(artist, safe=''), quote(title, safe=''))

    def _matches(self, wanted, found):
        """Return True when two names are similar enough, ignoring case."""
        ratio = difflib.SequenceMatcher(None, wanted.lower(), found.lower()).ratio()
        return ratio > self.MATCH_RATIO

    def get_lyrics(self, lyrics):
        """Search for lyrics.artist / lyrics.title and store the first hit.

        On success the text is stored in lyrics.lyrics and True is returned.
        When more than one candidate matches, the candidate tuples are also
        stored in lyrics.list.  Returns False when the request fails, the
        response is unusable or no candidate yields lyrics.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        try:
            url = self._search_url(lyrics.artist, lyrics.title)
            response = requests.get(url, timeout=10)
            result = response.json()
        except Exception:
            # Best effort: network or decoding failures mean "not found",
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            return False
        if not isinstance(result, list):
            # Anything but a list of hits (e.g. an error object) cannot be
            # walked as search results.
            return False

        links = []
        for item in result:
            try:
                artistname = item['artistName']
                songtitle = item['name']
                lyric_url = self.LYRIC_URL % item['id']
                matched = self._matches(lyrics.artist, artistname) and self._matches(lyrics.title, songtitle)
            except (KeyError, TypeError, AttributeError):
                # Skip malformed entries instead of aborting the whole search.
                continue
            if matched:
                links.append((artistname + ' - ' + songtitle, lyric_url, artistname, songtitle))

        if not links:
            return False
        if len(links) > 1:
            lyrics.list = links
        for link in links:
            lyr = self.get_lyrics_from_list(link)
            if lyr:
                lyrics.lyrics = lyr
                return True
        return False

    def get_lyrics_from_list(self, link):
        """Download the lyrics for one (label, url, artist, song) candidate.

        Returns the 'syncedLyrics' value of the JSON response, or None when
        the request fails or the response carries no such field.
        """
        title,url,artist,song = link
        try:
            utilities.log(debug, '%s: search url: %s' % (__title__, url))
            response = requests.get(url, timeout=10)
            result = response.json()
        except Exception:
            return None
        if isinstance(result, dict):
            return result.get('syncedLyrics')
        return None
|
|
||
|
|
||
def performSelfTest():
    """Run a fixed sample search and exit 0 if lyrics were found, else 1."""
    sample = utilities.Lyrics()
    for field, value in (
        ('source', __title__),
        ('syncronized', __syncronized__),
        ('artist', 'Dire Straits'),
        ('album', 'Brothers In Arms'),
        ('title', 'Money For Nothing'),
    ):
        setattr(sample, field, value)

    if not LyricsFetcher().get_lyrics(sample):
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)

    utilities.log(True, "Everything appears in order.")
    buildLyrics(sample)
    sys.exit(0)
|
|
||
def buildLyrics(lyrics):
    """Log the track details and lyric lines as a <lyrics> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, text in header:
        etree.SubElement(root, tag).text = text

    # One <lyric> element per line of the lyrics text.
    for text in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def buildVersion():
    """Log this grabber's metadata as a <grabber> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<grabber></grabber>')
    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'lrclib.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    for tag, text in fields:
        etree.SubElement(root, tag).text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def main():
    """Parse the command line and run the version, self-test or search action."""
    global debug

    parser = OptionParser()

    # Boolean switches, registered in the order they appear in --help.
    for short, long_name, dest, text in (
        ('-v', "--version", "version", "Display version and author"),
        ('-t', "--test", "test", "Test grabber with a know good search"),
        ('-s', "--search", "search", "Search for lyrics."),
    ):
        parser.add_option(short, long_name, action="store_true", default=False,
                          dest=dest, help=text)

    # Options carrying a value that describes the track.
    for short, long_name, meta, dest, text in (
        ('-a', "--artist", "ARTIST", "artist", "Artist of track."),
        ('-b', "--album", "ALBUM", "album", "Album of track."),
        ('-n', "--title", "TITLE", "title", "Title of track."),
        ('-f', "--filename", "FILENAME", "filename", "Filename of track."),
    ):
        parser.add_option(short, long_name, metavar=meta, default=None,
                          dest=dest, help=text)

    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    # Both helpers end the process themselves.
    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Copy only the track details that were actually supplied.
    for field in ('artist', 'album', 'title', 'filename'):
        supplied = getattr(opts, field)
        if supplied:
            setattr(lyrics, field, supplied)

    if not LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)

    buildLyrics(lyrics)
    sys.exit(0)


if __name__ == '__main__':
    main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,192 @@ | ||
| # -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*- | ||
| """ | ||
| Scraper for https://www.lyricsify.com/ | ||
| ronie | ||
| """ | ||
|
|
||
| import requests | ||
| import re | ||
| import difflib | ||
| from bs4 import BeautifulSoup | ||
|
|
||
| import sys | ||
| from optparse import OptionParser | ||
| from common import utilities | ||
|
|
||
|
|
||
# Grabber metadata; buildVersion() reports these values in its XML output.
__author__ = "Paul Harrison and ronie"
__title__ = "Lyricsify"
__description__ = "Search https://www.lyricsify.com for lyrics"
# NOTE(review): presumably used by the caller to rank grabbers - confirm.
__priority__ = "130"
__version__ = "0.1"
# Emitted as the <syncronized> element by buildLyrics() and buildVersion().
__syncronized__ = True

# Passed as the first argument to utilities.log(); main() sets it to True
# when -d/--debug is given.
debug = False

# Request headers sent with every HTTP request made by LyricsFetcher.
UserAgent = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"}
|
|
||
class LyricsFetcher:
    """Scrape lyrics from the lyricsify.com search and lyric pages."""

    # A candidate matches when both similarity ratios exceed this value.
    MATCH_RATIO = 0.8

    def __init__( self ):
        self.SEARCH_URL = 'https://www.lyricsify.com/lyrics/%s/%s'
        self.LYRIC_URL = 'https://www.lyricsify.com%s'

    def _search_url(self, artist, title):
        """Return the search URL with both path segments percent-encoded."""
        from urllib.parse import quote
        # '/', '?' and '#' inside a name would otherwise change the URL's
        # path or cut it short.
        return self.SEARCH_URL % (quote(artist, safe=''), quote(title, safe=''))

    @staticmethod
    def _split_link_text(text):
        """Split 'artist - title[.lrc]' link text into (artist, title).

        Returns None when the text has no ' - ' separator.
        """
        foundartist, separator, foundsong = text.partition(' - ')
        if not separator:
            return None
        # rstrip('.lrc') strips a character *set* and would also eat a
        # trailing 'l', 'r', 'c' or '.' of the title itself ("Magic.lrc"
        # became "Magi"); remove only the literal suffix.
        if foundsong.endswith('.lrc'):
            foundsong = foundsong[:-len('.lrc')]
        return foundartist, foundsong

    def _matches(self, wanted, found):
        """Return True when two names are similar enough, ignoring case."""
        ratio = difflib.SequenceMatcher(None, wanted.lower(), found.lower()).ratio()
        return ratio > self.MATCH_RATIO

    def get_lyrics(self, lyrics):
        """Search for lyrics.artist / lyrics.title and store the first hit.

        On success the text is stored in lyrics.lyrics and True is returned.
        When more than one candidate matches, the candidate tuples are also
        stored in lyrics.list.  Returns False when the request fails or no
        candidate yields lyrics.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        artist = lyrics.artist.replace(' ', '-')
        title = lyrics.title.replace(' ', '-')
        try:
            url = self._search_url(artist, title)
            search = requests.get(url, headers=UserAgent, timeout=10)
            response = search.text
        except Exception:
            # Best effort: a failed request means "not found", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return False

        links = []
        soup = BeautifulSoup(response, 'html.parser')
        for link in soup.find_all('a'):
            href = link.get('href')
            # Anchors without plain text or without an href cannot be
            # lyric links (href is None for <a> tags lacking the attribute).
            if not (link.string and href and href.startswith('/lrc/')):
                continue
            parts = self._split_link_text(link.string)
            if parts is None:
                # some links don't have a proper 'artist - title' format
                continue
            foundartist, foundsong = parts
            if self._matches(artist, foundartist) and self._matches(title, foundsong):
                links.append((foundartist + ' - ' + foundsong, self.LYRIC_URL % href, foundartist, foundsong))

        if not links:
            return False
        if len(links) > 1:
            lyrics.list = links
        for link in links:
            lyr = self.get_lyrics_from_list(link)
            if lyr:
                lyrics.lyrics = lyr
                return True
        return False

    def get_lyrics_from_list(self, link):
        """Download one (label, url, artist, song) candidate page.

        Returns the text between the first '/h3>' and the following '</div'
        with markup tags removed, or None when the request fails or the page
        has no such section.
        """
        title,url,artist,song = link
        try:
            utilities.log(debug, '%s: search url: %s' % (__title__, url))
            search = requests.get(url, headers=UserAgent, timeout=10)
            response = search.text
        except Exception:
            return None
        matchcode = re.search('/h3>(.*?)</div', response, flags=re.DOTALL)
        if not matchcode:
            return None
        lyricscode = matchcode.group(1)
        # Drop any remaining HTML tags from the extracted section.
        return re.sub('<[^<]+?>', '', lyricscode)
|
|
||
|
|
||
def performSelfTest():
    """Run a fixed sample search and exit 0 if lyrics were found, else 1."""
    sample = utilities.Lyrics()
    for field, value in (
        ('source', __title__),
        ('syncronized', __syncronized__),
        ('artist', 'Dire Straits'),
        ('album', 'Brothers In Arms'),
        ('title', 'Money For Nothing'),
    ):
        setattr(sample, field, value)

    if not LyricsFetcher().get_lyrics(sample):
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)

    utilities.log(True, "Everything appears in order.")
    buildLyrics(sample)
    sys.exit(0)
|
|
||
def buildLyrics(lyrics):
    """Log the track details and lyric lines as a <lyrics> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, text in header:
        etree.SubElement(root, tag).text = text

    # One <lyric> element per line of the lyrics text.
    for text in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def buildVersion():
    """Log this grabber's metadata as a <grabber> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<grabber></grabber>')
    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'lyricsify.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    for tag, text in fields:
        etree.SubElement(root, tag).text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def main():
    """Parse the command line and run the version, self-test or search action."""
    global debug

    parser = OptionParser()

    # Boolean switches, registered in the order they appear in --help.
    for short, long_name, dest, text in (
        ('-v', "--version", "version", "Display version and author"),
        ('-t', "--test", "test", "Test grabber with a know good search"),
        ('-s', "--search", "search", "Search for lyrics."),
    ):
        parser.add_option(short, long_name, action="store_true", default=False,
                          dest=dest, help=text)

    # Options carrying a value that describes the track.
    for short, long_name, meta, dest, text in (
        ('-a', "--artist", "ARTIST", "artist", "Artist of track."),
        ('-b', "--album", "ALBUM", "album", "Album of track."),
        ('-n', "--title", "TITLE", "title", "Title of track."),
        ('-f', "--filename", "FILENAME", "filename", "Filename of track."),
    ):
        parser.add_option(short, long_name, metavar=meta, default=None,
                          dest=dest, help=text)

    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    # Both helpers end the process themselves.
    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Copy only the track details that were actually supplied.
    for field in ('artist', 'album', 'title', 'filename'):
        supplied = getattr(opts, field)
        if supplied:
            setattr(lyrics, field, supplied)

    if not LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)

    buildLyrics(lyrics)
    sys.exit(0)


if __name__ == '__main__':
    main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,184 @@ | ||
| #-*- coding: UTF-8 -*- | ||
| """ | ||
| Scraper for https://www.megalobiz.com/ | ||
| megalobiz | ||
| """ | ||
|
|
||
| import requests | ||
| import re | ||
| from bs4 import BeautifulSoup | ||
|
|
||
| import sys | ||
| from optparse import OptionParser | ||
| from common import utilities | ||
|
|
||
# Grabber metadata; buildVersion() reports these values in its XML output.
__author__ = "Paul Harrison and 'ronie'"
__title__ = "Megalobiz"
__description__ = "Search https://www.megalobiz.com/ for lyrics"
__version__ = "0.1"
# NOTE(review): presumably used by the caller to rank grabbers - confirm.
__priority__ = "400"
# Emitted as the <syncronized> element by buildLyrics() and buildVersion().
__syncronized__ = True

# Passed as the first argument to utilities.log(); main() sets it to True
# when -d/--debug is given.
debug = False
|
|
||
class LyricsFetcher:
    """Scrape lyrics from the megalobiz.com search and LRC maker pages."""

    def __init__( self ):
        self.SEARCH_URL = 'https://www.megalobiz.com/search/all?qry=%s-%s&searchButton.x=0&searchButton.y=0'
        self.LYRIC_URL = 'https://www.megalobiz.com/%s'

    def _search_url(self, artist, title):
        """Return the search URL with artist and title percent-encoded."""
        from urllib.parse import quote
        # An unencoded '&' or '#' in a name would end the qry= parameter early.
        return self.SEARCH_URL % (quote(artist, safe=''), quote(title, safe=''))

    def get_lyrics(self, lyrics):
        """Search for lyrics.artist / lyrics.title and store the first hit.

        A link is a candidate when its text contains both the artist and the
        title (case-insensitive).  On success the text is stored in
        lyrics.lyrics and True is returned; when more than one candidate
        matches, the candidate tuples are also stored in lyrics.list.
        Returns None when the request fails or nothing matches, and False
        when no candidate yields lyrics.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        try:
            url = self._search_url(lyrics.artist, lyrics.title)
            response = requests.get(url, timeout=10)
            result = response.text
        except Exception:
            # Best effort: a failed request means "not found", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return None

        # Lower-case the wanted names once instead of once per link.
        wanted_artist = lyrics.artist.lower()
        wanted_title = lyrics.title.lower()

        links = []
        soup = BeautifulSoup(result, 'html.parser')
        for link in soup.find_all('a'):
            href = link.get('href')
            if not (href and href.startswith('/lrc/maker/')):
                continue
            linktext = link.text.replace('_', ' ').strip()
            lowered = linktext.lower()
            if wanted_artist in lowered and wanted_title in lowered:
                links.append((linktext, self.LYRIC_URL % href, lyrics.artist, lyrics.title))

        if not links:
            return None
        if len(links) > 1:
            lyrics.list = links
        for link in links:
            lyr = self.get_lyrics_from_list(link)
            if lyr:
                lyrics.lyrics = lyr
                return True
        return False

    def get_lyrics_from_list(self, link):
        """Download one (label, url, artist, song) candidate page.

        Returns the content of the first span whose id looks like
        'lrc_<digits>_lyrics' with markup tags removed, or None when the
        request fails or the page has no such span.
        """
        title,url,artist,song = link
        try:
            utilities.log(debug, '%s: search url: %s' % (__title__, url))
            response = requests.get(url, timeout=10)
            result = response.text
        except Exception:
            return None
        matchcode = re.search('span id="lrc_[0-9]+_lyrics">(.*?)</span', result, flags=re.DOTALL)
        if not matchcode:
            return None
        lyricscode = matchcode.group(1)
        # Drop any remaining HTML tags from the extracted span.
        return re.sub('<[^<]+?>', '', lyricscode)
|
|
||
|
|
||
def performSelfTest():
    """Run a fixed sample search and exit 0 if lyrics were found, else 1."""
    sample = utilities.Lyrics()
    for field, value in (
        ('source', __title__),
        ('syncronized', __syncronized__),
        ('artist', 'Dire Straits'),
        ('album', 'Brothers In Arms'),
        ('title', 'Money For Nothing'),
    ):
        setattr(sample, field, value)

    if not LyricsFetcher().get_lyrics(sample):
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)

    utilities.log(True, "Everything appears in order.")
    buildLyrics(sample)
    sys.exit(0)
|
|
||
def buildLyrics(lyrics):
    """Log the track details and lyric lines as a <lyrics> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, text in header:
        etree.SubElement(root, tag).text = text

    # One <lyric> element per line of the lyrics text.
    for text in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def buildVersion():
    """Log this grabber's metadata as a <grabber> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<grabber></grabber>')
    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'megalobiz.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    for tag, text in fields:
        etree.SubElement(root, tag).text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def main():
    """Parse the command line and run the version, self-test or search action.

    Exits 0 when lyrics were found and printed, 1 when stray positional
    arguments were given or no lyrics were found.
    """
    global debug

    parser = OptionParser()

    parser.add_option('-v', "--version", action="store_true", default=False,
                      dest="version", help="Display version and author")
    parser.add_option('-t', "--test", action="store_true", default=False,
                      dest="test", help="Perform self-test for dependencies.")
    parser.add_option('-s', "--search", action="store_true", default=False,
                      dest="search", help="Search for lyrics.")
    parser.add_option('-a', "--artist", metavar="ARTIST", default=None,
                      dest="artist", help="Artist of track.")
    parser.add_option('-b', "--album", metavar="ALBUM", default=None,
                      dest="album", help="Album of track.")
    parser.add_option('-n', "--title", metavar="TITLE", default=None,
                      dest="title", help="Title of track.")
    parser.add_option('-f', "--filename", metavar="FILENAME", default=None,
                      dest="filename", help="Filename of track.")
    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    # Both helpers end the process themselves.
    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    if args:
        # Every other call in this script passes the "print it" flag first;
        # the original call omitted it and handed the message in its place.
        utilities.log(True, 'ERROR: invalid arguments found')
        sys.exit(1)

    fetcher = LyricsFetcher()
    if fetcher.get_lyrics(lyrics):
        buildLyrics(lyrics)
        sys.exit(0)
    else:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)


if __name__ == '__main__':
    main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,184 @@ | ||
| # -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*- | ||
| """ | ||
| Scraper for http://music.163.com/ | ||
| osdlyrics | ||
| """ | ||
|
|
||
| import requests | ||
| import re | ||
| import random | ||
| import difflib | ||
|
|
||
| import sys | ||
| from optparse import OptionParser | ||
| from common import utilities | ||
|
|
||
# Grabber metadata; buildVersion() reports these values in its XML output.
__author__ = "Paul Harrison and ronie"
__title__ = "Music163"
__description__ = "Lyrics scraper for http://music.163.com/"
# NOTE(review): presumably used by the caller to rank grabbers - confirm.
__priority__ = "500"
__version__ = "0.1"
# Emitted as the <syncronized> element by buildLyrics() and buildVersion().
__syncronized__ = True

# Passed as the first argument to utilities.log(); main() sets it to True
# when -d/--debug is given.
debug = False

# Request headers sent with every HTTP request made by LyricsFetcher.
headers = {}
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'
|
|
||
class LyricsFetcher:
    """Look up lyrics through the music.163.com search and lyric endpoints."""

    # Similarity ratios a candidate must exceed; the artist check is looser.
    ARTIST_RATIO = 0.6
    TITLE_RATIO = 0.8

    def __init__( self ):
        self.SEARCH_URL = 'http://music.163.com/api/search/get'
        self.LYRIC_URL = 'http://music.163.com/api/song/lyric'

    def _search_url(self, artist, title):
        """Return the search URL for '+'-separated artist and title strings."""
        from urllib.parse import quote
        # '+' is kept literal because it is the word separator here; every
        # other reserved character ('&', '#', ...) is percent-encoded so it
        # cannot split the s= parameter.
        return self.SEARCH_URL + '?s=%s+%s&type=1' % (quote(artist, safe='+'), quote(title, safe='+'))

    @staticmethod
    def _similar(first, second):
        """Return the case-insensitive difflib similarity ratio of two strings."""
        return difflib.SequenceMatcher(None, first.lower(), second.lower()).ratio()

    @staticmethod
    def _extract_lyric(result):
        """Return result['lrc']['lyric'] or None when any level is missing."""
        if not isinstance(result, dict):
            return None
        lrc = result.get('lrc')
        if not isinstance(lrc, dict):
            return None
        return lrc.get('lyric')

    def get_lyrics(self, lyrics):
        """Search for lyrics.artist / lyrics.title and store the first hit.

        Only lyrics whose text starts with '[' are accepted.  On success the
        text is stored in lyrics.lyrics and True is returned; when more than
        one candidate matches, the candidate tuples are also stored in
        lyrics.list.  Returns False when the request fails or nothing
        matches, and None when no candidate yields acceptable lyrics.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        artist = lyrics.artist.replace(' ', '+')
        title = lyrics.title.replace(' ', '+')
        try:
            url = self._search_url(artist, title)
            response = requests.get(url, headers=headers, timeout=10)
            result = response.json()
        except Exception:
            # Best effort: network or decoding failures mean "not found",
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            return False

        songs = []
        if isinstance(result, dict) and isinstance(result.get('result'), dict):
            # 'songs' may be absent or null when there are no hits.
            songs = result['result'].get('songs') or []

        links = []
        for item in songs:
            try:
                # Artists are joined with '+&+' to mirror the '+'-separated
                # form of the wanted artist string.
                artists = "+&+".join([a["name"] for a in item["artists"]])
                name = item['name']
                lyric_url = self.LYRIC_URL + '?id=' + str(item['id']) + '&lv=-1&kv=-1&tv=-1'
                matched = (self._similar(artist, artists) > self.ARTIST_RATIO) and (self._similar(title, name) > self.TITLE_RATIO)
            except (KeyError, TypeError, AttributeError):
                # Skip malformed entries instead of aborting the whole search.
                continue
            if matched:
                links.append((artists + ' - ' + name, lyric_url, artists, name))

        if not links:
            return False
        if len(links) > 1:
            lyrics.list = links
        for link in links:
            lyr = self.get_lyrics_from_list(link)
            if lyr and lyr.startswith('['):
                lyrics.lyrics = lyr
                return True
        return None

    def get_lyrics_from_list(self, link):
        """Download the lyrics for one (label, url, artist, song) candidate.

        Returns the 'lyric' value nested under 'lrc' in the JSON response,
        or None when the request fails or the field is missing.
        """
        title,url,artist,song = link
        try:
            utilities.log(debug, '%s: search url: %s' % (__title__, url))
            response = requests.get(url, headers=headers, timeout=10)
            result = response.json()
        except Exception:
            return None
        return self._extract_lyric(result)
|
|
||
|
|
||
def performSelfTest():
    """Run a fixed sample search and exit 0 if lyrics were found, else 1."""
    sample = utilities.Lyrics()
    for field, value in (
        ('source', __title__),
        ('syncronized', __syncronized__),
        ('artist', 'Dire Straits'),
        ('album', 'Brothers In Arms'),
        ('title', 'Money For Nothing'),
    ):
        setattr(sample, field, value)

    if not LyricsFetcher().get_lyrics(sample):
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)

    utilities.log(True, "Everything appears in order.")
    buildLyrics(sample)
    sys.exit(0)
|
|
||
def buildLyrics(lyrics):
    """Log the track details and lyric lines as a <lyrics> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, text in header:
        etree.SubElement(root, tag).text = text

    # One <lyric> element per line of the lyrics text.
    for text in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def buildVersion():
    """Log this grabber's metadata as a <grabber> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<grabber></grabber>')
    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'music163.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    for tag, text in fields:
        etree.SubElement(root, tag).text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def main():
    """Parse the command line and run the version, self-test or search action."""
    global debug

    parser = OptionParser()

    # Boolean switches, registered in the order they appear in --help.
    for short, long_name, dest, text in (
        ('-v', "--version", "version", "Display version and author"),
        ('-t', "--test", "test", "Test grabber with a know good search"),
        ('-s', "--search", "search", "Search for lyrics."),
    ):
        parser.add_option(short, long_name, action="store_true", default=False,
                          dest=dest, help=text)

    # Options carrying a value that describes the track.
    for short, long_name, meta, dest, text in (
        ('-a', "--artist", "ARTIST", "artist", "Artist of track."),
        ('-b', "--album", "ALBUM", "album", "Album of track."),
        ('-n', "--title", "TITLE", "title", "Title of track."),
        ('-f', "--filename", "FILENAME", "filename", "Filename of track."),
    ):
        parser.add_option(short, long_name, metavar=meta, default=None,
                          dest=dest, help=text)

    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    # Both helpers end the process themselves.
    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Copy only the track details that were actually supplied.
    for field in ('artist', 'album', 'title', 'filename'):
        supplied = getattr(opts, field)
        if supplied:
            setattr(lyrics, field, supplied)

    if not LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)

    buildLyrics(lyrics)
    sys.exit(0)


if __name__ == '__main__':
    main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,201 @@ | ||
| # -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*- | ||
| """ | ||
| Scraper for https://www.musixmatch.com | ||
| taxigps | ||
| """ | ||
|
|
||
| import os | ||
| import requests | ||
| import re | ||
| import random | ||
| import difflib | ||
| from bs4 import BeautifulSoup | ||
|
|
||
| import sys | ||
| from optparse import OptionParser | ||
| from common import utilities | ||
|
|
||
# Grabber metadata; buildVersion() reports these values in its XML output.
__author__ = "Paul Harrison and 'ronie'"
__title__ = "Musixmatch"
__description__ = "Search https://www.musixmatch.com for lyrics"
# NOTE(review): presumably used by the caller to rank grabbers - confirm.
__priority__ = "210"
__version__ = "0.1"
# Emitted as the <syncronized> element by buildLyrics() and buildVersion().
__syncronized__ = False

# Passed as the first argument to utilities.log(); main() sets it to True
# when -d/--debug is given.
debug = False

# Request headers sent with every HTTP request made by LyricsFetcher.
headers = {}
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'
|
|
||
|
|
||
class LyricsFetcher:
    """Scrape lyrics from the musixmatch.com search and lyric pages."""

    # A candidate matches when both similarity ratios exceed this value.
    MATCH_RATIO = 0.8

    def __init__( self ):
        self.SEARCH_URL = 'https://www.musixmatch.com/search/'
        self.LYRIC_URL = 'https://www.musixmatch.com'

    def _search_url(self, artist, title):
        """Return the search URL for '+'-separated artist and title strings."""
        from urllib.parse import quote
        # '+' is kept literal because it is the word separator here; '/',
        # '?' and '#' are percent-encoded so they cannot alter the URL path.
        return self.SEARCH_URL + '%s+%s' % (quote(artist, safe='+'), quote(title, safe='+'))

    def _matches(self, wanted, found):
        """Return True when two names are similar enough, ignoring case."""
        ratio = difflib.SequenceMatcher(None, wanted.lower(), found.lower()).ratio()
        return ratio > self.MATCH_RATIO

    def get_lyrics(self, lyrics):
        """Search for lyrics.artist / lyrics.title and store the first hit.

        On success the text is stored in lyrics.lyrics and True is returned.
        When more than one candidate matches, the candidate tuples are also
        stored in lyrics.list.  Returns False when the request fails or no
        candidate yields lyrics.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        artist = lyrics.artist.replace(' ', '+')
        title = lyrics.title.replace(' ', '+')
        try:
            url = self._search_url(artist, title)
            response = requests.get(url, headers=headers, timeout=10)
            result = response.text
        except Exception:
            # Best effort: a failed request means "not found", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return False

        links = []
        soup = BeautifulSoup(result, 'html.parser')
        for item in soup.find_all('li', {'class': 'showArtist'}):
            artist_tag = item.find('a', {'class': 'artist'})
            title_tag = item.find('a', {'class': 'title'})
            # find() returns None when an entry lacks either anchor; such
            # entries are skipped instead of raising AttributeError.
            if artist_tag is None or title_tag is None:
                continue
            href = title_tag.get('href')
            if not href:
                continue
            artistname = artist_tag.get_text()
            songtitle = title_tag.get_text()
            if self._matches(artist, artistname) and self._matches(title, songtitle):
                links.append((artistname + ' - ' + songtitle, self.LYRIC_URL + href, artistname, songtitle))

        if not links:
            return False
        if len(links) > 1:
            lyrics.list = links
        for link in links:
            lyr = self.get_lyrics_from_list(link)
            if lyr:
                lyrics.lyrics = lyr
                return True
        return False

    def get_lyrics_from_list(self, link):
        """Download one (label, url, artist, song) candidate page.

        Returns the newline-terminated text of every 'lyrics__content__ok'
        span, falling back to the 'lyrics__content__error' spans when there
        are none, or None when the request fails or neither kind is present.
        """
        title,url,artist,song = link
        try:
            utilities.log(debug, '%s: search url: %s' % (__title__, url))
            response = requests.get(url, headers=headers, timeout=10)
            result = response.text
        except Exception:
            return None
        soup = BeautifulSoup(result, 'html.parser')
        for css_class in ('lyrics__content__ok', 'lyrics__content__error'):
            parts = soup.find_all('span', {'class': css_class})
            if parts:
                return ''.join(part.get_text() + '\n' for part in parts)
        return None
|
|
||
|
|
||
def performSelfTest():
    """Run a fixed sample search and exit 0 if lyrics were found, else 1."""
    sample = utilities.Lyrics()
    for field, value in (
        ('source', __title__),
        ('syncronized', __syncronized__),
        ('artist', 'Dire Straits'),
        ('album', 'Brothers In Arms'),
        ('title', 'Money For Nothing'),
    ):
        setattr(sample, field, value)

    if not LyricsFetcher().get_lyrics(sample):
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)

    utilities.log(True, "Everything appears in order.")
    buildLyrics(sample)
    sys.exit(0)
|
|
||
def buildLyrics(lyrics):
    """Log the track details and lyric lines as a <lyrics> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<lyrics></lyrics>')
    header = (
        ("artist", lyrics.artist),
        ("album", lyrics.album),
        ("title", lyrics.title),
        ("syncronized", 'True' if __syncronized__ else 'False'),
        ("grabber", lyrics.source),
    )
    for tag, text in header:
        etree.SubElement(root, tag).text = text

    # One <lyric> element per line of the lyrics text.
    for text in lyrics.lyrics.splitlines():
        etree.SubElement(root, "lyric").text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def buildVersion():
    """Log this grabber's metadata as a <grabber> XML document, then exit 0."""
    from lxml import etree

    root = etree.XML(u'<grabber></grabber>')
    fields = (
        ("name", __title__),
        ("author", __author__),
        ("command", 'musixmatch.py'),
        ("type", 'lyrics'),
        ("description", __description__),
        ("version", __version__),
        ("priority", __priority__),
        ("syncronized", 'True' if __syncronized__ else 'False'),
    )
    for tag, text in fields:
        etree.SubElement(root, tag).text = text

    document = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(document))
    sys.exit(0)
|
|
||
def main():
    """Parse the command line and run the requested grabber action.

    ``--version`` and ``--test`` are handled first (both helpers exit the
    process); otherwise a lyrics lookup is performed with whatever track
    details were supplied, exiting 0 when lyrics were found and 1 when not.
    """
    global debug

    parser = OptionParser()

    # Boolean switches, registered in the order they appear in --help.
    for short, long_name, dest, text in (
        ('-v', "--version", "version", "Display version and author"),
        ('-t', "--test", "test", "Test grabber with a know good search"),
        ('-s', "--search", "search", "Search for lyrics."),
    ):
        parser.add_option(short, long_name, action="store_true", default=False,
                          dest=dest, help=text)

    # Options that take a value describing the track.
    for short, long_name, dest, text in (
        ('-a', "--artist", "artist", "Artist of track."),
        ('-b', "--album", "album", "Album of track."),
        ('-n', "--title", "title", "Title of track."),
        ('-f', "--filename", "filename", "Filename of track."),
    ):
        parser.add_option(short, long_name, metavar=dest.upper(), default=None,
                          dest=dest, help=text)

    parser.add_option('-d', '--debug', action="store_true", default=False,
                      dest="debug", help=("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Copy across only the track details that were actually given.
    for field in ("artist", "album", "title", "filename"):
        supplied = getattr(opts, field)
        if supplied:
            setattr(lyrics, field, supplied)

    if not LyricsFetcher().get_lyrics(lyrics):
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)

    buildLyrics(lyrics)
    sys.exit(0)
|
|
||
# Run the grabber only when this file is executed as a script.
if __name__ == "__main__":
    main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,228 @@ | ||
| # -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*- | ||
| """ | ||
| Scraper for https://www.musixmatch.com/ | ||
| ronie | ||
| https://github.com/rtcq/syncedlyrics | ||
| """ | ||
|
|
||
| import requests | ||
| import json | ||
| import time | ||
| import difflib | ||
|
|
||
| import os | ||
| import sys | ||
| from optparse import OptionParser | ||
| from common import utilities | ||
|
|
||
# Grabber metadata: reported by buildVersion() and stamped onto Lyrics objects.
__author__ = "Paul Harrison and ronie"
__title__ = "MusixMatchLRC"
__description__ = "Search http://musixmatch.com for lyrics"
__priority__ = "100"
__version__ = "0.1"
__syncronized__ = True

# Module-wide debug switch; main() turns it on when -d/--debug is given.
debug = False
|
|
||
class LyricsFetcher:
    """Fetch synchronised (LRC) lyrics through the Musixmatch desktop API.

    All HTTP traffic goes through ``self.session`` so that the headers set up
    in ``__init__`` accompany every request, not only the token request.
    """

    def __init__( self ):
        self.SEARCH_URL = 'https://apic-desktop.musixmatch.com/ws/1.1/%s'
        self.session = requests.Session()
        self.session.headers.update(
            {
                "authority": "apic-desktop.musixmatch.com",
                "cookie": "AWSELBCORS=0; AWSELB=0",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
            }
        )
        self.current_time = int(time.time())

    @staticmethod
    def _dig(data, *keys):
        """Follow *keys* through nested dicts.

        Returns the value found, or None as soon as a level is not a dict or
        does not hold the next key (the API response is not trusted to have
        the expected shape).
        """
        for key in keys:
            if not isinstance(data, dict) or key not in data:
                return None
            data = data[key]
        return data

    @staticmethod
    def _similar(wanted, found, threshold=0.8):
        """Return True when the two names are more than *threshold* alike, ignoring case."""
        return difflib.SequenceMatcher(None, wanted.lower(), found.lower()).ratio() > threshold

    def _load_cached_token(self, tokenpath):
        """Return the unexpired token stored at *tokenpath*, or '' if there is none.

        A missing, unreadable or malformed cache file counts as "no token"
        rather than an error, so a damaged cache cannot break lookups.
        """
        if not os.path.exists(tokenpath):
            return ''
        try:
            with open(tokenpath, 'r') as tokenfile:
                tokendata = json.load(tokenfile)
        except (OSError, ValueError):
            return ''
        if not isinstance(tokendata, dict):
            return ''
        cached_token = tokendata.get("token")
        expiration_time = tokendata.get("expiration_time")
        if (cached_token and isinstance(expiration_time, (int, float))
                and self.current_time < expiration_time):
            return cached_token
        return ''

    def _store_token(self, tokenpath):
        """Write the current token to *tokenpath* with a 600 second lifetime (best effort)."""
        tokendata = {
            'token': self.token,
            'expiration_time': self.current_time + 600,
        }
        try:
            with open(tokenpath, 'w') as tokenfile:
                json.dump(tokendata, tokenfile)
        except OSError:
            # The token is still usable for this run; only the caching failed.
            utilities.log(debug, '%s: could not write token cache %s' % (__title__, tokenpath))

    def get_token(self):
        """Return a user token, taken from the cache file or requested anew.

        Returns None when the token request itself fails, and an empty string
        when the response carries no ``user_token``.
        """
        self.token = ''
        tokenpath = os.path.join(utilities.getCacheDir(), 'musixmatch_token')
        self.token = self._load_cached_token(tokenpath)
        if not self.token:
            try:
                url = self.SEARCH_URL % 'token.get'
                query = [('user_language', 'en'), ('app_id', 'web-desktop-app-v1.0'), ('t', self.current_time)]
                response = self.session.get(url, params=query, timeout=10)
                result = response.json()
            except Exception:
                return None
            fresh_token = self._dig(result, 'message', 'body', 'user_token')
            if fresh_token is not None:
                self.token = fresh_token
                self._store_token(tokenpath)
        return self.token

    def get_lyrics(self, lyrics):
        """Search for ``lyrics.artist`` / ``lyrics.title`` and fill in ``lyrics.lyrics``.

        Candidates whose artist and title are both more than 0.8 similar to
        the requested ones are tried in order; the first one that yields
        subtitles wins. When several candidates match they are also stored in
        ``lyrics.list``. Returns True on success, False otherwise.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        self.token = self.get_token()
        if not self.token:
            return False
        # The '+'-joined forms are used for the query string only.
        artist = lyrics.artist.replace(' ', '+')
        title = lyrics.title.replace(' ', '+')
        search = '%s - %s' % (artist, title)
        try:
            url = self.SEARCH_URL % 'track.search'
            query = [('q', search), ('page_size', '5'), ('page', '1'), ('s_track_rating', 'desc'), ('quorum_factor', '1.0'), ('app_id', 'web-desktop-app-v1.0'), ('usertoken', self.token), ('t', self.current_time)]
            response = self.session.get(url, params=query, timeout=10)
            result = response.json()
        except Exception:
            return False

        track_list = self._dig(result, 'message', 'body', 'track_list')
        if not isinstance(track_list, list):
            track_list = []

        links = []
        for item in track_list:
            try:
                artistname = item['track']['artist_name']
                songtitle = item['track']['track_name']
                trackid = item['track']['track_id']
            except (KeyError, TypeError):
                continue
            # Compare against the names as requested: the '+'-joined query
            # forms would lower the similarity of multi-word names.
            if self._similar(lyrics.artist, artistname) and self._similar(lyrics.title, songtitle):
                links.append((artistname + ' - ' + songtitle, trackid, artistname, songtitle))

        if not links:
            return False
        if len(links) > 1:
            lyrics.list = links
        for link in links:
            lyr = self.get_lyrics_from_list(link)
            if lyr:
                lyrics.lyrics = lyr
                return True
        return False

    def get_lyrics_from_list(self, link):
        """Return the LRC subtitle body for one ``(title, trackid, artist, song)`` tuple.

        Returns None when the request fails or the response has no subtitle body.
        """
        title,trackid,artist,song = link
        try:
            utilities.log(debug, '%s: search track id: %s' % (__title__, trackid))
            url = self.SEARCH_URL % 'track.subtitle.get'
            query = [('track_id', trackid), ('subtitle_format', 'lrc'), ('app_id', 'web-desktop-app-v1.0'), ('usertoken', self.token), ('t', self.current_time)]
            response = self.session.get(url, params=query, timeout=10)
            result = response.json()
        except Exception:
            return None
        return self._dig(result, 'message', 'body', 'subtitle', 'subtitle_body')
|
|
||
def performSelfTest():
    """Run one known-good lookup and exit 0 on success or 1 on failure."""
    sample = utilities.Lyrics()
    for attribute, value in (
        ("source", __title__),
        ("syncronized", __syncronized__),
        ("artist", 'Dire Straits'),
        ("album", 'Brothers In Arms'),
        ("title", 'Money For Nothing'),
    ):
        setattr(sample, attribute, value)

    succeeded = LyricsFetcher().get_lyrics(sample)
    if not succeeded:
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)

    utilities.log(True, "Everything appears in order.")
    buildLyrics(sample)
    sys.exit(0)
|
|
||
def buildLyrics(lyrics):
    """Log *lyrics* as a ``<lyrics>`` XML document and exit with status 0.

    Header elements come first, then one ``<lyric>`` element per line of
    ``lyrics.lyrics``.
    """
    from lxml import etree

    def append(parent, tag, text):
        # Small local helper: add <tag>text</tag> beneath parent.
        etree.SubElement(parent, tag).text = text

    root = etree.XML(u'<lyrics></lyrics>')
    append(root, "artist", lyrics.artist)
    append(root, "album", lyrics.album)
    append(root, "title", lyrics.title)
    append(root, "syncronized", 'True' if __syncronized__ else 'False')
    append(root, "grabber", lyrics.source)

    for text in lyrics.lyrics.splitlines():
        append(root, "lyric", text)

    rendered = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(rendered))
    sys.exit(0)
|
|
||
def buildVersion():
    """Log a ``<grabber>`` XML document describing this script and exit 0."""
    from lxml import etree

    def append(parent, tag, text):
        # Small local helper: add <tag>text</tag> beneath parent.
        etree.SubElement(parent, tag).text = text

    root = etree.XML(u'<grabber></grabber>')
    append(root, "name", __title__)
    append(root, "author", __author__)
    append(root, "command", 'musixmatchlrc.py')
    append(root, "type", 'lyrics')
    append(root, "description", __description__)
    append(root, "version", __version__)
    append(root, "priority", __priority__)
    append(root, "syncronized", 'True' if __syncronized__ else 'False')

    rendered = etree.tostring(root, encoding='UTF-8',
                              pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(rendered))
    sys.exit(0)
|
|
||
def main():
    """Command-line entry point for the grabber.

    Registers the options, then handles ``--version`` and ``--test`` (both
    helpers terminate the process) before falling through to a lyrics lookup
    that exits 0 when lyrics were found and 1 when they were not.
    """
    global debug

    # (flags, keyword arguments) for every option, in --help order.
    option_table = [
        (('-v', "--version"), dict(action="store_true", default=False,
                                   dest="version", help="Display version and author")),
        (('-t', "--test"), dict(action="store_true", default=False,
                                dest="test", help="Test grabber with a know good search")),
        (('-s', "--search"), dict(action="store_true", default=False,
                                  dest="search", help="Search for lyrics.")),
        (('-a', "--artist"), dict(metavar="ARTIST", default=None,
                                  dest="artist", help="Artist of track.")),
        (('-b', "--album"), dict(metavar="ALBUM", default=None,
                                 dest="album", help="Album of track.")),
        (('-n', "--title"), dict(metavar="TITLE", default=None,
                                 dest="title", help="Title of track.")),
        (('-f', "--filename"), dict(metavar="FILENAME", default=None,
                                    dest="filename", help="Filename of track.")),
        (('-d', '--debug'), dict(action="store_true", default=False,
                                 dest="debug", help=("Show debug messages"))),
    ]

    parser = OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Only overwrite the Lyrics defaults with values that were supplied.
    if opts.artist:
        lyrics.artist = opts.artist
    if opts.album:
        lyrics.album = opts.album
    if opts.title:
        lyrics.title = opts.title
    if opts.filename:
        lyrics.filename = opts.filename

    found = LyricsFetcher().get_lyrics(lyrics)
    if not found:
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)

    buildLyrics(lyrics)
    sys.exit(0)
|
|
||
# Run the grabber only when this file is executed as a script.
if __name__ == "__main__":
    main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| # -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*- | ||
| """ | ||
| Scraper for https://supermusic.cz | ||
| Jose Riha | ||
| """ | ||
|
|
||
| import re | ||
| import requests | ||
| import html | ||
|
|
||
| import os | ||
| import sys | ||
| from optparse import OptionParser | ||
| from common import utilities | ||
|
|
||
# Grabber metadata: reported by buildVersion() and stamped onto Lyrics objects.
__author__ = "Paul Harrison and Jose Riha"
__title__ = "SuperMusic"
__description__ = "Search https://supermusic.cz for lyrics"
__priority__ = "250"
__version__ = "0.1"
__syncronized__ = False

# Module-wide debug switch; main() turns it on when -d/--debug is given.
debug = False
|
|
||
class LyricsFetcher:
    """Scrape plain-text lyrics from https://supermusic.cz.

    The lookup posts the song title to the site's search form, picks the
    first lyrics entry whose artist equals the requested one (ignoring case)
    and extracts the text from that song's page.
    """

    def __init__( self ):
        return

    @staticmethod
    def _find_song_url(response, artist):
        """Return the relative URL of the first lyrics entry by *artist*, or None.

        *response* is the search result page and *artist* the lower-cased
        artist name. Only entries of type 'text' or 'akordy a text' are
        considered.
        """
        listing = re.search(r'Počet nájdených piesní.+<br><br>(.*)<BR>', response, re.S)
        if listing is None:
            return None
        for match in re.finditer(r'<a href=(?P<url>"[^"]+?") target="_parent"><b>(?P<artist>.*?)</b></a> - (?P<type>.+?) \(<a href', listing.group(1)):
            matched_url, matched_artist, matched_type = match.groups()
            if matched_type not in ('text', 'akordy a text'):
                continue
            if matched_artist.lower() == artist:
                return matched_url.strip('"')
        return None

    @staticmethod
    def _extract_lyrics(page):
        """Return the lyrics text contained in a song *page*, or None if absent.

        Chord annotations (``<sup>``), comments and remaining tags are
        removed, ``<br />`` becomes a newline and HTML entities are decoded.
        """
        found = re.search(r'class=piesen>(.*?)</font>', page, re.S)
        if found is None:
            return None
        lyr = found.group(1)
        lyr = re.sub(r'<sup>.*?</sup>', '', lyr)
        lyr = re.sub(r'<br\s*/>\s*', '\n', lyr)
        lyr = re.sub(r'<!--.*?-->', '', lyr, flags=re.DOTALL)
        lyr = re.sub(r'<[^>]*?>', '', lyr, flags=re.DOTALL)
        lyr = lyr.strip('\r\n')
        return html.unescape(lyr)

    def get_lyrics(self, lyrics):
        """Look up ``lyrics.artist`` / ``lyrics.title`` and fill in ``lyrics.lyrics``.

        Returns True when a lyrics page was found and parsed, False on any
        network or parsing failure.
        """
        utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

        artist = lyrics.artist.lower()
        title = lyrics.title.lower()

        try:
            # Bounded wait so an unresponsive site cannot hang the grabber.
            req = requests.post('https://supermusic.cz/najdi.php', data={'hladane': title, 'typhladania': 'piesen', 'fraza': 'off'}, timeout=10)
            response = req.text
            req.close()
        except Exception:
            return False

        try:
            url = self._find_song_url(response, artist)
        except Exception:
            return False
        # Reported through the debug log instead of an unconditional print().
        utilities.log(debug, "%s: lyrics url: %s" % (__title__, url))
        if not url:
            return False

        try:
            req = requests.get('https://supermusic.cz/%s' % url, timeout=10)
            lyr = self._extract_lyrics(req.text)
        except Exception:
            return False
        if lyr is None:
            return False
        lyrics.lyrics = lyr
        return True
|
|
||
def performSelfTest():
    """Search for a fixed track and exit 0 if lyrics came back, 1 if not."""
    known_track = utilities.Lyrics()
    known_track.source = __title__
    known_track.syncronized = __syncronized__
    known_track.artist = 'Karel Gott'
    known_track.album = ''
    known_track.title = 'Trezor'

    scraper = LyricsFetcher()
    if scraper.get_lyrics(known_track):
        utilities.log(True, "Everything appears in order.")
        buildLyrics(known_track)
        sys.exit(0)
    else:
        utilities.log(True, "The lyrics for the test search failed!")
        sys.exit(1)
|
|
||
def buildLyrics(lyrics):
    """Render *lyrics* as ``<lyrics>`` XML, log the document and exit 0."""
    from lxml import etree

    doc = etree.XML(u'<lyrics></lyrics>')

    # Header elements, in document order.
    tags = ("artist", "album", "title", "syncronized", "grabber")
    values = (
        lyrics.artist,
        lyrics.album,
        lyrics.title,
        'True' if __syncronized__ else 'False',
        lyrics.source,
    )
    for tag, value in zip(tags, values):
        etree.SubElement(doc, tag).text = value

    # One <lyric> element per line of text.
    for row in lyrics.lyrics.splitlines():
        etree.SubElement(doc, "lyric").text = row

    payload = etree.tostring(doc, encoding='UTF-8',
                             pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(payload))
    sys.exit(0)
|
|
||
def buildVersion():
    """Render this grabber's ``<grabber>`` description as XML, log it and exit 0."""
    from lxml import etree

    doc = etree.XML(u'<grabber></grabber>')

    tags = ("name", "author", "command", "type",
            "description", "version", "priority", "syncronized")
    values = (
        __title__,
        __author__,
        'supermusic.py',
        'lyrics',
        __description__,
        __version__,
        __priority__,
        'True' if __syncronized__ else 'False',
    )
    for tag, value in zip(tags, values):
        etree.SubElement(doc, tag).text = value

    payload = etree.tostring(doc, encoding='UTF-8',
                             pretty_print=True, xml_declaration=True)
    utilities.log(True, utilities.convert_etree(payload))
    sys.exit(0)
|
|
||
def main():
    """Parse the command line and dispatch to version, self-test or lookup.

    ``--version`` and ``--test`` call helpers that terminate the process.
    Otherwise a lookup runs with the supplied track details and the process
    exits 0 when lyrics were found, 1 when they were not.
    """
    global debug

    parser = OptionParser()

    def switch(short, long_name, dest, text):
        # Register an on/off flag that defaults to False.
        parser.add_option(short, long_name, action="store_true", default=False,
                          dest=dest, help=text)

    def valued(short, long_name, metavar, dest, text):
        # Register an option that takes a value and defaults to None.
        parser.add_option(short, long_name, metavar=metavar, default=None,
                          dest=dest, help=text)

    switch('-v', "--version", "version", "Display version and author")
    switch('-t', "--test", "test", "Test grabber with a know good search")
    switch('-s', "--search", "search", "Search for lyrics.")
    valued('-a', "--artist", "ARTIST", "artist", "Artist of track.")
    valued('-b', "--album", "ALBUM", "album", "Album of track.")
    valued('-n', "--title", "TITLE", "title", "Title of track.")
    valued('-f', "--filename", "FILENAME", "filename", "Filename of track.")
    switch('-d', '--debug', "debug", ("Show debug messages"))

    opts, args = parser.parse_args()

    lyrics = utilities.Lyrics()
    lyrics.source = __title__
    lyrics.syncronized = __syncronized__

    if opts.debug:
        debug = True

    if opts.version:
        buildVersion()

    if opts.test:
        performSelfTest()

    # Transfer only the track details that were given on the command line.
    for name in ("artist", "album", "title", "filename"):
        given = getattr(opts, name)
        if given:
            setattr(lyrics, name, given)

    scraper = LyricsFetcher()
    if not scraper.get_lyrics(lyrics):
        utilities.log(True, "No lyrics found for this track")
        sys.exit(1)

    buildLyrics(lyrics)
    sys.exit(0)
|
|
||
# Run the grabber only when this file is executed as a script.
if __name__ == "__main__":
    main()