From 853c9bd261a1a62d720c29b0b87e73e1b1f72893 Mon Sep 17 00:00:00 2001 From: Paul Harrison Date: Thu, 20 Jul 2017 11:43:19 +0100 Subject: [PATCH] mythmusic: various tweaks to the lyrics grabbers * tweak the priorities of the grabbers * fix the debugging on a couple of the grabbers * attempt to fix the Baidu grabber * Update the README --- .../scripts/metadata/Music/lyrics/README | 28 ++++++++------- .../scripts/metadata/Music/lyrics/baidu.py | 35 +++++++++---------- .../metadata/Music/lyrics/darklyrics.py | 2 +- .../scripts/metadata/Music/lyrics/embedlrc.py | 4 +-- .../scripts/metadata/Music/lyrics/genius.py | 2 +- .../scripts/metadata/Music/lyrics/gomaudio.py | 8 ++++- .../metadata/Music/lyrics/lyricsmode.py | 4 ++- .../metadata/Music/lyrics/lyricswiki.py | 4 +-- .../scripts/metadata/Music/lyrics/ttplayer.py | 2 ++ 9 files changed, 50 insertions(+), 39 deletions(-) diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/README b/mythtv/programs/scripts/metadata/Music/lyrics/README index 2bf6c0296cd..b1b5d6d9dc4 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/README +++ b/mythtv/programs/scripts/metadata/Music/lyrics/README @@ -87,16 +87,18 @@ Options: -Current Grabbers And Their Priority -=================================== - -EmbeddedLyrics 100 -FileLyrics 105 -TTPlayer 110 -Baidu 120 -GomAudio 135 -Lyrdb 150 -LyricsWiki 200 -Genius 210 -LyricsMode 220 -DarkLyrics 230 +Current Grabbers, Their Priority And Whether Synchronized +========================================================= + +EmbeddedLyrics 100 Yes/No +FileLyrics 105 Yes/No +TTPlayer 110 Yes +Alsong 120 Yes +LetsSingIt 130 No +LyricsCom 140 No +LyricsWiki 150 No +Genius 160 No +LyricsMode 170 No +DarkLyrics 180 No +GomAudio 200 Yes +Baidu 210 Yes diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/baidu.py b/mythtv/programs/scripts/metadata/Music/lyrics/baidu.py index 7c2e818cc2c..1efe23ae3d6 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/baidu.py +++ 
b/mythtv/programs/scripts/metadata/Music/lyrics/baidu.py @@ -2,7 +2,7 @@ """ Scraper for http://www.baidu.com -taxigps +ronie """ import sys @@ -14,11 +14,11 @@ from optparse import OptionParser from common import utilities -__author__ = "Paul Harrison and 'taxigps'" +__author__ = "Paul Harrison and 'ronie'" __title__ = "Baidu" __description__ = "Search http://www.baidu.com for lyrics" __version__ = "0.1" -__priority__ = "120" +__priority__ = "210" __syncronized__ = True debug = False @@ -27,28 +27,27 @@ class LyricsFetcher: def __init__( self ): - self.BASE_URL = 'http://box.zhangmen.baidu.com/x?op=12&count=1&title=%s$$%s$$$$' - self.LRC_URL = 'http://box.zhangmen.baidu.com/bdlrc/%d/%d.lrc' + self.BASE_URL = 'http://music.baidu.com/search/lrc?key=%s-%s' + self.LRC_URL = 'http://music.baidu.com%s' def get_lyrics(self, lyrics): utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title)) try: - url = self.BASE_URL % (urllib.quote(lyrics.title), urllib.quote((lyrics.artist))) - xml_str = urllib.urlopen(url).read() - lrcid_pattern = re.compile(r'<lrcid>(.+?)</lrcid>') - lrcid = int(re.search(lrcid_pattern, xml_str).group(1)) - if lrcid == 0: + url = self.BASE_URL % (lyrics.title, lyrics.artist) + utilities.log(debug, "%s: searching url %s" % (__title__, url)) + data = urllib.urlopen(url).read() + songmatch = re.search('song-title.*?<em>(.*?)</em>', data, flags=re.DOTALL) + track = songmatch.group(1) + artistmatch = re.search('artist-title.*?<em>(.*?)</em>', data, flags=re.DOTALL) + name = artistmatch.group(1) + urlmatch = re.search("down-lrc-btn.*?':'(.*?)'", data, flags=re.DOTALL) + found_url = urlmatch.group(1) + if (difflib.SequenceMatcher(None, lyrics.artist.lower(), name.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, lyrics.title.lower(), track.lower()).ratio() > 0.8): + lyr = urllib.urlopen(self.LRC_URL % found_url).read() + else: return False - lrc_url = self.LRC_URL % (lrcid/100, lrcid) - lyr = 
urllib.urlopen(lrc_url).read() except: - utilities.log(True, "%s: %s::%s (%d) [%s]" % ( - __title__, self.__class__.__name__, - sys.exc_info()[ 2 ].tb_frame.f_code.co_name, - sys.exc_info()[ 2 ].tb_lineno, - sys.exc_info()[ 1 ] - )) return False enc = chardet.detect(lyr) diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/darklyrics.py b/mythtv/programs/scripts/metadata/Music/lyrics/darklyrics.py index e05a46a59f1..8c3363d459c 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/darklyrics.py +++ b/mythtv/programs/scripts/metadata/Music/lyrics/darklyrics.py @@ -16,7 +16,7 @@ __author__ = "Paul Harrison and smory'" __title__ = "DarkLyrics" __description__ = "Search http://www.darklyrics.com/ - the largest metal lyrics archive on the Web" -__priority__ = "230"; +__priority__ = "180"; __version__ = "0.1" __syncronized__ = False; diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/embedlrc.py b/mythtv/programs/scripts/metadata/Music/lyrics/embedlrc.py index 7f3295549a7..f3d36484f68 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/embedlrc.py +++ b/mythtv/programs/scripts/metadata/Music/lyrics/embedlrc.py @@ -8,12 +8,12 @@ import sys, os, re, chardet import xml.dom.minidom as xml from optparse import OptionParser -from common import * +from common import utilities __author__ = "Paul Harrison and 'ronin'" __title__ = "EmbeddedLyrics" __description__ = "Search tracks tag for embedded lyrics" -__version__ = "0.1" +__version__ = "0.2" __priority__ = "100" __syncronized__ = True diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/genius.py b/mythtv/programs/scripts/metadata/Music/lyrics/genius.py index 453eed68f20..a07d1ff489d 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/genius.py +++ b/mythtv/programs/scripts/metadata/Music/lyrics/genius.py @@ -25,7 +25,7 @@ __author__ = "Paul Harrison and ronie'" __title__ = "Genius" __description__ = "Search http://www.genius.com for lyrics" -__priority__ = "210" +__priority__ = "160" 
__version__ = "0.1" __syncronized__ = False diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/gomaudio.py b/mythtv/programs/scripts/metadata/Music/lyrics/gomaudio.py index 079bbb324f6..0ae36bcee4a 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/gomaudio.py +++ b/mythtv/programs/scripts/metadata/Music/lyrics/gomaudio.py @@ -11,6 +11,7 @@ import hashlib import urllib import re +import unicodedata from optparse import OptionParser from common import utilities from common import audiofile @@ -28,6 +29,11 @@ GOM_URL = "http://newlyrics.gomtv.com/cgi-bin/lyrics.cgi?cmd=find_get_lyrics&file_key=%s&title=%s&artist=%s&from=gomaudio_local" +def remove_accents(data): + nfkd_data = unicodedata.normalize('NFKD', data) + return u"".join([c for c in nfkd_data if not unicodedata.combining(c)]) + + class gomClient(object): ''' privide Gom specific function, such as key from mp3 @@ -62,7 +68,7 @@ def get_lyrics(self, lyrics): key = gomClient.GetKeyFromFile(lyrics.filename) if not key: return False - url = GOM_URL %(key, urllib.quote(lyrics.title.decode("utf-8").encode("euc-kr")), urllib.quote(lyrics.artist.decode("utf-8").encode("euc-kr"))) + url = GOM_URL %(key, urllib.quote(remove_accents(lyrics.title.decode('utf-8')).encode('euc-kr')), urllib.quote(remove_accents(lyrics.artist.decode('utf-8')).encode('euc-kr'))) response = urllib.urlopen(url) Page = response.read() except: diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/lyricsmode.py b/mythtv/programs/scripts/metadata/Music/lyrics/lyricsmode.py index 9edce3e497f..b89c0854163 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/lyricsmode.py +++ b/mythtv/programs/scripts/metadata/Music/lyrics/lyricsmode.py @@ -8,7 +8,7 @@ __author__ = "Paul Harrison and ronie'" __title__ = "LyricsMode" __description__ = "Search http://www.lyricsmode.com for lyrics" -__priority__ = "220" +__priority__ = "170" __version__ = "0.1" __syncronized__ = False @@ -141,6 +141,8 @@ def buildVersion(): sys.exit(0) def main(): + global 
debug + parser = OptionParser() parser.add_option('-v', "--version", action="store_true", default=False, diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/lyricswiki.py b/mythtv/programs/scripts/metadata/Music/lyrics/lyricswiki.py index b7482a7d03f..bcda8959f7d 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/lyricswiki.py +++ b/mythtv/programs/scripts/metadata/Music/lyrics/lyricswiki.py @@ -12,8 +12,8 @@ __author__ = "Paul Harrison and ronie'" __title__ = "LyricsWiki" __description__ = "Search http://lyrics.wikia.com for lyrics" -__priority__ = "200" -__version__ = "0.1" +__priority__ = "150" +__version__ = "0.2" __syncronized__ = False diff --git a/mythtv/programs/scripts/metadata/Music/lyrics/ttplayer.py b/mythtv/programs/scripts/metadata/Music/lyrics/ttplayer.py index 6d373db9556..a573bce1c9c 100644 --- a/mythtv/programs/scripts/metadata/Music/lyrics/ttplayer.py +++ b/mythtv/programs/scripts/metadata/Music/lyrics/ttplayer.py @@ -247,6 +247,8 @@ def buildVersion(): sys.exit(0) def main(): + global debug + parser = OptionParser() parser.add_option('-v', "--version", action="store_true", default=False,