Skip to content

Commit

Permalink
mythmusic: various tweaks to the lyrics grabbers
Browse files Browse the repository at this point in the history
* tweak the priorities of the grabbers
* fix the debugging on a couple of the grabbers
* attempt to fix the Baidu grabber
* Update the README
  • Loading branch information
Paul Harrison committed Jul 20, 2017
1 parent 9156dfb commit 853c9bd
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 39 deletions.
28 changes: 15 additions & 13 deletions mythtv/programs/scripts/metadata/Music/lyrics/README
Expand Up @@ -87,16 +87,18 @@ Options:



Current Grabbers And Their Priority
===================================

EmbeddedLyrics 100
FileLyrics 105
TTPlayer 110
Baidu 120
GomAudio 135
Lyrdb 150
LyricsWiki 200
Genius 210
LyricsMode 220
DarkLyrics 230
Current Grabbers, Their Priority And Whether Synchronized
=========================================================

EmbeddedLyrics 100 Yes/No
FileLyrics 105 Yes/No
TTPlayer 110 Yes
Alsong 120 Yes
LetsSingIt 130 No
LyricsCom 140 No
LyricsWiki 150 No
Genius 160 No
LyricsMode 170 No
DarkLyrics 180 No
GomAudio 200 Yes
Baidu 210 Yes
35 changes: 17 additions & 18 deletions mythtv/programs/scripts/metadata/Music/lyrics/baidu.py
Expand Up @@ -2,7 +2,7 @@
"""
Scraper for http://www.baidu.com
taxigps
ronie
"""

import sys
Expand All @@ -14,11 +14,11 @@
from optparse import OptionParser
from common import utilities

__author__ = "Paul Harrison and 'taxigps'"
__author__ = "Paul Harrison and 'ronie'"
__title__ = "Baidu"
__description__ = "Search http://www.baidu.com for lyrics"
__version__ = "0.1"
__priority__ = "120"
__priority__ = "210"
__syncronized__ = True

debug = False
Expand All @@ -27,28 +27,27 @@

class LyricsFetcher:
def __init__( self ):
self.BASE_URL = 'http://box.zhangmen.baidu.com/x?op=12&count=1&title=%s$$%s$$$$'
self.LRC_URL = 'http://box.zhangmen.baidu.com/bdlrc/%d/%d.lrc'
self.BASE_URL = 'http://music.baidu.com/search/lrc?key=%s-%s'
self.LRC_URL = 'http://music.baidu.com%s'

def get_lyrics(self, lyrics):
utilities.log(debug, "%s: searching lyrics for %s - %s - %s" % (__title__, lyrics.artist, lyrics.album, lyrics.title))

try:
url = self.BASE_URL % (urllib.quote(lyrics.title), urllib.quote((lyrics.artist)))
xml_str = urllib.urlopen(url).read()
lrcid_pattern = re.compile(r'<lrcid>(.+?)</lrcid>')
lrcid = int(re.search(lrcid_pattern, xml_str).group(1))
if lrcid == 0:
url = self.BASE_URL % (lyrics.title, lyrics.artist)
utilities.log(debug, "%s: searching url %s" % (__title__, url))
data = urllib.urlopen(url).read()
songmatch = re.search('song-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
track = songmatch.group(1)
artistmatch = re.search('artist-title.*?<em>(.*?)</em>', data, flags=re.DOTALL)
name = artistmatch.group(1)
urlmatch = re.search("down-lrc-btn.*?':'(.*?)'", data, flags=re.DOTALL)
found_url = urlmatch.group(1)
if (difflib.SequenceMatcher(None, lyrics.artist.lower(), name.lower()).ratio() > 0.8) and (difflib.SequenceMatcher(None, lyrics.title.lower(), track.lower()).ratio() > 0.8):
lyr = urllib.urlopen(self.LRC_URL % found_url).read()
else:
return False
lrc_url = self.LRC_URL % (lrcid/100, lrcid)
lyr = urllib.urlopen(lrc_url).read()
except:
utilities.log(True, "%s: %s::%s (%d) [%s]" % (
__title__, self.__class__.__name__,
sys.exc_info()[ 2 ].tb_frame.f_code.co_name,
sys.exc_info()[ 2 ].tb_lineno,
sys.exc_info()[ 1 ]
))
return False

enc = chardet.detect(lyr)
Expand Down
Expand Up @@ -16,7 +16,7 @@
__author__ = "Paul Harrison and smory'"
__title__ = "DarkLyrics"
__description__ = "Search http://www.darklyrics.com/ - the largest metal lyrics archive on the Web"
__priority__ = "230";
__priority__ = "180";
__version__ = "0.1"
__syncronized__ = False;

Expand Down
4 changes: 2 additions & 2 deletions mythtv/programs/scripts/metadata/Music/lyrics/embedlrc.py
Expand Up @@ -8,12 +8,12 @@
import sys, os, re, chardet
import xml.dom.minidom as xml
from optparse import OptionParser
from common import *
from common import utilities

__author__ = "Paul Harrison and 'ronin'"
__title__ = "EmbeddedLyrics"
__description__ = "Search tracks tag for embedded lyrics"
__version__ = "0.1"
__version__ = "0.2"
__priority__ = "100"
__syncronized__ = True

Expand Down
2 changes: 1 addition & 1 deletion mythtv/programs/scripts/metadata/Music/lyrics/genius.py
Expand Up @@ -25,7 +25,7 @@
__author__ = "Paul Harrison and ronie'"
__title__ = "Genius"
__description__ = "Search http://www.genius.com for lyrics"
__priority__ = "210"
__priority__ = "160"
__version__ = "0.1"
__syncronized__ = False

Expand Down
8 changes: 7 additions & 1 deletion mythtv/programs/scripts/metadata/Music/lyrics/gomaudio.py
Expand Up @@ -11,6 +11,7 @@
import hashlib
import urllib
import re
import unicodedata
from optparse import OptionParser
from common import utilities
from common import audiofile
Expand All @@ -28,6 +29,11 @@

GOM_URL = "http://newlyrics.gomtv.com/cgi-bin/lyrics.cgi?cmd=find_get_lyrics&file_key=%s&title=%s&artist=%s&from=gomaudio_local"

def remove_accents(data):
    """Return *data* with accents and other combining marks removed.

    The text is decomposed (NFKD) so that every accent becomes a separate
    combining character, those combining characters are dropped, and the
    remainder is recomposed (NFC).

    The final recomposition matters for Korean text: NFKD also splits
    precomposed Hangul syllables into conjoining jamo, which are not
    combining characters (so they survive the filter) and which cannot be
    encoded as EUC-KR.  Recomposing restores the original syllables while
    leaving the stripped accents off.

    data -- a unicode string (decode byte strings before calling).
    Returns a unicode string.
    """
    decomposed = unicodedata.normalize('NFKD', data)
    stripped = u"".join(c for c in decomposed if not unicodedata.combining(c))
    # Put Hangul jamo (and any other class-0 decompositions) back together.
    return unicodedata.normalize('NFC', stripped)


class gomClient(object):
'''
Provide Gom-specific functions, such as deriving the key from an mp3 file
Expand Down Expand Up @@ -62,7 +68,7 @@ def get_lyrics(self, lyrics):
key = gomClient.GetKeyFromFile(lyrics.filename)
if not key:
return False
url = GOM_URL %(key, urllib.quote(lyrics.title.decode("utf-8").encode("euc-kr")), urllib.quote(lyrics.artist.decode("utf-8").encode("euc-kr")))
url = GOM_URL %(key, urllib.quote(remove_accents(lyrics.title.decode('utf-8')).encode('euc-kr')), (remove_accents(lyrics.artist.decode('utf-8')).encode('euc-kr')))
response = urllib.urlopen(url)
Page = response.read()
except:
Expand Down
4 changes: 3 additions & 1 deletion mythtv/programs/scripts/metadata/Music/lyrics/lyricsmode.py
Expand Up @@ -8,7 +8,7 @@
__author__ = "Paul Harrison and ronie'"
__title__ = "LyricsMode"
__description__ = "Search http://www.lyricsmode.com for lyrics"
__priority__ = "220"
__priority__ = "170"
__version__ = "0.1"
__syncronized__ = False

Expand Down Expand Up @@ -141,6 +141,8 @@ def buildVersion():
sys.exit(0)

def main():
global debug

parser = OptionParser()

parser.add_option('-v', "--version", action="store_true", default=False,
Expand Down
4 changes: 2 additions & 2 deletions mythtv/programs/scripts/metadata/Music/lyrics/lyricswiki.py
Expand Up @@ -12,8 +12,8 @@
__author__ = "Paul Harrison and ronie'"
__title__ = "LyricsWiki"
__description__ = "Search http://lyrics.wikia.com for lyrics"
__priority__ = "200"
__version__ = "0.1"
__priority__ = "150"
__version__ = "0.2"
__syncronized__ = False


Expand Down
2 changes: 2 additions & 0 deletions mythtv/programs/scripts/metadata/Music/lyrics/ttplayer.py
Expand Up @@ -247,6 +247,8 @@ def buildVersion():
sys.exit(0)

def main():
global debug

parser = OptionParser()

parser.add_option('-v', "--version", action="store_true", default=False,
Expand Down

0 comments on commit 853c9bd

Please sign in to comment.