Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
MythGame: Add a MAWS (MAME) Metadata grabber.
There are a few issues with the forthcoming grabber chooser with this script, but it's close enough that it should be committed. Script by Auric.
- Loading branch information
Robert McNamara
committed
Jan 23, 2011
1 parent
6f63367
commit f21ff18
Showing
1 changed file
with
255 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,255 @@ | ||
#!/usr/bin/env perl | ||
# @(#)$Header: /home/mythtv/mythtvrep/scripts/maws.pl,v 1.3 2010/06/04 07:48:49 mythtv Exp $ | ||
# Auric 2010/01/10 http://web.aanet.com.au/auric/ | ||
# | ||
# MAWS metadata Grabber Script | ||
# | ||
################################################################################ | ||
use strict; | ||
use warnings; | ||
use Getopt::Std; | ||
use LWP::Simple; | ||
use HTML::TreeBuilder; | ||
use HTML::Entities; | ||
use Data::Dumper; | ||
use Encode; | ||
|
||
#################################### Settings #################################
# Verbosity of info/progress messages: 0 = off, 1 = low, 2 = high.
my $info = 0;
# Where info messages go: 0 = STDERR; a value containing a non-digit
# character is used as the name of a log file.
my $infoop = 0;

#################################### Globals ##################################
# Short name of the site this grabber talks to.
my $site = 'MAWS';
# Root of the web site; links found on its pages are appended to it.
my $baseurl = 'http://maws.mameworld.info';
# Search endpoint; the search text is appended directly after it.
my $searchurl = "${baseurl}/maws/srch.php?search_text=";
# First two lines of the XML document written to STDOUT.
my $header = join("\n", '<?xml version="1.0" encoding="UTF-8"?>', '<metadata>');
# Last line of the XML document.
my $footer = '</metadata>';
# Set by getopts(): -M <search string> and -D <inetref>.
our ($opt_M, $opt_D);
# Result records (hash refs) collected for printitems().
my @metaitems;
#################################### Util Subs ############################################ | ||
# If you copy this for another site, hopefully these won't need to be changed | ||
|
||
# Close the info log, if one is open, and terminate the script.
#
# Argument: either a numeric exit status, or - when this sub runs as a
#           %SIG handler - the name of the signal that was received.
# Never returns.
sub cleanexit {
    my ($esig) = @_;

    # FH is the bareword handle infomsg() uses for the log file.
    close(FH) if fileno(FH);

    # A value containing a non-digit is a signal name, not a status:
    # leave with status 1 in that case.
    my $status = ($esig =~ /\D/) ? 1 : $esig;
    exit $status;
}
|
||
# Emit an info/progress message if the configured verbosity allows it.
#
# Arguments:
#   $level - verbosity level of the message; the message is dropped when
#            the file-level $info setting is lower than $level.
#   $mesg  - message text; it is encoded to UTF-8 before being written.
#
# When $infoop contains a non-digit character it names a log file.  The
# file is opened for writing on the first logged message and stays open
# on the bareword handle FH, which cleanexit() closes.  Each log line is
# prefixed with the local time and the level.  Otherwise, or when the
# log file cannot be opened, the message is written to STDERR.
sub infomsg {
    my ($level, $mesg) = @_;

    return if $info < $level;
    $mesg = encode('utf8', $mesg);

    if ($infoop =~ /\D/) {
        # Three-argument open keeps the file name from being parsed for
        # a mode, and the result is checked so that a failure does not
        # silently discard every message.
        if (fileno(FH) or open(FH, '>', $infoop)) {
            my $stamp = localtime();
            print FH "$stamp $level $mesg\n";
            return;
        }
        print STDERR "Could not open $infoop: $!\n";
    }
    print STDERR "$mesg\n";
    return;
}
|
||
# Normalise a piece of scraped text for inclusion in the XML output.
#
# Newlines are replaced by spaces, leading and trailing whitespace is
# removed and characters that cannot appear literally are escaped.
#
# Argument: $text - raw text; may be undef.
# Returns:  the cleaned, escaped string.  An empty string is returned
#           when $text is undef, empty or whitespace only, so callers
#           can always interpolate the result.
sub cleantext {
    my ($text) = @_;

    # Test for definedness and length instead of truth so that the text
    # "0" is cleaned like any other value.
    return '' unless defined $text && length $text;

    $text =~ s/\n/ /g;
    $text =~ s/^\s+|\s+$//g;
    return '' if $text eq '';

    # Escape the markup characters first.  Everything else outside
    # printable ASCII is then written as a numeric character reference:
    # HTML entity names such as &eacute; are not defined in XML, which
    # is the format this script emits.
    $text = encode_entities($text, q{<>&"'});
    $text =~ s/([^\x20-\x7E\n\r\t])/sprintf('&#%d;', ord($1))/ge;
    return $text;
}
|
||
# Write every record in @metaitems to STDOUT as an <item> element.
#
# Each record is a hash ref; the keys read are title, inetref,
# description, genre, studio, year, screenshotthumb, screenshoturl,
# coverthumb, coverurl and popularity.  Values are written verbatim - no
# escaping is applied here.  The <system> element is always "MAME".
sub printitems {
    my @fields = qw(title inetref description genre studio year
                    screenshotthumb screenshoturl coverthumb coverurl
                    popularity);

    foreach my $record (@metaitems) {
        # A missing or undefined value is written as an empty string
        # instead of raising an "uninitialized value" warning.
        my %item = map {
            $_ => (defined $record->{$_} ? $record->{$_} : '')
        } @fields;

        print join("\n",
            ' <item>',
            " <title>$item{title}</title>",
            " <inetref>$item{inetref}</inetref>",
            " <description>$item{description}</description>",
            ' <system>MAME</system>',
            ' <categories>',
            qq{ <category type="genre" name="$item{genre}"/>},
            ' </categories>',
            ' <studios>',
            qq{ <studio name="$item{studio}"/>},
            ' </studios>',
            " <year>$item{year}</year>",
            ' <images>',
            qq{ <image type="screenshot" thumb="$item{screenshotthumb}" url="$item{screenshoturl}"/>},
            qq{ <image type="coverart" thumb="$item{coverthumb}" url="$item{coverurl}"/>},
            ' </images>',
            " <popularity>$item{popularity}</popularity>",
            ' </item>',
        ), "\n";
    }
    return;
}
|
||
#################################### Site Specific Subs ########################## | ||
# Search MAWS for a game and queue one stub record per result row.
#
# Argument: $searchstr - free-text search term, as given to -M.
# Effect:   for every table row whose third link has text and an href
#           containing "romset", a hash ref is pushed onto @metaitems.
#           Only 'title' and 'inetref' are filled in; all other fields
#           are empty strings.
# Returns:  0.
# Dies:     when the result page cannot be fetched or parsed.
sub search {
    my $searchstr = shift @_;

    # Percent-encode the term so that characters such as "&", "#", "+"
    # or "%" reach the server as part of the search text instead of
    # being read as URL syntax.
    my $query = $searchstr;
    utf8::encode($query) if utf8::is_utf8($query);
    $query =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;

    my $content = get($searchurl . $query);
    unless ($content) {
        die "Could not retrieve ${searchurl}${searchstr}";
    }
    my $tree = HTML::TreeBuilder->new;
    eval { $tree->parse($content); };
    if ($@) {
        die "$searchurl parse failed, $@";
    }
    $tree->eof();

    foreach my $tr ($tree->find_by_tag_name('tr')) {
        # Only the third link of a row is ever examined; rows with
        # fewer links are skipped.
        my @links = $tr->find_by_tag_name('a');
        my $link = $links[2];
        next unless defined $link;

        my $text = $link->as_trimmed_text();
        my $href = $link->attr('href');
        next unless $text;
        next unless defined $href && $href =~ /romset/;

        # Drop any fragment and a leading "/maws/" so that the link can
        # be rebuilt as an absolute URL below the site root.
        my $path = $href;
        $path =~ s/\#.*$//;
        $path =~ s/^\/maws\///;

        push(@metaitems, {
            'title' => cleantext($text),
            'inetref' => cleantext($baseurl . "/maws/" . $path),
            'description' => "",
            'genre' => "",
            'studio' => "",
            'year' => "",
            'coverthumb' => "",
            'coverurl' => "",
            'screenshotthumb' => "",
            'screenshoturl' => "",
            'popularity' => ""
        });
    }

    # Release the parse tree explicitly.
    $tree->delete;
    return 0;
}
|
||
# Fetch the MAWS page of one game and queue its metadata record.
#
# Argument: $inetref - URL of the game's page, as given to -D.
# Effect:   pushes one hash ref onto @metaitems with the keys title,
#           inetref, description, genre, studio, year, coverthumb,
#           coverurl, screenshotthumb, screenshoturl and popularity.
#           Fields that are not found stay empty; description and the
#           two thumb fields are never filled in.  popularity is the
#           percentage from the rating cell divided by ten and rounded,
#           or empty when no leading number can be read from it.
# Returns:  0.
# Dies:     when the page cannot be fetched or parsed.
sub queryinetref {
    my $inetref = shift @_;

    my $content = get($inetref);
    unless ($content) {
        die "Could not retrieve ${inetref}";
    }
    my $tree = HTML::TreeBuilder->new;
    eval { $tree->parse($content); };
    if ($@) {
        die "$inetref parse failed, $@";
    }
    $tree->eof();

    my %meta = map { $_ => '' } qw(title description genre studio year
                                   coverthumb coverurl screenshotthumb
                                   screenshoturl popularity);

    # Escape the URL for output, as search() does for the links it finds.
    my $ref = cleantext($inetref);
    $meta{'inetref'} = defined $ref ? $ref : '';

    # Label cells whose right-hand neighbour holds plain text, mapped to
    # the record field that text is stored in.
    my %fieldfor = (
        'title' => 'title',
        'manufacturer' => 'studio',
        'year' => 'year',
        'genre' => 'genre',
    );

    foreach my $tr ($tree->find_by_tag_name('tr')) {
        foreach my $td ($tr->find_by_tag_name('td')) {
            my $label = $td->as_trimmed_text();

            if (exists $fieldfor{$label}) {
                # A sibling can be missing or a bare text node; only an
                # element can be asked for its text.
                my $right = $td->right();
                next unless ref $right;
                my $text = cleantext($right->as_trimmed_text());
                $meta{$fieldfor{$label}} = defined $text ? $text : '';
            } elsif ($label eq "snapshots") {
                # The image links are looked up in the whole page, not
                # only in the neighbouring cell.
                foreach my $link ($tree->find_by_tag_name('a')) {
                    my $text = $link->as_trimmed_text();
                    if ((!$meta{'screenshoturl'}) && $text =~ /in game/) {
                        my $onclick = $link->attr('onClick');
                        if ($onclick) {
                            # The image path is the last quoted string
                            # in the handler; strip a leading ".." and
                            # "/" before prefixing the site root.
                            $onclick =~ s/.*\'(.*)\'.*/$1/;
                            $onclick =~ s/^\.\.//;
                            $onclick =~ s/^\///;
                            $meta{'screenshoturl'} = cleantext($baseurl . "/" . $onclick);
                        }
                    }
                    if ($text =~ /flyer/) {
                        # Without an href there is nothing to link to.
                        my $href = $link->attr('href');
                        $meta{'coverurl'} = cleantext($baseurl . $href) if defined $href;
                    }
                }
            } elsif ($label eq "rating") {
                my $right = $td->right();
                next unless ref $right;
                my $div = $right->find_by_tag_name('div');
                next unless $div;
                my $rating = $div->attr('title');
                if ($rating) {
                    # Keep what precedes the last "%" and use its
                    # leading number.
                    $rating =~ s/(.*)%.*/$1/;
                    if ($rating =~ /^\s*([0-9]+(?:\.[0-9]+)?)/) {
                        $meta{'popularity'} = int(($1 / 10) + 0.5);
                    }
                }
            }
        }
    }

    # Release the parse tree explicitly.
    $tree->delete;

    push(@metaitems, { %meta });
    return 0;
}
|
||
#################################### Main ##################################### | ||
# Command line: -M <search string> lists matching games,
# -D <inetref> prints the metadata of a single game.
getopts('M:D:');

# Compare against '' instead of testing truth so that the value "0" is
# accepted as well.
my $havesearch = (defined $opt_M && $opt_M ne '');
my $haveinetref = (defined $opt_D && $opt_D ne '');

unless ($havesearch || $haveinetref) {
    # STDOUT carries the XML document, so the usage error goes to STDERR.
    print STDERR "Error must have either -M search str or -D inetref\n";
    cleanexit(1);
}

# Leave through cleanexit() when interrupted so that an open info log
# gets closed.
$SIG{$_} = \&cleanexit for qw(INT HUP TERM QUIT);

print "$header\n";

# -M takes precedence when both options are given.
if ($havesearch) {
    search($opt_M);
} else {
    queryinetref($opt_D);
}
printitems();

print "$footer\n";

cleanexit(0);