diff --git a/mythplugins/configure b/mythplugins/configure index b93e4af2fd5..0a0c145d942 100755 --- a/mythplugins/configure +++ b/mythplugins/configure @@ -460,6 +460,7 @@ if ! disabled weather; then check_pl_lib "DateTime::Format::ISO8601" || disable_weather "DateTime::Format::ISO8601" check_pl_lib "SOAP::Lite" || disable_weather "SOAP::Lite" + check_pl_lib "JSON" || disable_weather "JSON" fi if test "$dcraw" != "no" ; then diff --git a/mythplugins/mythweather/mythweather/scripts/uk_bbc/BBCLocation.pm b/mythplugins/mythweather/mythweather/scripts/uk_bbc/BBCLocation.pm index 95507d5fee6..de6693cb81e 100644 --- a/mythplugins/mythweather/mythweather/scripts/uk_bbc/BBCLocation.pm +++ b/mythplugins/mythweather/mythweather/scripts/uk_bbc/BBCLocation.pm @@ -1,120 +1,212 @@ +#! /usr/bin/perl +# vim:ts=4:sw=4:ai:et:si:sts=4 + package BBCLocation; use strict; +use warnings; require Exporter; -use LWP::Simple; -our @EXPORT = qw(Search); -our $VERSION = 0.2; +use utf8; +use encoding 'utf8'; +use LWP::UserAgent; +use JSON; +use XML::XPath; +use XML::XPath::XMLParser; +use URI::Escape; + + +our @EXPORT = qw(Search FindLoc); +our $VERSION = 0.3; my @searchresults; my @resulturl; my $resultcount = -1; sub Search { + my ($search_string, $dir, $timeout, $logdir) = @_; + $search_string = uri_escape($search_string); - my $base_url = 'http://news.bbc.co.uk/weather/util/search/Search.xhtml?'; - my $world_base_url = $base_url . 'lowgraphics=true®ion=world&search='; - my $local_base_url = $base_url . 'lowgraphics=true®ion=uk&search='; + my $base_url = 'http://www.bbc.co.uk/locator/client/weather/en-GB/' . + 'search.json'; + my $search_url = $base_url . '?ptrt=/&search='; - my $search_string = shift; - my $world_response = get $world_base_url . $search_string; - my $local_response = get $local_base_url . $search_string; + my $file = $search_string; + getCachedJSON($search_url . $search_string, $dir, $file, $timeout, $logdir); - &parseResults($world_response) if defined($world_response); - &parseResults($local_response) if defined($local_response); + my $cachefile = "$dir/$file.json"; + my $cachefile1 = "$dir/$file-results.html"; + my $cachefile2 = "$dir/$file-pagination.html"; - if ( ($resultcount > 0 ) && ($#searchresults < 0) ) { - foreach my $url (@resulturl) { - my $url_response = get $base_url . $url; - die unless defined $url_response; - &parseResults($url_response); - } - } + open OF, "<:utf8", $cachefile or die "Can't read $cachefile: $!\n"; + my $content = do { local $/; }; + close OF; - return @searchresults; -} + my $decoded = decode_json $content; + $resultcount = $decoded->{"noOfResults"}; -sub parseResults { - my $response = shift; - my $isresults = 0; - my $resultline = ""; - - # Initialise a hash for the $locid & $locname results. - - # Use of a hash indexed by $locid ensures that more informative results - # (e.g. "Sale, Australia" vs. "Sale") coming from

- # section will overwrite less informative results coming from - # section - my %loc_hash = (); - foreach (split("\n", $response)) { - - # Declare a result if either the '

OR strings are found - # This ensures that single and multiple matches are caught - - if (/

/ || //) { - $isresults = 1; + get_results($cachefile1, \%loc_hash); + + if (exists $decoded->{"pagination"}) { + my %pages = (); + my $xp = XML::XPath->new(filename => $cachefile2); + my $nodeset = $xp->find("//ol/li/a"); + foreach my $node ($nodeset->get_nodelist) { + my $url = $node->getAttribute("href"); + my $num = $node->string_value; + $url =~ s/&/&/; + $num =~ s/ //g; + $pages{$num} = $url; } - my $locname; - my $locid; - my $url; - - if ($isresults) { - last if (/There are no forecasts matching/); - - $resultcount = $1 if (/There \w{2,3} (\d*) forecasts? matching/); - - # Collect result URLs - if (/(.*)<\/a>.*/$1/s; - - $resultline = $locid . "::" . $locname; - - $loc_hash{$locid} = $locname; - } - - # Extract location ID and name from "Print " link - - # This string is always present (provided valid search result - invalid results are caught above) - # irresepective of whether single or multiple matches are returned - - elsif (/= ($now - $timeout)) ) { + # File cache is still recent. + log_print( $logdir, "cached in $cachefile\n" ); + } else { + log_print( $logdir, "$url\ncaching to $cachefile\n" ); + + my $ua = LWP::UserAgent->new; + $ua->timeout(30); + $ua->env_proxy; + $ua->default_header('Accept-Language' => "en"); + + my $response = $ua->get($url); + if ( !$response->is_success ) { + die $response->status_line; + } + + open OF, ">:utf8", $cachefile or die "Can't open $cachefile: $!\n"; + print OF $response->content; + close OF; + + $decoded = decode_json $response->content; + + open OF, ">:utf8", $cachefile1 or die "Can't open $cachefile1: $!\n"; + print OF "".$decoded->{"results"}.""; + close OF; + + if (exists $decoded->{"pagination"}) { + open OF, ">:utf8", $cachefile2 or + die "Can't open $cachefile2: $!\n"; + print OF "".$decoded->{"pagination"}.""; + close OF; + } else { + unlink $cachefile2; + } + } +} + +sub get_results { + my ($file, $outhash) = @_; + + my $xp = XML::XPath->new(filename => $file); + my $nodeset = $xp->find("//ul/li/a"); + foreach my $node ($nodeset->get_nodelist) { + my $url = $node->getAttribute("href"); + my $loc = $node->string_value; + + $url =~ s/^\/weather\///; + $outhash->{$url} = $loc; + + print "$url"."::$loc\n"; + } +} + +sub log_print { + return if not defined $::opt_D; + my $dir = shift; + + open OF, ">>$dir/uk_bbc.log"; + print OF @_; + close OF; +} + +sub FindLoc { + my ($locid, $dir, $timeout, $logdir) = @_; + + my $url = "http://www.bbc.co.uk/weather/$locid"; + + my $file = "$locid.html"; + getCachedHTML($url, $dir, $file, $timeout, $logdir); + + my $cachefile = "$dir/$file"; + + open OF, "<:utf8", $cachefile; + my $contents = do { local $/; ; }; + close OF; + + my ($rssid) = ($contents =~ /data-loc="(.*?)"/); + die "No RSS Location found for ID $locid!\n" unless defined $rssid; + + $rssid =~ s/^LOC-//; + return $rssid; +} + + +sub getCachedHTML { + my ($url, $dir, $file, $timeout, $logdir) = @_; + + my $cachefile = "$dir/$file"; + + my $now = time(); + + log_print( $logdir, "Loading URL: $url\n" ); + + if( (-e $cachefile) and ((stat($cachefile))[9] >= ($now - $timeout)) ) { + # File cache is still recent. + log_print( $logdir, "cached in $cachefile\n" ); + } else { + log_print( $logdir, "$url\ncaching to $cachefile\n" ); + + my $ua = LWP::UserAgent->new; + $ua->timeout(30); + $ua->env_proxy; + $ua->default_header('Accept-Language' => "en"); + + my $response = $ua->get($url); + if ( !$response->is_success ) { + die $response->status_line; + } + + my $content = $response->content; + open OF, ">:utf8", $cachefile or die "Can't open $cachefile: $!\n"; + print OF $content; + close OF; + } +} + + 1; diff --git a/mythplugins/mythweather/mythweather/scripts/uk_bbc/bbccurrentxml.pl b/mythplugins/mythweather/mythweather/scripts/uk_bbc/bbccurrentxml.pl index 45fd8c5839c..348a3945ee8 100755 --- a/mythplugins/mythweather/mythweather/scripts/uk_bbc/bbccurrentxml.pl +++ b/mythplugins/mythweather/mythweather/scripts/uk_bbc/bbccurrentxml.pl @@ -1,13 +1,13 @@ #! /usr/bin/perl - -# -# Based on nwsxml.pl by Lucien Dunning -# +# vim:ts=4:sw=4:ai:et:si:sts=4 use strict; use warnings; +use utf8; +use encoding 'utf8'; use English; + use File::Basename; use Cwd 'abs_path'; use lib dirname(abs_path($0 or $PROGRAM_NAME)), @@ -16,47 +16,61 @@ use XML::Simple; use LWP::Simple; -# Ideally we would use the If-Modified-Since header -# to reduce server load, but they ignore it -#use HTTP::Cache::Transparent; use Getopt::Std; +use File::Path; + use File::Basename; use lib dirname($0); use BBCLocation; -our ($opt_v, $opt_t, $opt_T, $opt_l, $opt_u, $opt_d); +our ($opt_v, $opt_t, $opt_T, $opt_l, $opt_u, $opt_d, $opt_D); my $name = 'BBC-Current-XML'; -my $version = 0.2; -my $author = 'Stuart Morgan'; -my $email = 'stuart@tase.co.uk'; +my $version = 0.3; +my $author = 'Gavin Hurlbut / Stuart Morgan'; +my $email = 'gjhurlbu@gmail.com / stuart@tase.co.uk'; my $updateTimeout = 120*60; # 2 Hours, BBC updates infrequently ~3 hours -# Given that the option to update in the background now exists -# potentially we could be hitting the server 12 times in a day my $retrieveTimeout = 30; my @types = ('cclocation', 'station_id', 'copyright', 'observation_time', 'weather', 'temp', 'relative_humidity', 'wind_dir', 'pressure', 'visibility', 'weather_icon', 'appt', 'wind_spdgst'); -my $dir = "./"; +my $dir = "/tmp/uk_bbc"; +my $logdir = "/tmp/uk_bbc"; + +binmode(STDOUT, ":utf8"); + +if (!-d $logdir) { + mkpath( $logdir, {mode => 0755} ); +} getopts('Tvtlu:d:'); if (defined $opt_v) { print "$name,$version,$author,$email\n"; + log_print( $logdir, "-v\n" ); exit 0; } if (defined $opt_T) { print "$updateTimeout,$retrieveTimeout\n"; + log_print( $logdir, "-t\n" ); exit 0; } -if (defined $opt_l) { +if (defined $opt_d) { + $dir = $opt_d; +} +if (!-d $dir) { + mkpath( $dir, {mode => 0755} ); +} + +if (defined $opt_l) { my $search = shift; - my @results = BBCLocation::Search($search); + log_print( $logdir, "-l $search\n" ); + my @results = BBCLocation::Search($search, $dir, $updateTimeout, $logdir); my $result; foreach (@results) { @@ -71,13 +85,9 @@ exit 0; } -if (defined $opt_d) { - $dir = $opt_d; -} - # we get here, we're doing an actual retrieval, everything must be defined -my $locid = shift; +my $locid = BBCLocation::FindLoc(shift, $dir, $updateTimeout, $logdir); if (!(defined $opt_u && defined $locid && !$locid eq "")) { die "Invalid usage"; } @@ -248,3 +258,12 @@ printf $datalabel . "::" . $datavalue . "\n"; } + +sub log_print { + return if not defined $::opt_D; + my $dir = shift; + + open OF, ">>$dir/uk_bbc.log"; + print OF @_; + close OF; +} diff --git a/mythplugins/mythweather/mythweather/scripts/uk_bbc/bbcthreedayxml.pl b/mythplugins/mythweather/mythweather/scripts/uk_bbc/bbcthreedayxml.pl index 1998c003e0d..a966cb35b84 100755 --- a/mythplugins/mythweather/mythweather/scripts/uk_bbc/bbcthreedayxml.pl +++ b/mythplugins/mythweather/mythweather/scripts/uk_bbc/bbcthreedayxml.pl @@ -1,13 +1,13 @@ #! /usr/bin/perl - -# -# Based on nwsxml.pl by Lucien Dunning -# +# vim:ts=4:sw=4:ai:et:si:sts=4 use strict; use warnings; +use utf8; +use encoding 'utf8'; use English; + use File::Basename; use Cwd 'abs_path'; use lib dirname(abs_path($0 or $PROGRAM_NAME)), @@ -16,44 +16,60 @@ use XML::Simple; use LWP::Simple; -# Ideally we would use the If-Modified-Since header -# to reduce server load, but they ignore it -#use HTTP::Cache::Transparent; use Getopt::Std; +use File::Path; + use File::Basename; use lib dirname($0); use BBCLocation; -our ($opt_v, $opt_t, $opt_T, $opt_l, $opt_u, $opt_d); +our ($opt_v, $opt_t, $opt_T, $opt_l, $opt_u, $opt_d, $opt_D); my $name = 'BBC-3day-XML'; -my $version = 0.2; -my $author = 'Stuart Morgan'; -my $email = 'stuart@tase.co.uk'; +my $version = 0.3; +my $author = 'Gavin Hurlbut / Stuart Morgan'; +my $email = 'gjhurlbu@gmail.com / stuart@tase.co.uk'; my $updateTimeout = 360*60; # 6 Hours my $retrieveTimeout = 30; my @types = ('3dlocation', 'station_id', 'copyright', 'weather_icon', 'date-0', 'icon-0', 'low-0', 'high-0', 'date-1', 'icon-1', 'low-1', 'high-1', 'date-2', 'icon-2', 'low-2', 'high-2', 'updatetime'); -my $dir = "./"; +my $dir = "/tmp/uk_bbc"; +my $logdir = "/tmp/uk_bbc"; + +binmode(STDOUT, ":utf8"); + +if (!-d $logdir) { + mkpath( $logdir, {mode => 0755} ); +} getopts('Tvtlu:d:'); if (defined $opt_v) { print "$name,$version,$author,$email\n"; + log_print( $logdir, "-v\n" ); exit 0; } if (defined $opt_T) { print "$updateTimeout,$retrieveTimeout\n"; + log_print( $logdir, "-t\n" ); exit 0; } -if (defined $opt_l) { +if (defined $opt_d) { + $dir = $opt_d; +} +if (!-d $dir) { + mkpath( $dir, {mode => 0755} ); +} + +if (defined $opt_l) { my $search = shift; - my @results = BBCLocation::Search($search); + log_print( $logdir, "-l $search\n" ); + my @results = BBCLocation::Search($search, $dir, $updateTimeout, $logdir); my $result; foreach (@results) { @@ -68,13 +84,9 @@ exit 0; } -if (defined $opt_d) { - $dir = $opt_d; -} - # we get here, we're doing an actual retrieval, everything must be defined -my $locid = shift; +my $locid = BBCLocation::FindLoc(shift, $dir, $updateTimeout, $logdir); if (!(defined $opt_u && defined $locid && !$locid eq "")) { die "Invalid usage"; } @@ -232,3 +244,12 @@ $i++; } + +sub log_print { + return if not defined $::opt_D; + my $dir = shift; + + open OF, ">>$dir/uk_bbc.log"; + print OF @_; + close OF; +}