Skip to content

Commit

Permalink
Address GH #55 - ->decoded_content() does not work for non-HTML content
Browse files Browse the repository at this point in the history
  • Loading branch information
Max Maischein committed Feb 22, 2021
1 parent a27af42 commit 76329ee
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 13 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -64,6 +64,7 @@ t/50-form2.html
t/50-form2.t
t/50-form3.html
t/50-mech-autoclose.t
t/50-mech-content-nonhtml.t
t/50-mech-content.t
t/50-mech-ct.t
t/50-mech-encoding.t
Expand Down
43 changes: 30 additions & 13 deletions lib/WWW/Mechanize/Chrome.pm
Expand Up @@ -3114,19 +3114,36 @@ sub document( $self ) {
}

# decoded_content
#
# Returns the content of the current page as a single string. This call
# blocks until the content is available.
#
# * If the content type reported by ->ct is 'text/html' (or ->ct returns
#   nothing), the markup is rebuilt from the live DOM by concatenating the
#   outer HTML of every top-level child of the document root.
# * For any other content type, the body stored in ->response is returned
#   as-is.
#
# Dies if fetching the document or any of its nodes fails, because the
# resulting Future is resolved with ->get.
sub decoded_content($self) {
    my $ct = $self->ct || 'text/html';

    my $res;
    # Media type names are case-insensitive, so normalise before comparing
    if( lc($ct) eq 'text/html' ) {
        $res = $self->document_future->then(sub( $root ) {
            # Join _all_ child nodes together to also fetch DOCTYPE nodes
            # and the stuff that comes after them
            my @content = map {
                my $nodeId = $_->{nodeId};
                $self->log('trace', "Fetching HTML for node " . $nodeId );
                # 0+ forces a numeric value for the nodeId parameter
                $self->target->send_message('DOM.getOuterHTML', nodeId => 0+$nodeId )
            } @{ $root->{root}->{children} };

            # wait_all yields the (completed) futures themselves, so each
            # one is unwrapped with ->get before its outerHTML is read
            return Future->wait_all( @content )
            ->then( sub( @outerHTML_f ) {
                Future->done( join "", map { $_->get->{outerHTML} } @outerHTML_f );
            });
        });

    } else {
        # Return the raw body, without asking the DOM at all.
        #
        # The content seems to be already decoded at this point.
        # NOTE(review): it is unclear how well this plays with encodings
        # and binary content - confirm before relying on it for those.
        $res = Future->done( $self->response->content );
    };

    return $res->get
};

=head2 C<< $mech->content( %options ) >>
Expand Down
41 changes: 41 additions & 0 deletions t/50-mech-content-nonhtml.t
@@ -0,0 +1,41 @@
#!perl -w
use strict;
use Test::More;
use WWW::Mechanize::Chrome;
use Log::Log4perl qw(:easy);
use lib '.';

use t::helper;

Log::Log4perl->easy_init($ERROR); # Set priority of root logger to ERROR

# Every browser instance runs the same $testcount assertions
my $testcount = 2;

# The Chrome instances this test will be run against
my @instances = t::helper::browser_instances();

# Skip the whole file when no usable Chrome is available, otherwise
# announce the total number of assertions up front.
# NOTE(review): the skip message interpolates $@ while the value captured
# in $err is never used - confirm which of the two carries the reason.
if (my $err = t::helper::default_unavailable) {
    plan skip_all => "Couldn't connect to Chrome: $@";
    exit;
}
plan tests => $testcount * scalar @instances;

# Constructor callback handed to t::helper::run_across_instances.
#
# Passes its arguments to t::helper::need_minimum_chrome_version together
# with the version string '62.0.0.0', then returns a new
# WWW::Mechanize::Chrome object built with autodie enabled. Caller-supplied
# options come last, so they can override the autodie default.
sub new_mech {
    t::helper::need_minimum_chrome_version( '62.0.0.0', @_ );

    return WWW::Mechanize::Chrome->new( autodie => 1, @_ );
};

# The assertions executed for every browser instance: fetch a CSS file
# (non-HTML content) and check that ->decoded_content hands back the
# stylesheet text itself, which is expected to start with a "/*" comment.
# Note that the fetched URL is an external site, so network access is needed.
my $check_raw_css = sub {
    my ($browser_instance, $mech) = @_;

    isa_ok $mech, 'WWW::Mechanize::Chrome';

    $mech->get('https://corion.net/style.css');
    my $body = $mech->decoded_content();

    like $body, qr!^/\*!, "We retrieve the raw CSS";
};

t::helper::run_across_instances(
    \@instances,
    \&new_mech,
    $testcount,
    $check_raw_css,
);

0 comments on commit 76329ee

Please sign in to comment.