Skip to content

Commit

Permalink
[#944] [#946] Extracted check for HTML signature from `MT::Image::che…
Browse files Browse the repository at this point in the history
…ck_upload` into its own class method.
  • Loading branch information
jayallen committed Jul 6, 2011
1 parent 525de85 commit 0012fb8
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 40 deletions.
159 changes: 123 additions & 36 deletions lib/MT/Image.pm
Expand Up @@ -79,6 +79,7 @@ sub check_upload {
my $fh = $params{Fh};
my $ext = $params{ext};
my $local_base = $params{LocalBase};
my $local = $params{Local};

###
#
Expand All @@ -93,51 +94,31 @@ sub check_upload {

if ( $ext =~ m/(jpe?g|png|gif|bmp|tiff?|ico)/i ) {

my $data = '';
my %sig_args = $fh ? ( fh => $fh )
: $local ? ( path => $local )
: ();
die 'No filehandle or path provided'
unless keys %sig_args;

## Read first 1k of image file
binmode($fh);
seek( $fh, 0, 0 );
read $fh, $data, 1024;
seek( $fh, 0, 0 );
my $has_html = $class->has_html_signature( %sig_args );
$has_html and return $class->errtrans( "Invalid upload file" );

## Using an error message format that already exists in all localizations of Movable Type 4.
if (
( $data =~ m/^\s*<[!?]/ )
|| ( $data
=~ m/<(HTML|SCRIPT|TITLE|BODY|HEAD|PLAINTEXT|TABLE|IMG|PRE|A)/i
)
|| ( $data =~ m/text\/html/i )
|| ( $data
=~ m/^\s*<(FRAMESET|IFRAME|LINK|BASE|STYLE|DIV|P|FONT|APPLET)/i
)
|| ( $data
=~ m/^\s*<(APPLET|META|CENTER|FORM|ISINDEX|H[123456]|B|BR)/i )
)
{
return
$class->error(
MT->translate(
"Saving [_1] failed: [_2]",
$local_base,
"Invalid image file format."
)
);
} ## end if ( ( $data =~ m/^\s*<[!?]/...))
unless ( defined($has_html) ) {
return $class->errtrans( 'Error reading image [_1]: [_2]',
$local_base, $class->errstr);
}
} ## end if ( $ext =~ m/(jpe?g|png|gif|bmp|tiff?|ico)/i)

## Use Image::Size to check if the uploaded file is an image, and if so,
## record additional image info (width, height). We first rewind the
## filehandle $fh, then pass it in to imgsize.
seek $fh, 0, 0;
eval { require Image::Size; };
return
$class->error(
MT->translate(
"Perl module Image::Size is required to determine "
. "width and height of uploaded images."
)
) if $@;
$@ and return $class->errtrans(
"Perl module Image::Size is required to determine "
. "width and height of uploaded images."
);

my ( $w, $h, $id ) = Image::Size::imgsize($fh);

my $write_file = sub {
Expand Down Expand Up @@ -195,6 +176,85 @@ sub check_upload {
( $w, $h, $id, $write_file );
} ## end sub check_upload

sub has_html_signature {
my $class = shift;
my %args = @_;
my ($fh, $data, $path) = @args{'fh','data','path'};

unless ( $data || $fh || $path ) {
return $class->errtrans(
"No valid arguments passed to MT::Image::has_html_signature");
}

# Convert path to filehandle if provided
if ( defined $path ) {
use Symbol;
$fh = gensym();
open $fh, $path
or return $class->errtrans(
"Could not open $path for reading: $!");
binmode($fh);
}

die "No valid arguments passed to MT::Image::has_html_signature"
unless $data or $fh;

my @patterns = (
# NOTE: The following patterns will be matched against a string
# with all whitespace stripped to simplify HTML matching.
qr{
\A # Start of string
< # Opening angle bracket = HTML tag?
( # First item below matches doctype and/or PHP
[!?]|frameset|iframe|link|base|style|div|p|font|
applet|meta|center|form|isindex|h[123456]|b|br
)
}ix,
qr{<(html|script|title|body|head|plaintext|table|img|pre|a)}i,
qr{text/html}i,
);

# Anonymous subroutine that reads N bytes from either the string in
# $data or the filehandle in $fh.
my $buffer_read;
my $read_buffer = sub {
my $buffer = shift;
defined $data and return substr( $data, 0, $buffer );
my $str;
seek( $fh, 0, 0 );
defined( $buffer_read = read( $fh, $str, $buffer) )
|| die "Could not read filehandle: $!";
# print STDERR "read_buffer: $buffer, buffer_read: $buffer_read\n";
seek( $fh, 0, 0 );
return $str;
};

# Get 1024 bytes of whitespace-free content
my $buffer = 1024;
my $test_string;
while ( ! defined $test_string ) {
my $str = $read_buffer->( $buffer );
$str =~ s{\s}{}g; # Strip whitespace,

# If result string is long enough, trim down to 1024 and assign
# to $test_string which will terminate the while loop
if ( length $str >= 1024 ) {
$test_string = substr( $str, 0, 1024 );
}
# File is shorter than our buffer
elsif ( $buffer_read < 1024 ) {
$test_string = $str
}
# Otherwise, increase the buffer size by 1024 and repeat
else {
$buffer += 1024;
}
}

return scalar grep { $test_string =~ $_ } @patterns;

} ## end sub has_html_signature

package MT::Image::ImageMagick;
@MT::Image::ImageMagick::ISA = qw( MT::Image );

Expand Down Expand Up @@ -852,6 +912,33 @@ location.
If any error occurs from this routine, it will return 'undef', and
assign the error message, accessible using the L<errstr> class method.
=head2 MT::Image->has_html_signature( %param )
This function looks at the first kilobyte (1024 bytes) of whitespace-stripped
data to see if it contains patterns indicative of embedded HTML. This is used
by C<MT::Image::check_upload()> and C<MT::App::validate_upload> to test
whether an uploaded file's content has a malicious payload.
The data to inspect is specified by the function's hash argument with keys:
=over 4
=item * C<data>
This key indicates the value is a scalar data string.
=item * C<fh>
This key indicates the value is a filehandle to the file to be tested.
=item * C<path>
This key specifies the absolute path to the file to be tested.
=back
The function returns true or false and dies on any errors.
=head1 AUTHOR & COPYRIGHT
Please see the I<MT> manpage for author, copyright, and license information.
Expand Down
29 changes: 27 additions & 2 deletions t/09-image.t
Expand Up @@ -26,7 +26,8 @@ BEGIN {
scalar @Img
* scalar @drivers
* 19 ) # 19 tests each for every image and driver
;
+ 1 # has_html_signature
;
}

MT->set_language('en-us');
Expand All @@ -48,7 +49,14 @@ for my $rec (@Img) {

for my $driver (@drivers) {
$cfg->ImageDriver($driver);
my $img = MT::Image->new( Filename => $img_file );

my $img = MT::Image->new( Filename => $img_file )
or diag explain
sprintf(
"%s: Could not instantiate MT::Image object from %s: %s",
$driver, $img_file, MT::Image->errstr()
);

SKIP: {
skip( "no $driver image", 19 ) unless $img;
isa_ok(
Expand Down Expand Up @@ -132,3 +140,20 @@ for my $rec (@Img) {
} # END SKIP
} ## end for my $driver (@drivers)
} ## end for my $rec (@Img)

subtest "MT::Image::has_html_signature" => sub {
my $images = File::Spec->catdir( $ENV{MT_HOME}, qw( t images ));
my $real = MT::Image->has_html_signature(
path => File::Spec->catfile( $images, 'test.jpg' )
);

is( $real, 0, 'has_html_signature with real image' );
defined $real or diag "Err: ".MT::Image->errstr;

my $fake = MT::Image->has_html_signature(
path => File::Spec->catfile( $images, 'fake.jpg' )
);
is( $fake, 1, 'has_html_signature with fake image' )
or diag MT::Image->errstr;
defined $fake or diag "Err: ".MT::Image->errstr;
};
1 change: 1 addition & 0 deletions t/images/fake.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 1 addition & 2 deletions t/sqlite-test.cfg
@@ -1,6 +1,5 @@
Database db/mt.db
ObjectDriver DBI::sqlite
# PluginPath ../plugins
# PluginPath plugins
TemplatePath ../tmpl
DisableObjectCache 1
NetPBMPath /opt/local/bin

0 comments on commit 0012fb8

Please sign in to comment.