Skip to content

Commit

Permalink
Get rid of sanitize_filename for archive extraction as it can't possi…
Browse files Browse the repository at this point in the history
…bly save us anymore

+ add an extra sanity check to fully fix corrupt files on file serving
  • Loading branch information
Difegue committed Nov 23, 2021
1 parent 65ea864 commit afc9a5c
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 46 deletions.
11 changes: 9 additions & 2 deletions lib/LANraragi/Model/Archive.pm
Original file line number Diff line number Diff line change
Expand Up @@ -166,21 +166,28 @@ sub serve_page {

my $logger = get_logger( "File Serving", "lanraragi" );

$logger->debug("Page /$id/$path was requested");

my $tempfldr = get_temp();
my $file = LANraragi::Utils::Archive::sanitize_filename( $tempfldr . "/$id/$path" );
my $file = $tempfldr . "/$id/$path";

if ( -e $file ) {

# Freshly created files might not be complete yet.
# We have to wait before trying to serve them out...
my $last_size = 0;
my $size = -s $file;
my $timeout = 0;
while (1) {
$logger->debug("Waiting for file to be fully written ($size, previously $last_size)");
usleep(10000); # 10ms
$timeout += 10; # Sanity check in case the file remains at 0 bytes forever
$last_size = $size;
$size = -s $file;
last if ( $last_size eq $size ); # If the size hasn't changed since the last loop, it's likely the file is ready.

# If the size hasn't changed since the last loop, it's likely the file is ready.
last
if ( $last_size eq $size && ( $size ne 0 || $timeout > 1000 ) );
}

} else {
Expand Down
4 changes: 1 addition & 3 deletions lib/LANraragi/Model/Reader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ sub build_reader_JSON {
# To investigate further, perhaps with custom sorting algorithms?
@images = sort { &expand($a) cmp &expand($b) } @images;

$self->LRR_LOGGER->debug( "Files found in archive: \n " . Dumper @images );
$self->LRR_LOGGER->debug( "Files found in archive (encoding might be incorrect): \n " . Dumper @images );

# Build a browser-compliant filepath array from @images
my @images_browser;
Expand All @@ -92,8 +92,6 @@ sub build_reader_JSON {
# Then we bring the slashes back.
$imgpath =~ s!%2F!/!g;

$self->LRR_LOGGER->debug("Will be extracted to disk as: $imgpath");

# Bundle this path into an API call which will be used by the browser
push @images_browser, "./api/archives/$id/page?path=$imgpath";
}
Expand Down
60 changes: 19 additions & 41 deletions lib/LANraragi/Utils/Archive.pm
Original file line number Diff line number Diff line change
Expand Up @@ -47,37 +47,14 @@ sub generate_thumbnail {
undef $img;
}

# sanitize_filename(filename)
# Returns an ASCII-only rendition of the given filename, to avoid extra filesystem headaches.
#   filename: the name (or path) to convert, as raw bytes.
# The name is first decoded to characters, then every character that has no ASCII
# representation is replaced by its Unicode codepoint written as (at least) four
# uppercase hex digits. The result is capped at 254 characters.
sub sanitize_filename {

    my ($raw_name) = @_;

    # First attempt: let Encode guess the source encoding.
    # decode() dies when no encoding can be determined, hence the eval.
    my $decoded = eval { decode( "Guess", $raw_name ) };

    # Guessing failed: treat the input as UTF-8 instead.
    if ($@) {
        $decoded = decode_utf8($raw_name);
    }

    # Encode's coderef fallback is invoked with the ordinal of each character
    # that can't be expressed in ASCII; we substitute its hex codepoint.
    my $to_hex = sub { sprintf "%04X", shift };
    my $ascii_name = encode( "ascii", $decoded, $to_hex );

    # Keep the result within 254 characters.
    return length($ascii_name) > 254
      ? substr( $ascii_name, 0, 254 )
      : $ascii_name;
}

# extract_archive(path, archive_to_extract, force)
# Extract the given archive to the given path.
# This sub won't re-extract files already present in the destination unless force = 1.
sub extract_archive {

my ( $destination, $to_extract, $force_extract ) = @_;
my $logger = get_logger( "Archive", "lanraragi" );
$logger->debug("Fully extracting archive $to_extract");

# PDFs are handled by Ghostscript (alas)
if ( is_pdf($to_extract) ) {
Expand All @@ -92,14 +69,16 @@ sub extract_archive {
if ($force_extract) { return 1; }

my $filename = $e->pathname;
$filename = sanitize_filename($filename);
if ( -e "$destination/$filename" ) {
$logger->debug("$filename already exists in $destination");
return 0;
}
$logger->debug("Extracting $filename");

# Pre-emptively create the file to signal we're working on it
open( my $fh, ">", "$destination/$filename" ) or return 0;
open( my $fh, ">", "$destination/$filename" )
or
$logger->error("Couldn't create placeholder file $destination/$filename (might be a folder?), moving on nonetheless");
close $fh;
return 1;
}
Expand All @@ -112,16 +91,6 @@ sub extract_archive {
my $result_dir = $ae->to;
my $cwd = getcwd();

# Rename extracted files and folders to an encoded version for easier handling
finddepth(
sub {
unless ( $_ eq '.' ) {
move( $_, sanitize_filename($_) );
}
},
$result_dir
);

# chdir back to the base cwd in case finddepth died midway
chdir $cwd;

Expand Down Expand Up @@ -291,18 +260,27 @@ sub extract_single_file {
} else {

my $contents = "";
my $peek = Archive::Libarchive::Peek->new( filename => $archive );
$contents = $peek->file($filepath);
my $peek = Archive::Libarchive::Peek->new( filename => $archive );
my @files = $peek->files;

for my $name (@files) {
my $decoded_name = LANraragi::Utils::Database::redis_decode($name);

# This sub can receive either encoded or raw filenames, so we have to test for both.
if ( $decoded_name eq $filepath || $name eq $filepath ) {
$logger->debug("Found file $filepath in archive $archive");
$contents = $peek->file($name);
last;
}
}

open( my $fh, '>', $outfile )
or die "Could not open file '$outfile' $!";
print $fh $contents;
close $fh;
}

my $fixed_name = sanitize_filename($outfile);
move( $outfile, $fixed_name );
return $fixed_name;
return $outfile;
}

# extract_file_from_archive($archive, $file)
Expand Down
1 change: 1 addition & 0 deletions tools/Documentation/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.gitbook
1 change: 1 addition & 0 deletions tools/Documentation/basic-operations/archives.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ You can customize display options to show the index in compact mode, as well as
![Customized compact mode to sort by ratings](../.screenshots/ratings.png)

The topmost **carousel** view will show random archives from your current search in both thumbnail and compact modes -- It can also be configured to show New or Untagged archives instead.

![Index page of a regular LRR install in compact mode](<https://raw.githubusercontent.com/Difegue/LANraragi/dev/tools/\_screenshots/archive\_list.png>)

{% hint style="info" %}
Expand Down

0 comments on commit afc9a5c

Please sign in to comment.