Skip to content

Commit

Permalink
FS|Performance|libcore: Cache ZIP metadata
Browse files Browse the repository at this point in the history
ZIP directory metadata is now cached so that we can populate the
filesystem without having to actually read the ZIP file contents.

The cached metadata is stored in “/home/cache/metadata/ZipArchive”.

de::ZipArchive was modified to allow reading the central directory
either from the original file or from the cached data.
  • Loading branch information
skyjake committed Feb 14, 2017
1 parent 83f789a commit 64a54b0
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 100 deletions.
9 changes: 5 additions & 4 deletions doomsday/sdk/libcore/include/de/filesys/metadatabank.h
Expand Up @@ -38,16 +38,17 @@ class DENG2_PUBLIC MetadataBank : public Bank
/**
* Adds a new metadata entry into the bank.
*
* @param id Meta ID.
* @param category Metadata category. Each category is saved in its own subfolder.
* @param id Meta ID.
*
* @return The cached metadata, if available. This will be an empty Block if no
* metadata has yet been cached.
*/
Block check(Block const &id);
Block check(String const &category, Block const &id);

void setMetadata(Block const &id, Block const &metadata);
void setMetadata(String const &category, Block const &id, Block const &metadata);

Block metadata(Block const &id) const;
Block metadata(String const &category, Block const &id) const;

protected:
IData *loadFromSource(ISource &source) override;
Expand Down
234 changes: 149 additions & 85 deletions doomsday/sdk/libcore/src/data/ziparchive.cpp
Expand Up @@ -261,30 +261,67 @@ struct CentralEnd : public ISerializable {

using namespace internal;

// MetadataBank category (subfolder name) under which cached ZIP central
// directories are stored. Never mutated, so declare it const.
static String const ZIPARCHIVE_META_CATEGORY = "ZipArchive";

DENG2_PIMPL(ZipArchive)
{
Block directoryCacheId;
CentralEnd zipSummary;
QVector<std::pair<Block, CentralFileHeader>> centralHeaders;

Impl(Public *i) : Base(i) {}

void readCentralDirectory(Reader &reader, bool updateFromLocalHeaders)
/**
 * Locates the central directory. Start from the earliest location where
 * the signature might be.
 *
 * @param reader  Source.
 *
 * @return @c true if the end-of-central-directory signature was found; the
 * reader is then positioned immediately after the signature, at the start of
 * the CentralEnd block. @c false if no signature was found.
 */
bool seekToCentralEnd(Reader &reader)
{
    // NOTE(review): stray lines from the pre-commit implementation (reading a
    // CentralEnd before the signature is located) were removed here; the
    // summary is read later by readCentralDirectory().
    // The record may be followed by a ZIP file comment, so scan backwards
    // through every position where the signature could legally start.
    for (duint pos = CENTRAL_END_SIZE; pos < MAXIMUM_COMMENT_SIZE + CENTRAL_END_SIZE; pos++)
    {
        reader.setOffset(reader.source()->size() - pos);
        duint32 signature;
        reader >> signature;
        if (signature == SIG_END_OF_CENTRAL_DIR)
        {
            // This is it!
            return true;
        }
    }
    return false;
}

duint const entryCount = summary.totalEntryCount;
/**
 * Reads the central directory. The reader must currently be at the start of the
 * CentralEnd block.
 *
 * @param reader  Source.
 * @param readingFromOriginal  @c true when reading from the original archive file;
 *                      the local file headers are read, too. @c false when reading
 *                      from cached metadata.
 * @param localHeaderStartOffset  Offset to apply to all positions within the source data.
 *                      Used for pretending that cached metadata is at its original
 *                      offset instead of the beginning of the cache file.
 *                      Ignored if @a readingFromOriginal is true.
 */
void readCentralDirectory(Reader &reader, bool readingFromOriginal,
IByteArray::Offset localHeaderStartOffset = IByteArray::Offset(-1))
{
reader >> zipSummary;

duint const entryCount = zipSummary.totalEntryCount;

// The ZIP must have only one part, all entries in the same archive.
if (entryCount != summary.diskEntryCount)
if (entryCount != zipSummary.diskEntryCount)
{
/// @throw MultiPartError ZIP archives in more than one part are not supported
/// by the implementation.
throw MultiPartError("ZipArchive::readCentralDirectory", "Multipart archives are not supported");
}

// Read all the entries of the central directory.
reader.setOffset(summary.offset);
reader.setOffset(readingFromOriginal? IByteArray::Offset(zipSummary.offset)
: localHeaderStartOffset);
for (duint index = 0; index < entryCount; ++index)
{
CentralFileHeader header;
Expand All @@ -294,7 +331,8 @@ DENG2_PIMPL(ZipArchive)
if (header.signature != SIG_CENTRAL_FILE_HEADER)
{
/// @throw FormatError Invalid signature in a central directory entry.
throw FormatError("ZipArchive::readCentralDirectory", "Corrupt central directory");
throw FormatError("ZipArchive::readCentralDirectory",
QString("Corrupt central directory (at file %1)").arg(index));
}

Block latin1Name;
Expand All @@ -304,63 +342,101 @@ DENG2_PIMPL(ZipArchive)
// Advance the cursor past the variable sized fields.
reader.seek(header.extraFieldSize + header.commentSize);

// Skip folders.
if (fileName.endsWith("/") && !header.size)
{
continue;
}
ZipEntry *entry = nullptr;

// Check for unsupported features.
if (header.compression != NO_COMPRESSION && header.compression != DEFLATED)
{
/// @throw UnknownCompressionError Deflation is the only compression
/// algorithm supported by the implementation.
throw UnknownCompressionError("ZipArchive::readCentralDirectory",
"Entry '" + fileName + "' uses an unsupported compression algorithm");
}
if (header.flags & ZFH_ENCRYPTED)
// Skip folders.
if (!fileName.endsWith("/") || header.size)
{
/// @throw EncryptionError Archive is encrypted, which is not supported
/// by the implementation.
throw EncryptionError("ZipArchive::readCentralDirectory",
"Entry '" + fileName + "' is encrypted and thus cannot be read");
// Check for unsupported features.
if (header.compression != NO_COMPRESSION && header.compression != DEFLATED)
{
/// @throw UnknownCompressionError Deflation is the only compression
/// algorithm supported by the implementation.
throw UnknownCompressionError("ZipArchive::readCentralDirectory",
"Entry '" + fileName + "' uses an unsupported compression algorithm");
}
if (header.flags & ZFH_ENCRYPTED)
{
/// @throw EncryptionError Archive is encrypted, which is not supported
/// by the implementation.
throw EncryptionError("ZipArchive::readCentralDirectory",
"Entry '" + fileName + "' is encrypted and thus cannot be read");
}

// Make an index entry for this.
entry = static_cast<ZipEntry *>(&self().insertEntry(fileName));

entry->size = header.size;
entry->sizeInArchive = header.compressedSize;
entry->compression = header.compression;
entry->crc32 = header.crc32;
entry->localHeaderOffset = header.relOffset;

// Unpack the last modified time from the ZIP entry header.
DOSDate lastModDate(header.lastModDate);
DOSTime lastModTime(header.lastModTime);
entry->modifiedAt = QDateTime(QDate(lastModDate.year + 1980, lastModDate.month, lastModDate.dayOfMonth),
QTime(lastModTime.hours, lastModTime.minutes, lastModTime.seconds));

if (readingFromOriginal)
{
LocalFileHeader localHeader;

// Check where the entry's data is located by reading the local header.
reader.mark();
reader.setOffset(header.relOffset);
reader >> localHeader;

entry->offset = reader.offset() +
localHeader.fileNameSize +
localHeader.extraFieldSize;

// Back to the central directory.
reader.rewind();
}
else
{
// Cached headers' entries point directly to the data.
entry->offset = header.relOffset;
}
}

LocalFileHeader localHeader;
if (updateFromLocalHeaders)
if (readingFromOriginal)
{
// Read the local file header, which contains the correct extra
// field size (Info-ZIP!).
reader.mark();
reader.setOffset(header.relOffset);
reader >> localHeader;
// We'll keep the central header for caching. However, the cached
// directory never includes additional fields.
header.extraFieldSize = 0;
header.commentSize = 0;

// Cache's relOffset is actually the entry data starting offset.
header.relOffset = (entry? duint32(entry->offset) : 0);
centralHeaders << std::make_pair(latin1Name, header);
}
}
}

// Make an index entry for this.
ZipEntry &entry = static_cast<ZipEntry &>(self().insertEntry(fileName));

entry.size = header.size;
entry.sizeInArchive = header.compressedSize;
entry.compression = header.compression;
entry.crc32 = header.crc32;
entry.localHeaderOffset = header.relOffset;

// Unpack the last modified time from the ZIP entry header.
DOSDate lastModDate(header.lastModDate);
DOSTime lastModTime(header.lastModTime);
entry.modifiedAt = QDateTime(QDate(lastModDate.year + 1980, lastModDate.month, lastModDate.dayOfMonth),
QTime(lastModTime.hours, lastModTime.minutes, lastModTime.seconds));

if (updateFromLocalHeaders)
{
entry.offset = reader.offset() + header.fileNameSize + localHeader.extraFieldSize;

// Back to the central directory.
reader.rewind();
}
/**
 * Write a copy of the unmodified central directory as it was read from the source
 * archive. This is saved in the metadata cache.
 *
 * @param writer  Destination.
 */
void writeCachedCentralDirectory(Writer &writer)
{
    // Each cached record is the original central header followed by the
    // entry's file name, mirroring the on-disk central directory layout.
    for (int i = 0; i < centralHeaders.size(); ++i)
    {
        auto const &cached = centralHeaders.at(i);
        writer << cached.second           // central file header
               << FixedByteArray(cached.first); // Latin-1 file name
    }
    // Terminate with the end-of-central-directory record.
    writer << duint32(SIG_END_OF_CENTRAL_DIR) << zipSummary;
}

/**
* Writes a new central directory for a new ZIP archive as it will be written by
* ZipArchive.
*
* @param writer Destination.
*/
void writeCentralDirectory(Writer &writer)
{
CentralEnd summary;
Expand Down Expand Up @@ -404,34 +480,36 @@ DENG2_PIMPL(ZipArchive)

/**
 * Serializes the central directory (as read from the source archive) and
 * stores it in the metadata cache, so later loads can populate the file
 * index without reading the original ZIP. Does nothing if no cache ID has
 * been assigned to this archive.
 */
void updateCachedDirectory()
{
    // NOTE(review): the diff residue contained both the old (.isEmpty(),
    // category-less setMetadata) and new variants of these lines; only the
    // post-commit, category-aware variant is kept.
    if (directoryCacheId)
    {
        Block meta;
        Writer writer(meta);
        writeCachedCentralDirectory(writer);
        MetadataBank::get().setMetadata(ZIPARCHIVE_META_CATEGORY, directoryCacheId, meta);
    }
}

bool restoreFromCache()
{
if (directoryCacheId.isEmpty()) return false;
if (!directoryCacheId) return false;

auto &bank = MetadataBank::get();

try
{
Block const meta = bank.check(directoryCacheId);
if (meta.isEmpty()) return false;
qDebug() << "restoring from cache" << directoryCacheId;
Reader reader(meta);
readCentralDirectory(reader, false);
return true;
if (Block const meta = bank.check(ZIPARCHIVE_META_CATEGORY, directoryCacheId))
{
Reader reader(meta);
if (!seekToCentralEnd(reader)) return false;
readCentralDirectory(reader, false, 0);
return true;
}
}
catch (Error const &)
catch (Error const &er)
{
return false;
LOGDEV_RES_WARNING("Corrupt cached metadata: %s") << er.asText();
}
return false;
}
};

Expand All @@ -455,32 +533,18 @@ ZipArchive::ZipArchive(IByteArray const &archive, Block const &dirCacheId)
}

Reader reader(archive, littleEndianByteOrder);

// Locate the central directory. Start from the earliest location where
// the signature might be.
duint centralEndPos = 0;
for (duint pos = CENTRAL_END_SIZE; pos < MAXIMUM_COMMENT_SIZE; pos++)
{
reader.setOffset(archive.size() - pos);
duint32 signature;
reader >> signature;
if (signature == SIG_END_OF_CENTRAL_DIR)
{
// This is it!
centralEndPos = archive.size() - pos;
break;
}
}
if (!centralEndPos)
if (!d->seekToCentralEnd(reader))
{
/// @throw MissingCentralDirectoryError The ZIP central directory was not found
/// in the end of the source data.
throw MissingCentralDirectoryError("ZipArchive::Archive",
"Could not locate the central directory of the archive");
}

d->readCentralDirectory(reader, true);
d->updateCachedDirectory();

// No need to keep these any more.
d->centralHeaders.clear();
}

void ZipArchive::readFromSource(Entry const &e, Path const &, IBlock &uncompressedData) const
Expand Down
12 changes: 10 additions & 2 deletions doomsday/sdk/libcore/src/filesys/archiveentryfile.cpp
Expand Up @@ -39,6 +39,14 @@ DENG2_PIMPL_NOREF(ArchiveEntryFile)
{
if (!readBlock)
{
#if 0
{
static Lockable dbg;
DENG2_GUARD(dbg);
qDebug() << "--------\nAEF being read" << entryPath;
DENG2_PRINT_BACKTRACE();
}
#endif
readBlock = &const_cast<Archive const *>(archive)->entryBlock(entryPath);
}
return *readBlock;
Expand Down Expand Up @@ -104,9 +112,9 @@ void ArchiveEntryFile::flush()
/// Computes the unique metadata-cache identifier of this archive entry.
/// Combines the base File meta ID, the entry's path within the archive, and
/// (when the archive is backed by a File) the source file's meta ID, so the
/// cache is invalidated whenever the source archive changes. The combined
/// bytes are reduced to an MD5 hash.
Block ArchiveEntryFile::metaId() const
{
    // NOTE(review): the diff residue contained both the old ArchiveFeed-based
    // lookup and the new archive().source() lookup; the post-commit variant
    // is kept.
    Block data = File::metaId() + d->entryPath.toUtf8();
    if (File const *sourceFile = dynamic_cast<File const *>(archive().source()))
    {
        data += sourceFile->metaId();
    }
    return data.md5Hash();
}
Expand Down
2 changes: 0 additions & 2 deletions doomsday/sdk/libcore/src/filesys/archivefeed.cpp
Expand Up @@ -67,8 +67,6 @@ DENG2_PIMPL(ArchiveFeed)
{
LOG_RES_XVERBOSE("Source %s is a byte array", f.description());

qDebug() << "loading" << f.description() << f.metaId();

arch = new ZipArchive(*bytes, f.metaId());
}
else
Expand Down

0 comments on commit 64a54b0

Please sign in to comment.