Skip to content

Commit

Permalink
Convert read module over to new ZipEntry/ZipEntryBuilder
Browse files Browse the repository at this point in the history
  • Loading branch information
Majored committed Oct 8, 2022
1 parent 14b6d64 commit e4a0aa5
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 149 deletions.
9 changes: 5 additions & 4 deletions src/read/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use super::CompressionReader;
use crate::error::{Result, ZipError};
use crate::read::{OwnedReader, PrependReader, ZipEntry, ZipEntryReader};
use crate::spec::header::LocalFileHeader;
use crate::read::ZipEntryMeta;

use std::io::SeekFrom;
use std::path::{Path, PathBuf};
Expand All @@ -36,7 +37,7 @@ use tokio::io::AsyncSeekExt;
/// A reader which acts concurrently over a filesystem file.
pub struct ZipFileReader {
pub(crate) filename: PathBuf,
pub(crate) entries: Vec<ZipEntry>,
pub(crate) entries: Vec<(ZipEntry, ZipEntryMeta)>,
pub(crate) comment: Option<String>,
}

Expand All @@ -56,16 +57,16 @@ impl ZipFileReader {
let entry = self.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;

let mut fs_file = File::open(&self.filename).await?;
fs_file.seek(SeekFrom::Start(entry.offset.unwrap() as u64 + 4)).await?;
fs_file.seek(SeekFrom::Start(entry.1.file_offset.unwrap() as u64 + 4)).await?;

let header = LocalFileHeader::from_reader(&mut fs_file).await?;
let data_offset = (header.file_name_length + header.extra_field_length) as i64;
fs_file.seek(SeekFrom::Current(data_offset)).await?;

let reader = OwnedReader::Owned(fs_file);
let reader = PrependReader::Normal(reader);
let reader = CompressionReader::from_reader(entry.compression(), reader, entry.compressed_size.map(u32::into))?;
let reader = CompressionReader::from_reader(&entry.0.compression(), reader, Some(entry.0.compressed_size()).map(u32::into))?;

Ok(ZipEntryReader::from_raw(entry, reader, entry.data_descriptor()))
Ok(ZipEntryReader::from_raw(&entry.0, &entry.1, reader, entry.1.general_purpose_flag.data_descriptor))
}
}
9 changes: 5 additions & 4 deletions src/read/mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use crate::error::{Result, ZipError};
use crate::read::{CompressionReader, OwnedReader, PrependReader, ZipEntry, ZipEntryReader};
use crate::spec::header::LocalFileHeader;
use crate::read::ZipEntryMeta;

use std::io::{Cursor, SeekFrom};

Expand All @@ -17,7 +18,7 @@ pub type ConcurrentReader<'b, 'a> = ZipEntryReader<'b, Cursor<&'a [u8]>>;
/// A reader which acts concurrently over an in-memory buffer.
pub struct ZipFileReader<'a> {
pub(crate) data: &'a [u8],
pub(crate) entries: Vec<ZipEntry>,
pub(crate) entries: Vec<(ZipEntry, ZipEntryMeta)>,
pub(crate) comment: Option<String>,
}

Expand All @@ -35,16 +36,16 @@ impl<'a> ZipFileReader<'a> {
let entry = self.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;

let mut cursor = Cursor::new(<&[u8]>::clone(&self.data));
cursor.seek(SeekFrom::Start(entry.offset.unwrap() as u64 + 4)).await?;
cursor.seek(SeekFrom::Start(entry.1.file_offset.unwrap() as u64 + 4)).await?;

let header = LocalFileHeader::from_reader(&mut cursor).await?;
let data_offset = (header.file_name_length + header.extra_field_length) as i64;
cursor.seek(SeekFrom::Current(data_offset)).await?;

let reader = OwnedReader::Owned(cursor);
let reader = PrependReader::Normal(reader);
let reader = CompressionReader::from_reader(entry.compression(), reader, entry.compressed_size.map(u32::into))?;
let reader = CompressionReader::from_reader(&entry.0.compression(), reader, Some(entry.0.compressed_size()).map(u32::into))?;

Ok(ZipEntryReader::from_raw(entry, reader, entry.data_descriptor()))
Ok(ZipEntryReader::from_raw(&entry.0, &entry.1, reader, entry.1.general_purpose_flag.data_descriptor))
}
}
96 changes: 16 additions & 80 deletions src/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ pub mod sync;

use crate::error::{Result, ZipError};
use crate::spec::compression::Compression;
use crate::spec::header::GeneralPurposeFlag;
use crate::entry::ZipEntry;
use std::borrow::BorrowMut;

use std::convert::TryInto;
Expand All @@ -20,80 +22,12 @@ use std::task::{Context, Poll};
#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
use async_compression::tokio::bufread;
use async_io_utilities::AsyncPrependReader;
use chrono::{DateTime, Utc};
use crc32fast::Hasher;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, BufReader, ReadBuf, Take};

/// An entry within a larger ZIP file reader.
#[derive(Debug)]
pub struct ZipEntry {
pub(crate) name: String,
pub(crate) comment: Option<String>,
pub(crate) data_descriptor: bool,
pub(crate) crc32: Option<u32>,
pub(crate) uncompressed_size: Option<u32>,
pub(crate) compressed_size: Option<u32>,
pub(crate) last_modified: DateTime<Utc>,
pub(crate) extra: Option<Vec<u8>>,
pub(crate) compression: Compression,

// Additional fields from EOCDH.
pub(crate) offset: Option<u32>,
}

impl ZipEntry {
/// Returns a shared reference to the entry's name.
pub fn name(&self) -> &str {
&self.name
}

/// Returns an optional shared reference to the entry's comment.
pub fn comment(&self) -> Option<&str> {
match &self.comment {
Some(comment) => Some(comment),
None => None,
}
}

/// Returns whether or not a data descriptor exists for the entry (ie. whether or not it was stream written).
pub fn data_descriptor(&self) -> bool {
self.data_descriptor
}

/// Returns whether or not the entry represents a directory.
pub fn dir(&self) -> bool {
self.name.ends_with('/')
}

/// Returns an optional CRC32 value for the entry.
pub fn crc32(&self) -> Option<u32> {
self.crc32
}

/// Returns an optional compressed file size for the entry.
pub fn compressed_size(&self) -> Option<u32> {
self.compressed_size
}

/// Returns an optional uncompressed file size for the entry.
pub fn uncompressed_size(&self) -> Option<u32> {
self.uncompressed_size
}

/// Returns a shared reference to the entry's last modification date.
pub fn last_modified(&self) -> &DateTime<Utc> {
&self.last_modified
}

/// Returns an optional shared reference to the extra bytes for the entry.
pub fn extra(&self) -> Option<&Vec<u8>> {
self.extra.as_ref()
}

/// Returns a shared reference to the compression type of the entry.
pub fn compression(&self) -> &Compression {
&self.compression
}
/// Crate-internal metadata that the readers store alongside each `ZipEntry`.
///
/// Reader implementations keep `(ZipEntry, ZipEntryMeta)` pairs so that values needed only while
/// reading stay separate from the entry itself.
pub(crate) struct ZipEntryMeta {
/// General purpose flag for the entry; the readers consult its `data_descriptor` field.
pub(crate) general_purpose_flag: GeneralPurposeFlag,
/// Offset the readers seek to (plus four bytes) before parsing the entry's local file header.
pub(crate) file_offset: Option<u32>,
}

pub(crate) enum PrependReader<'a, R: AsyncRead + Unpin> {
Expand Down Expand Up @@ -127,6 +61,7 @@ impl<'a, R: AsyncRead + Unpin> AsyncRead for OwnedReader<'a, R> {
/// A ZIP file entry reader which may implement decompression.
pub struct ZipEntryReader<'a, R: AsyncRead + Unpin> {
pub(crate) entry: &'a ZipEntry,
pub(crate) meta: &'a ZipEntryMeta,
pub(crate) reader: CompressionReader<PrependReader<'a, R>>,
pub(crate) hasher: Hasher,
pub(crate) consumed: bool,
Expand All @@ -153,9 +88,10 @@ pub(crate) enum State {

impl<'a, R: AsyncRead + Unpin> ZipEntryReader<'a, R> {
/// Construct an entry reader from its raw parts (shared references to the entry and its metadata, plus an inner reader).
pub(crate) fn from_raw(entry: &'a ZipEntry, reader: CompressionReader<PrependReader<'a, R>>, _: bool) -> Self {
pub(crate) fn from_raw(entry: &'a ZipEntry, meta: &'a ZipEntryMeta, reader: CompressionReader<PrependReader<'a, R>>, _: bool) -> Self {
ZipEntryReader {
entry,
meta,
reader,
hasher: Hasher::new(),
consumed: false,
Expand All @@ -179,10 +115,10 @@ impl<'a, R: AsyncRead + Unpin> ZipEntryReader<'a, R> {
let hasher = std::mem::take(&mut self.hasher);
let final_crc = hasher.finalize();

if self.entry().data_descriptor() {
if self.meta.general_purpose_flag.data_descriptor {
self.data_descriptor.expect("Data descriptor was not read").0 == final_crc
} else {
self.entry().crc32().expect("Missing CRC32 in Local file header") == final_crc
self.entry.crc32() == final_crc
}
}

Expand Down Expand Up @@ -295,7 +231,7 @@ impl<'a, R: AsyncRead + Unpin> ZipEntryReader<'a, R> {
///
/// Reads all bytes until EOF and returns an owned vector of them.
pub async fn read_to_end_crc(mut self) -> Result<Vec<u8>> {
let mut buffer = Vec::with_capacity(self.entry.uncompressed_size.unwrap().try_into().unwrap());
let mut buffer = Vec::with_capacity(self.entry.uncompressed_size().try_into().unwrap());
self.read_to_end(&mut buffer).await?;

if self.compare_crc() {
Expand All @@ -309,7 +245,7 @@ impl<'a, R: AsyncRead + Unpin> ZipEntryReader<'a, R> {
///
/// Reads all bytes until EOF and returns an owned string of them.
pub async fn read_to_string_crc(mut self) -> Result<String> {
let mut buffer = String::with_capacity(self.entry.uncompressed_size.unwrap().try_into().unwrap());
let mut buffer = String::with_capacity(self.entry.uncompressed_size().try_into().unwrap());
self.read_to_string(&mut buffer).await?;

if self.compare_crc() {
Expand Down Expand Up @@ -357,7 +293,7 @@ impl<'a, R: AsyncRead + Unpin> AsyncRead for ZipEntryReader<'a, R> {

self.consumed = true;

if self.data_descriptor.is_none() && self.entry().data_descriptor() {
if self.data_descriptor.is_none() && self.meta.general_purpose_flag.data_descriptor {
self.state = State::ReadDescriptor([0u8; 16], 0);

self.poll_data_descriptor(c)
Expand Down Expand Up @@ -471,14 +407,14 @@ impl<'a, R: AsyncRead + Unpin> CompressionReader<R> {
macro_rules! reader_entry_impl {
() => {
/// Returns a list of shared references to the ZIP file's entries.
pub fn entries(&self) -> &Vec<ZipEntry> {
&self.entries
pub fn entries(&self) -> Vec<&ZipEntry> {
self.entries.iter().map(|entry| &entry.0).collect()
}

/// Searches for an entry with a specific filename.
pub fn entry(&self, name: &str) -> Option<(usize, &ZipEntry)> {
for (index, entry) in self.entries().iter().enumerate() {
if entry.name() == name {
if entry.filename() == name {
return Some((index, entry));
}
}
Expand Down
55 changes: 28 additions & 27 deletions src/read/seek.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
//! ```

use crate::error::{Result, ZipError};
use crate::read::{CompressionReader, OwnedReader, PrependReader, ZipEntry, ZipEntryReader};
use crate::read::{CompressionReader, OwnedReader, PrependReader, ZipEntry, ZipEntryReader, ZipEntryMeta};
use crate::spec::compression::Compression;
use crate::spec::attribute::AttributeCompatibility;
use crate::spec::header::{CentralDirectoryHeader, EndOfCentralDirectoryHeader, LocalFileHeader};

use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
Expand All @@ -38,7 +39,7 @@ use std::io::SeekFrom;
/// A reader which acts over a seekable source.
pub struct ZipFileReader<R: AsyncRead + AsyncSeek + Unpin> {
pub(crate) reader: R,
pub(crate) entries: Vec<ZipEntry>,
pub(crate) entries: Vec<(ZipEntry, ZipEntryMeta)>,
pub(crate) comment: Option<String>,
}

Expand All @@ -55,23 +56,23 @@ impl<R: AsyncRead + AsyncSeek + Unpin> ZipFileReader<R> {
pub async fn entry_reader(&mut self, index: usize) -> Result<ZipEntryReader<'_, R>> {
let entry = self.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;

self.reader.seek(SeekFrom::Start(entry.offset.unwrap() as u64 + 4)).await?;
self.reader.seek(SeekFrom::Start(entry.1.file_offset.unwrap() as u64 + 4)).await?;

let header = LocalFileHeader::from_reader(&mut self.reader).await?;
let data_offset = (header.file_name_length + header.extra_field_length) as i64;
self.reader.seek(SeekFrom::Current(data_offset)).await?;

let reader = OwnedReader::Borrow(&mut self.reader);
let reader = PrependReader::Normal(reader);
let reader = CompressionReader::from_reader(entry.compression(), reader, entry.compressed_size.map(u32::into))?;
let reader = CompressionReader::from_reader(&entry.0.compression(), reader, Some(entry.0.compressed_size()).map(u32::into))?;

Ok(ZipEntryReader::from_raw(entry, reader, entry.data_descriptor()))
Ok(ZipEntryReader::from_raw(&entry.0, &entry.1, reader, entry.1.general_purpose_flag.data_descriptor))
}
}

pub(crate) async fn read_cd<R: AsyncRead + AsyncSeek + Unpin>(
reader: &mut R,
) -> Result<(Vec<ZipEntry>, Option<String>)> {
) -> Result<(Vec<(ZipEntry, ZipEntryMeta)>, Option<String>)> {
const MAX_ENDING_LENGTH: u64 = u16::MAX as u64 + 22;

let length = reader.seek(SeekFrom::End(0)).await?;
Expand Down Expand Up @@ -131,34 +132,34 @@ pub(crate) async fn read_cd<R: AsyncRead + AsyncSeek + Unpin>(
Ok((entries, comment))
}

pub(crate) async fn read_cd_entry<R: AsyncRead + Unpin>(reader: &mut R) -> Result<ZipEntry> {
pub(crate) async fn read_cd_entry<R: AsyncRead + Unpin>(reader: &mut R) -> Result<(ZipEntry, ZipEntryMeta)> {
crate::utils::assert_signature(reader, crate::spec::signature::CENTRAL_DIRECTORY_FILE_HEADER).await?;

let header = CentralDirectoryHeader::from_reader(reader).await?;
let filename = async_io_utilities::read_string(reader, header.file_name_length.into()).await?;
let extra = async_io_utilities::read_bytes(reader, header.extra_field_length.into()).await?;
let compression = Compression::try_from(header.compression)?;
let extra_field = async_io_utilities::read_bytes(reader, header.extra_field_length.into()).await?;
let comment = async_io_utilities::read_string(reader, header.file_comment_length.into()).await?;
let data_descriptor = header.flags.data_descriptor;

let (crc32, uncompressed_size, compressed_size) =
if data_descriptor && header.crc == 0 && header.uncompressed_size == 0 && header.compressed_size == 0 {
(None, None, None)
} else {
(Some(header.crc), Some(header.uncompressed_size), Some(header.compressed_size))
};
let last_modification_date = crate::spec::date::zip_date_to_chrono(header.mod_date, header.mod_time);

let entry = ZipEntry {
name: filename,
comment: Some(comment),
data_descriptor,
crc32,
uncompressed_size,
compressed_size,
last_modified: crate::spec::date::zip_date_to_chrono(header.mod_date, header.mod_time),
extra: Some(extra),
compression: Compression::try_from(header.compression)?,
offset: Some(header.lh_offset),
filename,
compression,
attribute_compatibility: AttributeCompatibility::Unix, // FIXME: Defaults to Unix for the moment.
crc32: header.crc,
uncompressed_size: header.uncompressed_size,
compressed_size: header.compressed_size,
last_modification_date,
internal_file_attribute: header.inter_attr,
external_file_attribute: header.exter_attr,
extra_field,
comment
};

let meta = ZipEntryMeta {
general_purpose_flag: header.flags,
file_offset: Some(header.lh_offset),
};

Ok(entry)
Ok((entry, meta))
}
Loading

0 comments on commit e4a0aa5

Please sign in to comment.