Skip to content

Commit

Permalink
Support seek and variable-sized chunks with lazperf. (#3529)
Browse files Browse the repository at this point in the history
* Initialize UUID to null.
Close #3483

* Catch bounds exceptions.

* Support LAZperf 2.1 and variable-size chunks.

* Make sure we find LAZperf 2.1

* Support reading and seeking with variable-sized chunks.

* Improve chunk table description.
  • Loading branch information
abellgithub committed Sep 7, 2021
1 parent 84eba91 commit 141c07e
Show file tree
Hide file tree
Showing 8 changed files with 163 additions and 87 deletions.
2 changes: 1 addition & 1 deletion cmake/lazperf.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
option(WITH_LAZPERF "Build PDAL with support for LAZ compression with LAZPERF" TRUE)
if (WITH_LAZPERF)
find_package(LAZPERF 2.0 QUIET)
find_package(LAZPERF 2.1 QUIET)
set_package_properties(LAZPERF PROPERTIES TYPE OPTIONAL)
if (LAZPERF_FOUND)
set(LAZPERF_LIBRARY "LAZPERF::lazperf")
Expand Down
2 changes: 1 addition & 1 deletion io/LasHeader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ void LasHeader::setScaling(const Scaling& scaling)
}


uint16_t LasHeader::basePointLen(uint8_t type)
uint16_t LasHeader::basePointLen(uint8_t type) const
{
const uint16_t len[] = { 20, 28, 26, 34, 57, 63, 30, 36, 38, 59, 67 };
const size_t numTypes = sizeof(len) / sizeof(len[0]);
Expand Down
4 changes: 2 additions & 2 deletions io/LasHeader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,9 @@ class PDAL_DLL LasHeader
{ return m_pointLen; }
void setPointLen(uint16_t v)
{ m_pointLen = v; }
uint16_t basePointLen()
uint16_t basePointLen() const
{ return basePointLen(m_pointFormat); }
uint16_t basePointLen(uint8_t format);
uint16_t basePointLen(uint8_t format) const;

/// Set the number of points.
/// \param pointCount Number of points in the file.
Expand Down
8 changes: 4 additions & 4 deletions io/LasReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,9 +339,7 @@ void LasReader::ready(PointTableRef table)
const LasVLR *vlr = m_p->header.findVlr(LASZIP_USER_ID, LASZIP_RECORD_ID);
if (!vlr)
throwError("LAZ file missing required laszip VLR.");
int ebCount = m_p->header.pointLen() - m_p->header.basePointLen();
m_p->decompressor = new LazPerfVlrDecompressor(*stream, m_p->header.pointFormat(),
ebCount, m_p->header.pointOffset(), vlr->data());
m_p->decompressor = new LazPerfVlrDecompressor(*stream, m_p->header, vlr->data());
if (m_args->start > 0)
{
if (m_args->start > m_p->header.pointCount())
Expand Down Expand Up @@ -687,7 +685,9 @@ bool LasReader::processOne(PointRef& point)
#ifdef PDAL_HAVE_LAZPERF
if (m_args->compression == "LAZPERF")
{
m_p->decompressor->decompress(m_p->decompressorBuf.data());
if (!m_p->decompressor->decompress(m_p->decompressorBuf.data()))
throwError("Error reading point " + std::to_string(m_p->index) +
" from " + m_filename + ". Invalid/corrupt file.");
loadPoint(point, m_p->decompressorBuf.data(), pointLen);
}
#endif
Expand Down
177 changes: 114 additions & 63 deletions pdal/compression/LazPerfVlrCompression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,41 +42,14 @@

#include <pdal/util/IStream.hpp>
#include <pdal/util/OStream.hpp>
#include <io/LasHeader.hpp>
#include <pdal/pdal_types.hpp>

#include "LazPerfVlrCompression.hpp"

namespace pdal
{

namespace
{

// Return the size in bytes of the fixed portion of a LAS point record (the part
// that precedes any extra bytes) for the given point format.
//
// Returns 0 when the format is negative or beyond the last known format.
size_t baseCount(int format)
{
    // Indexed by point format. These are the same values used by
    // LasHeader::basePointLen(), including formats 4, 5, 9 and 10, which the
    // previous switch statement silently mapped to 0.
    static const size_t len[] = { 20, 28, 26, 34, 57, 63, 30, 36, 38, 59, 67 };
    const size_t numTypes = sizeof(len) / sizeof(len[0]);

    if (format < 0 || static_cast<size_t>(format) >= numTypes)
        return 0;
    return len[format];
}

} // unnamed namespace

// This compressor writes data in chunks to a stream. At the beginning of the
// data is an offset to the end of the data, where the chunk table is
// stored. The chunk table keeps a list of the offsets to the beginning of
Expand Down Expand Up @@ -205,13 +178,17 @@ void LazPerfVlrCompressor::done()

class LazPerfVlrDecompressorImpl
{
using ChunkList = std::vector<lazperf::chunk>;
using ChunkIter = std::vector<lazperf::chunk>::iterator;

public:
LazPerfVlrDecompressorImpl(std::istream& stream, int format, int ebCount,
std::streamoff pointOffset, const char *vlrdata) :
m_stream(stream), m_fileStream(stream), m_format(format), m_ebCount(ebCount),
m_chunkPointsRead(0), m_vlr(vlrdata)
LazPerfVlrDecompressorImpl(std::istream& stream, const LasHeader& header, const char *vlrdata) :
m_stream(stream), m_fileStream(stream), m_format(header.pointFormat()),
m_pointLen(header.pointLen()), m_ebCount(header.pointLen() - header.basePointLen()),
m_pointCount(header.pointCount()), m_vlr(vlrdata), m_chunkPointsTotal(0),
m_chunkPointsRead(0), m_curChunk(m_chunks.end())
{
m_stream.seekg(pointOffset);
m_stream.seekg(header.pointOffset());
ILeStream in(&stream);

uint64_t chunkTablePos;
Expand All @@ -226,48 +203,91 @@ class LazPerfVlrDecompressorImpl
if (version != 0)
throw pdal_error("Invalid version " + std::to_string(version) + " found in LAZ VLR.");

std::vector<uint32_t> chunks =
lazperf::decompress_chunk_table(m_fileStream.cb(), numChunks);
m_chunkOffsets.push_back(pointOffset + sizeof(uint64_t));
for (uint32_t chunkSize : chunks)
m_chunkOffsets.push_back(m_chunkOffsets.back() + chunkSize);
bool variable = (m_vlr.chunk_size == lazperf::VariableChunkSize);

m_chunks = lazperf::decompress_chunk_table(m_fileStream.cb(), numChunks, variable);

// If the chunk size is fixed, set the counts to the chunk size since
// they aren't stored in the chunk table.
if (!variable)
{
uint64_t remaining = m_pointCount;
for (lazperf::chunk& chunk : m_chunks)
{
chunk.count = (std::min)((uint64_t)m_vlr.chunk_size, remaining);
remaining -= chunk.count;
}
assert(remaining == 0);
}

// Add a chunk at the beginning that has a count of 0 and an offset of the
// start of the first chunk.
m_chunks.insert(m_chunks.begin(), {0, header.pointOffset() + sizeof(uint64_t)});

// Fix up the chunk table such that the offsets are absolute offsets to the
// chunk and the counts are cumulative counts of points before the chunk.
// When we're done, the chunk table looks like this, where N is the number of chunks:

// Chunk table entry 1: offset to chunk 1, count of 0
// Chunk table entry 2: offset to chunk 2, count of chunk 1
// Chunk table entry 3: offset to chunk 3, count of chunk 1 + 2
// ...
// Chunk table entry N: offset to chunk N, count of chunk 1 + ... + (N - 1)
// Chunk table entry N + 1: offset to end of chunks (start of chunk table),
// count is the total number of points in all chunks.

for (size_t i = 1; i < m_chunks.size(); ++i)
{
m_chunks[i].offset += m_chunks[i - 1].offset;
m_chunks[i].count += m_chunks[i - 1].count;
}

// Clear EOF
m_stream.clear();
resetDecompressor();
m_stream.seekg(m_chunkOffsets[0]);
setChunk(m_chunks.begin());
m_stream.seekg(m_curChunk->offset);
m_fileStream.reset();
}


bool seek(int64_t record)
bool seek(uint64_t record)
{
if (record < 0)
if (record < 0 || record >= m_pointCount || m_chunks.empty())
return false;

std::vector<char> buf(baseCount(m_format) + m_ebCount);
int64_t chunk = record / m_vlr.chunk_size;
int64_t offset = record % m_vlr.chunk_size;
// Search for the chunk containing the requested record.
auto ci = std::upper_bound(m_chunks.begin(), m_chunks.end(), record,
[](uint64_t record, const lazperf::chunk& c) { return record < c.count; });

m_stream.seekg(m_chunkOffsets[chunk]);
if (ci == m_chunks.begin()) // Should never happen.
return false;

ci--;

// Calculate the number of points we need to skip in the located chunk.
setChunk(ci);
uint64_t toRead = record - ci->count;
m_stream.seekg(ci->offset);
m_fileStream.reset();
while (offset > 0)
{
decompress(buf.data());
offset--;
}
std::vector<char> buf(m_pointLen);
while (toRead--)
if (!decompress(buf.data()))
return false;
return true;
}

void decompress(char *outbuf)
bool decompress(char *outbuf)
{
if (m_chunkPointsRead == m_vlr.chunk_size)
if (chunkDone())
{
if (!nextChunk())
return false;
resetDecompressor();
m_chunkPointsRead = 0;
}
m_decompressor->decompress(outbuf);
m_chunkPointsRead++;
return true;
}

private:
Expand All @@ -276,34 +296,65 @@ class LazPerfVlrDecompressorImpl
m_decompressor = lazperf::build_las_decompressor(m_fileStream.cb(), m_format, m_ebCount);
}

// Advance to the chunk after the current one and restart the per-chunk read
// counter. Returns false, leaving the read counter alone, when the current
// position is already end() or setChunk() rejects the following entry.
bool nextChunk()
{
    // Short-circuit keeps setChunk() from being called once we're at end().
    const bool advanced = (m_curChunk != m_chunks.end()) && setChunk(m_curChunk + 1);
    if (advanced)
        m_chunkPointsRead = 0;
    return advanced;
}

// Make `chunk` the current chunk and work out how many points it contains.
//
// Returns false, leaving the per-chunk point total untouched, when `chunk` is
// end() or is the final table entry; neither describes a readable chunk.
bool setChunk(ChunkIter chunk)
{
    m_curChunk = chunk;

    // Check for end() before advancing the iterator: stepping past end() is
    // undefined behavior, and nextChunk() passes end() here once the current
    // chunk is the final table entry.
    if (chunk == m_chunks.end())
        return false;

    // The chunk table entries are written at the *end* of creating a chunk. When we
    // read the chunk table, we stick an entry at the front indicating the start of
    // the first chunk. The last chunk table entry points to the end of the chunks
    // and has a count value equal to the total number of points.
    const ChunkIter following = chunk + 1;
    if (following == m_chunks.end())
        return false;

    // Counts in the table are cumulative, so the difference between neighboring
    // entries is the number of points held by this chunk.
    m_chunkPointsTotal = following->count - chunk->count;
    return true;
}

// True once the number of points read from the current chunk equals the
// number of points the chunk holds.
bool chunkDone() const
{
    const bool allRead = (m_chunkPointsTotal == m_chunkPointsRead);
    return allRead;
}

std::istream& m_stream;
lazperf::InFileStream m_fileStream;
lazperf::las_decompressor::ptr m_decompressor;
int m_format;
int m_pointLen;
int m_ebCount;
uint32_t m_chunkPointsRead;
uint64_t m_pointCount;
lazperf::laz_vlr m_vlr;
// Note that these offsets are actual file offsets. The values stored in the chunk table
// are chunk sizes.
std::vector<uint64_t> m_chunkOffsets;
int m_chunkPointsTotal;
int m_chunkPointsRead;

ChunkList m_chunks;
ChunkIter m_curChunk;
};

LazPerfVlrDecompressor::LazPerfVlrDecompressor(std::istream& stream, int format,
int ebCount, std::streamoff pointOffset, const char *vlrdata) :
m_impl(new LazPerfVlrDecompressorImpl(stream, format, ebCount, pointOffset, vlrdata))
LazPerfVlrDecompressor::LazPerfVlrDecompressor(std::istream& stream,
const LasHeader& header, const char *vlrdata) :
m_impl(new LazPerfVlrDecompressorImpl(stream, header, vlrdata))
{}


// Defined out of line, in the file that also defines LazPerfVlrDecompressorImpl,
// so the std::unique_ptr member is destroyed where the impl type is complete.
LazPerfVlrDecompressor::~LazPerfVlrDecompressor() = default;


void LazPerfVlrDecompressor::decompress(char *outbuf)
bool LazPerfVlrDecompressor::decompress(char *outbuf)
{
m_impl->decompress(outbuf);
return m_impl->decompress(outbuf);
}

bool LazPerfVlrDecompressor::seek(int64_t record)
bool LazPerfVlrDecompressor::seek(uint64_t record)
{
return m_impl->seek(record);
}
Expand Down
10 changes: 5 additions & 5 deletions pdal/compression/LazPerfVlrCompression.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

namespace pdal
{
class LazPerfVlrCompressorImpl;
class LasHeader;

// This compressor writes data in chunks to a stream. At the beginning of the
// data is an offset to the end of the data, where the chunk table is
Expand All @@ -50,6 +50,7 @@ namespace pdal
// The compressor uses the schema of the point data in order to compress
// the point stream. The schema is also stored in a VLR that isn't
// handled as part of the compression process itself.
class LazPerfVlrCompressorImpl;
class LazPerfVlrCompressor
{
public:
Expand All @@ -70,12 +71,11 @@ class LazPerfVlrDecompressorImpl;
class LazPerfVlrDecompressor
{
public:
LazPerfVlrDecompressor(std::istream& stream, int format, int ebCount,
std::streamoff pointOffset, const char *virdata);
LazPerfVlrDecompressor(std::istream& stream, const LasHeader& header, const char *vlrdata);
~LazPerfVlrDecompressor();

bool seek(int64_t record);
void decompress(char *outbuf);
bool seek(uint64_t record);
bool decompress(char *outbuf);

private:
std::unique_ptr<LazPerfVlrDecompressorImpl> m_impl;
Expand Down
Binary file added test/data/laz/ellipsoid.copc.laz
Binary file not shown.
Loading

0 comments on commit 141c07e

Please sign in to comment.