Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
build: build-huffman build-arithmetic build-range build-rle

build-huffman:
g++ -std=c++17 -O2 -Wall -Wextra -Werror -Ialgorithms/shared/cpp/include algorithms/shared/cpp/src/buffer_api.cpp algorithms/shared/cpp/src/cli_launcher.cpp algorithms/huffman/cpp/main.cpp -o algorithms/huffman/cpp/huffman_cpp
g++ -std=c++17 -O2 -Wall -Wextra -Werror -Ialgorithms/shared/cpp/include algorithms/shared/cpp/src/buffer_api.cpp algorithms/shared/cpp/src/cli_launcher.cpp algorithms/shared/cpp/src/frequency_table.cpp algorithms/huffman/cpp/main.cpp -o algorithms/huffman/cpp/huffman_cpp
go build -o algorithms/huffman/go/huffman_go ./algorithms/huffman/go/cmd
cargo build --manifest-path algorithms/huffman/rust/Cargo.toml --bin huffman_rust --release
cp algorithms/huffman/rust/target/release/huffman_rust algorithms/huffman/rust/huffman_rust

build-arithmetic:
g++ -std=c++17 -O2 -Wall -Wextra -Werror -Ialgorithms/shared/cpp/include algorithms/shared/cpp/src/buffer_api.cpp algorithms/shared/cpp/src/cli_launcher.cpp algorithms/arithmetic/cpp/main.cpp -o algorithms/arithmetic/cpp/arithmetic_cpp
g++ -std=c++17 -O2 -Wall -Wextra -Werror -Ialgorithms/shared/cpp/include algorithms/shared/cpp/src/buffer_api.cpp algorithms/shared/cpp/src/cli_launcher.cpp algorithms/shared/cpp/src/frequency_table.cpp algorithms/arithmetic/cpp/main.cpp -o algorithms/arithmetic/cpp/arithmetic_cpp
go build -o algorithms/arithmetic/go/arithmetic_go ./algorithms/arithmetic/go/cmd
cargo build --manifest-path algorithms/arithmetic/rust/Cargo.toml --bin arithmetic_rust --release
cp algorithms/arithmetic/rust/target/release/arithmetic_rust algorithms/arithmetic/rust/arithmetic_rust
Expand All @@ -40,7 +40,7 @@ test: test-data \
test-conformance test-cli-smoke

test-shared-cpp:
g++ -std=c++17 -O2 -Wall -Wextra -Werror -DCOMPRESSKIT_NO_MAIN -Ialgorithms/shared/cpp/include algorithms/shared/cpp/src/buffer_api.cpp algorithms/shared/cpp/src/cli_launcher.cpp algorithms/huffman/cpp/main.cpp algorithms/arithmetic/cpp/main.cpp algorithms/range/cpp/main.cpp algorithms/rle/cpp/main.cpp algorithms/shared/cpp/tests/test_lifecycle.cpp -o algorithms/shared/cpp/tests/test_lifecycle
g++ -std=c++17 -O2 -Wall -Wextra -Werror -DCOMPRESSKIT_NO_MAIN -Ialgorithms/shared/cpp/include algorithms/shared/cpp/src/buffer_api.cpp algorithms/shared/cpp/src/cli_launcher.cpp algorithms/shared/cpp/src/frequency_table.cpp algorithms/huffman/cpp/main.cpp algorithms/arithmetic/cpp/main.cpp algorithms/range/cpp/main.cpp algorithms/rle/cpp/main.cpp algorithms/shared/cpp/tests/test_lifecycle.cpp -o algorithms/shared/cpp/tests/test_lifecycle
./algorithms/shared/cpp/tests/test_lifecycle

test-shared-go:
Expand Down
38 changes: 19 additions & 19 deletions algorithms/arithmetic/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <vector>

#include "compresskit/buffer_api.hpp"
#include "compresskit/frequency_table.hpp"

class BitWriter {
public:
Expand Down Expand Up @@ -242,10 +243,17 @@ static std::vector<uint32_t> build_frequencies_from_file(const std::string& inpu
if (!in) {
return freq;
}
char c;
while (in.get(c)) {
unsigned char uc = static_cast<unsigned char>(c);
freq[static_cast<uint32_t>(uc)]++;
uint32_t overflow_symbol = 0;
const auto status = compresskit::accumulate_frequencies(in, freq, &overflow_symbol);
if (status == compresskit::FrequencyCountStatus::IO_ERROR) {
std::cerr << "Failed to read input file\n";
freq.clear();
return freq;
}
if (status == compresskit::FrequencyCountStatus::OVERFLOW) {
std::cerr << "Frequency overflow for symbol " << overflow_symbol << "\n";
freq.clear();
return freq;
}
freq[EOF_SYMBOL] = 1;
scale_frequencies(freq);
Expand All @@ -266,30 +274,20 @@ static std::vector<uint32_t> build_cumulative(const std::vector<uint32_t>& freq)
}

static void write_frequencies(std::ostream& out, const std::vector<uint32_t>& freq) {
uint32_t count = static_cast<uint32_t>(freq.size());
out.write(reinterpret_cast<const char*>(&count), sizeof(count));
for (uint32_t v : freq) {
out.write(reinterpret_cast<const char*>(&v), sizeof(v));
}
compresskit::write_frequency_table(out, freq);
}

static bool read_frequencies(std::istream& in, std::vector<uint32_t>& freq) {
uint32_t count = 0;
in.read(reinterpret_cast<char*>(&count), sizeof(count));
if (!in) {
const auto status = compresskit::read_frequency_table(in, freq, SYMBOL_LIMIT, &count);
if (status == compresskit::FrequencyTableReadStatus::TRUNCATED) {
std::cerr << "Failed to read frequency table\n";
return false;
}
if (count != SYMBOL_LIMIT) {
if (status == compresskit::FrequencyTableReadStatus::BAD_COUNT) {
std::cerr << "Bad frequency table size: " << count << "\n";
return false;
}
freq.assign(count, 0);
in.read(reinterpret_cast<char*>(freq.data()), freq.size() * sizeof(uint32_t));
if (!in) {
std::cerr << "Failed to read frequency table\n";
return false;
}
return true;
}

Expand All @@ -305,8 +303,10 @@ static bool compress_file(const std::string& input_path, const std::string& outp
}
}
}

std::vector<uint32_t> freq = build_frequencies_from_file(input_path);
if (freq.empty()) {
return false;
}
std::vector<uint32_t> cumulative = build_cumulative(freq);

std::ifstream in(input_path, std::ios::binary);
Expand Down
37 changes: 19 additions & 18 deletions algorithms/huffman/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <vector>

#include "compresskit/buffer_api.hpp"
#include "compresskit/frequency_table.hpp"

class BitWriter {
public:
Expand Down Expand Up @@ -179,40 +180,37 @@ static std::vector<uint32_t> build_frequencies_from_file(const std::string& inpu
if (!in) {
return freq;
}
char c;
while (in.get(c)) {
unsigned char uc = static_cast<unsigned char>(c);
freq[static_cast<uint32_t>(uc)]++;
uint32_t overflow_symbol = 0;
const auto status = compresskit::accumulate_frequencies(in, freq, &overflow_symbol);
if (status == compresskit::FrequencyCountStatus::IO_ERROR) {
std::cerr << "Failed to read input file\n";
freq.clear();
return freq;
}
if (status == compresskit::FrequencyCountStatus::OVERFLOW) {
std::cerr << "Frequency overflow for symbol " << overflow_symbol << "\n";
freq.clear();
return freq;
}
freq[EOF_SYMBOL] = 1;
return freq;
}

static void write_frequencies(std::ostream& out, const std::vector<uint32_t>& freq) {
uint32_t count = static_cast<uint32_t>(freq.size());
out.write(reinterpret_cast<const char*>(&count), sizeof(count));
for (uint32_t v : freq) {
out.write(reinterpret_cast<const char*>(&v), sizeof(v));
}
compresskit::write_frequency_table(out, freq);
}

static bool read_frequencies(std::istream& in, std::vector<uint32_t>& freq) {
uint32_t count = 0;
in.read(reinterpret_cast<char*>(&count), sizeof(count));
if (!in) {
const auto status = compresskit::read_frequency_table(in, freq, SYMBOL_LIMIT, &count);
if (status == compresskit::FrequencyTableReadStatus::TRUNCATED) {
std::cerr << "Failed to read frequency table\n";
return false;
}
if (count != SYMBOL_LIMIT) {
if (status == compresskit::FrequencyTableReadStatus::BAD_COUNT) {
std::cerr << "Bad frequency table size: " << count << "\n";
return false;
}
freq.assign(count, 0);
in.read(reinterpret_cast<char*>(freq.data()), freq.size() * sizeof(uint32_t));
if (!in) {
std::cerr << "Failed to read frequency table\n";
return false;
}
return true;
}

Expand All @@ -230,6 +228,9 @@ static bool compress_file(const std::string& input_path, const std::string& outp
}

std::vector<uint32_t> freq = build_frequencies_from_file(input_path);
if (freq.empty()) {
return false;
}
UniqueNode root(build_tree(freq)); // RAII: automatic cleanup
std::vector<std::string> codes(SYMBOL_LIMIT);
std::string prefix;
Expand Down
31 changes: 31 additions & 0 deletions algorithms/shared/cpp/include/compresskit/frequency_table.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#pragma once

#include <cstdint>
#include <istream>
#include <ostream>
#include <vector>

namespace compresskit {

/// Result of read_frequency_table().
enum class FrequencyTableReadStatus {
/// The count and every entry were read successfully.
OK = 0,
/// The stream failed while reading the count or one of the entries.
TRUNCATED,
/// The stored count differs from the non-zero count the caller expected.
BAD_COUNT,
};

/// Result of accumulate_frequencies().
enum class FrequencyCountStatus {
/// The stream was consumed through end-of-file.
OK = 0,
/// The stream entered a failed state before reaching end-of-file.
IO_ERROR,
/// A counter was already at the uint32_t maximum when its symbol was seen again.
/// NOTE(review): some C library <math.h> headers define an `OVERFLOW` macro; confirm this
/// enumerator cannot clash with it on the supported toolchains.
OVERFLOW,
};

/// Writes `freq` to `out` as a 32-bit little-endian entry count followed by each entry as a
/// 32-bit little-endian value.
/// @param out  Destination stream.
/// @param freq Table to serialize; its size becomes the leading count field.
/// @return false as soon as a write leaves `out` in a failed state, true otherwise.
bool write_frequency_table(std::ostream& out, const std::vector<uint32_t>& freq);

/// Reads a table written by write_frequency_table(): a 32-bit little-endian count followed by
/// that many 32-bit little-endian entries.
/// @param in             Source stream.
/// @param freq           Receives the decoded entries; left empty on any non-OK status.
/// @param expected_count Required entry count; 0 disables the count check.
/// @param actual_count   Optional; receives the stored count once it has been read, including
///                       when the result is BAD_COUNT.
/// @return OK, TRUNCATED if the stream fails mid-read, or BAD_COUNT on a count mismatch.
FrequencyTableReadStatus read_frequency_table(std::istream& in, std::vector<uint32_t>& freq,
uint32_t expected_count,
uint32_t* actual_count = nullptr);

/// Reads `in` to end-of-file and increments `freq[b]` for every byte value `b` encountered.
/// Existing counts in `freq` are kept and added to.
/// @param in              Source stream, consumed in fixed-size chunks.
/// @param freq            Counters indexed by byte value; callers should pass at least 256 entries.
/// @param overflow_symbol Optional; receives the byte value whose counter was already at the
///                        uint32_t maximum when OVERFLOW is returned.
/// @return OK at end-of-file, IO_ERROR if the stream fails earlier, OVERFLOW if a counter
///         cannot be incremented (counts accumulated up to that point remain in `freq`).
FrequencyCountStatus accumulate_frequencies(std::istream& in, std::vector<uint32_t>& freq,
uint32_t* overflow_symbol = nullptr);

} // namespace compresskit
95 changes: 95 additions & 0 deletions algorithms/shared/cpp/src/frequency_table.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#include "compresskit/frequency_table.hpp"

#include <array>
#include <limits>

namespace compresskit {
namespace {

/// Serializes `value` to `out` as four bytes, least-significant byte first.
/// @return true if `out` is not in a failed state after the write.
bool write_u32_le(std::ostream& out, uint32_t value) {
    std::array<char, 4> encoded{};
    // Peel off the low byte each round so the bytes land in little-endian order.
    for (char& slot : encoded) {
        slot = static_cast<char>(value & 0xFFu);
        value >>= 8;
    }
    out.write(encoded.data(), static_cast<std::streamsize>(encoded.size()));
    return !out.fail();
}

/// Reads four bytes from `in` and decodes them as a little-endian 32-bit value.
/// @param value Receives the decoded number; left untouched when the read fails.
/// @return true if all four bytes were read.
bool read_u32_le(std::istream& in, uint32_t& value) {
    std::array<unsigned char, 4> raw{};
    if (!in.read(reinterpret_cast<char*>(raw.data()), static_cast<std::streamsize>(raw.size()))) {
        return false;
    }
    // Fold from the most-significant (last) byte down to the least-significant (first) one.
    uint32_t decoded = 0;
    for (auto it = raw.rbegin(); it != raw.rend(); ++it) {
        decoded = (decoded << 8) | static_cast<uint32_t>(*it);
    }
    value = decoded;
    return true;
}

} // namespace

/// Writes `freq` to `out` as a 32-bit little-endian entry count followed by each entry as a
/// 32-bit little-endian value.
/// @param out  Destination stream.
/// @param freq Table to serialize; its size becomes the leading count field.
/// @return false if the table has more entries than the 32-bit count field can represent
///         (nothing is written in that case) or as soon as a write fails; true otherwise.
bool write_frequency_table(std::ostream& out, const std::vector<uint32_t>& freq) {
    // The count field is only 32 bits wide. Refuse tables that would not round-trip instead of
    // emitting a truncated count in front of the full set of entries.
    const uint32_t count = static_cast<uint32_t>(freq.size());
    if (count != freq.size()) {
        return false;
    }
    if (!write_u32_le(out, count)) {
        return false;
    }
    for (uint32_t value : freq) {
        if (!write_u32_le(out, value)) {
            return false;
        }
    }
    return true;
}

/// Reads a table written by write_frequency_table(): a 32-bit little-endian count followed by
/// that many 32-bit little-endian entries.
/// @param in             Source stream.
/// @param freq           Receives the decoded entries; left empty on any non-OK status.
/// @param expected_count Required entry count; 0 disables the count check.
/// @param actual_count   Optional; receives the stored count once it has been read, including
///                       when the result is BAD_COUNT.
/// @return OK, TRUNCATED if the stream fails mid-read, or BAD_COUNT on a count mismatch.
FrequencyTableReadStatus read_frequency_table(std::istream& in, std::vector<uint32_t>& freq,
                                              uint32_t expected_count, uint32_t* actual_count) {
    // Upper bound on memory reserved purely on the strength of the (untrusted) count field.
    constexpr uint32_t kMaxUpfrontReserve = 1u << 16;

    freq.clear();

    uint32_t count = 0;
    if (!read_u32_le(in, count)) {
        return FrequencyTableReadStatus::TRUNCATED;
    }
    if (actual_count) {
        *actual_count = count;
    }
    if (expected_count != 0 && count != expected_count) {
        return FrequencyTableReadStatus::BAD_COUNT;
    }

    // Grow the table as entries are actually decoded. With expected_count == 0 the count comes
    // straight from the input, so allocating `count` entries up front would let a corrupt or
    // hostile header request gigabytes before a single entry has been read.
    freq.reserve(count < kMaxUpfrontReserve ? count : kMaxUpfrontReserve);
    for (uint32_t i = 0; i < count; ++i) {
        uint32_t value = 0;
        if (!read_u32_le(in, value)) {
            freq.clear();
            return FrequencyTableReadStatus::TRUNCATED;
        }
        freq.push_back(value);
    }
    return FrequencyTableReadStatus::OK;
}

/// Reads `in` to end-of-file and increments `freq[b]` for every byte value `b` encountered.
/// Existing counts in `freq` are kept and added to.
/// @param in              Source stream, consumed in 32 KiB chunks.
/// @param freq            Counters indexed by byte value. If it holds fewer than 256 entries it
///                        is extended with zeros so every byte value has a slot; extra entries
///                        beyond index 255 are left untouched.
/// @param overflow_symbol Optional; receives the byte value whose counter was already at the
///                        uint32_t maximum when OVERFLOW is returned.
/// @return OK at end-of-file, IO_ERROR if the stream fails earlier, OVERFLOW if a counter
///         cannot be incremented (counts accumulated up to that point remain in `freq`).
FrequencyCountStatus accumulate_frequencies(std::istream& in, std::vector<uint32_t>& freq,
                                            uint32_t* overflow_symbol) {
    // Every possible byte value is used as an index below, so guarantee the slots exist rather
    // than writing out of bounds when handed a short vector.
    constexpr std::size_t kByteValues = 256;
    if (freq.size() < kByteValues) {
        freq.resize(kByteValues, 0);
    }

    std::array<unsigned char, 32 * 1024> buffer{};
    for (;;) {
        in.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(buffer.size()));
        // A short final read sets eofbit/failbit but still delivers gcount() valid bytes, so the
        // chunk is always processed before the stream state is inspected.
        const std::streamsize read_count = in.gcount();
        for (std::streamsize i = 0; i < read_count; ++i) {
            const uint32_t symbol = static_cast<uint32_t>(buffer[static_cast<std::size_t>(i)]);
            // Check before incrementing so the counter never wraps around to zero.
            if (freq[symbol] == std::numeric_limits<uint32_t>::max()) {
                if (overflow_symbol) {
                    *overflow_symbol = symbol;
                }
                return FrequencyCountStatus::OVERFLOW;
            }
            ++freq[symbol];
        }
        // A hard stream error takes precedence over end-of-file so it is never reported as OK.
        if (in.bad()) {
            return FrequencyCountStatus::IO_ERROR;
        }
        if (in.eof()) {
            return FrequencyCountStatus::OK;
        }
        if (!in) {
            return FrequencyCountStatus::IO_ERROR;
        }
    }
}

} // namespace compresskit
65 changes: 65 additions & 0 deletions algorithms/shared/cpp/tests/test_lifecycle.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

#include "compresskit/algorithms.hpp"
#include "compresskit/frequency_table.hpp"

namespace {

Expand Down Expand Up @@ -129,6 +131,65 @@ void test_decode_buffer_preserves_finish_retry_prefix() {
assert(std::string(decoded.value.begin(), decoded.value.end()) == "uvwxyz");
}

// Serializing a two-entry table must produce a little-endian count followed by
// little-endian entries, byte for byte.
void test_write_frequency_table_uses_little_endian_layout() {
    // Every byte of each entry is distinct, so any byte-order mistake changes the output.
    const std::vector<uint32_t> table{0x78563412u, 0x01020304u};
    std::ostringstream sink(std::ios::binary);

    const bool wrote = compresskit::write_frequency_table(sink, table);
    assert(wrote);

    static const char kWanted[] = {
        0x02, 0x00, 0x00, 0x00,  // entry count
        0x12, 0x34, 0x56, 0x78,  // first entry
        0x04, 0x03, 0x02, 0x01,  // second entry
    };
    const std::string produced = sink.str();
    assert(produced == std::string(kWanted, sizeof(kWanted)));
}

void test_read_frequency_table_decodes_little_endian_values() {
const std::string bytes(
"\x02\x00\x00\x00"
"\x12\x34\x56\x78"
"\x04\x03\x02\x01",
12);
std::istringstream in(bytes, std::ios::binary);
std::vector<uint32_t> freq;
uint32_t actual_count = 0;

const auto status = compresskit::read_frequency_table(in, freq, 2, &actual_count);

assert(status == compresskit::FrequencyTableReadStatus::OK);
assert(actual_count == 2);
assert((freq == std::vector<uint32_t>{0x78563412u, 0x01020304u}));
}

void test_read_frequency_table_reports_bad_count() {
const std::string bytes("\x02\x00\x00\x00", 4);
std::istringstream in(bytes, std::ios::binary);
std::vector<uint32_t> freq;
uint32_t actual_count = 0;

const auto status = compresskit::read_frequency_table(in, freq, 3, &actual_count);

assert(status == compresskit::FrequencyTableReadStatus::BAD_COUNT);
assert(actual_count == 2);
assert(freq.empty());
}

void test_accumulate_frequencies_reports_overflow() {
std::vector<uint32_t> freq(257, 0);
freq[0] = UINT32_MAX;
std::istringstream in(std::string(1, '\0'), std::ios::binary);
uint32_t overflow_symbol = UINT32_MAX;

const auto status = compresskit::accumulate_frequencies(in, freq, &overflow_symbol);

assert(status == compresskit::FrequencyCountStatus::OVERFLOW);
assert(overflow_symbol == 0);
assert(freq[0] == UINT32_MAX);
}

} // namespace

int main() {
Expand All @@ -145,6 +206,10 @@ int main() {

test_encode_buffer_preserves_finish_retry_prefix();
test_decode_buffer_preserves_finish_retry_prefix();
test_write_frequency_table_uses_little_endian_layout();
test_read_frequency_table_decodes_little_endian_values();
test_read_frequency_table_reports_bad_count();
test_accumulate_frequencies_reports_overflow();

return 0;
}
Loading