/
Huffman.h
257 lines (214 loc) · 8.93 KB
/
Huffman.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#ifndef _HUFFMAN_H_
#define _HUFFMAN_H_
#include <functional>
#include <string>
#include <sstream>
#include <cassert>
#include <stdexcept>
#include <vector>
#include <queue>
#include <memory>
#include <iostream>
#include "ostreambin.h"
#include "istreambin.h"
namespace bw {
class Node
{
public:
unsigned char ch;
int freq;
std::unique_ptr<Node> left, right;
Node(char _ch, int _freq, std::unique_ptr<Node> &_left, std::unique_ptr<Node> &_right)
:ch(_ch)
,freq(_freq)
,left(std::move(_left))
,right(std::move(_right))
{
}
Node(char _ch, int _freq, std::unique_ptr<Node> &&_left, std::unique_ptr<Node> &&_right)
:ch(_ch)
,freq(_freq)
,left(std::move(_left))
,right(std::move(_right))
{
}
Node(char _ch, int _freq)
:ch(_ch)
,freq(_freq)
{
}
Node(std::unique_ptr<Node> & that)
:ch(that->ch)
,freq(that->freq)
,left(std::move(that->left))
,right(std::move(that->right))
{
}
// compare, based on frequency
int compareTo(const Node &that) const
{
return this->freq - that.freq;
}
int compareTo(const Node *that) const
{
return this->freq - that->freq;
}
// is the node a leaf node?
bool isLeaf() {
assert (((left == nullptr) && (right == nullptr)) || ((left != nullptr) && (right != nullptr)));
return (left == nullptr) && (right == nullptr);
}
};
typedef unsigned char UCHAR;
typedef std::unique_ptr<Node> Node_ptr;
bool operator<(const bw::Node_ptr &a, const bw::Node_ptr &b);
class Huffman {
// alphabet size of extended ASCII
private:
static const int R = 256;
private:
// Do not instantiate.
Huffman()
{
}
// Huffman trie node
/*
* Reads a sequence of 8-bit bytes from standard input; compresses them
* using Huffman codes with an 8-bit alphabet; and writes the results
* to standard output.
*/
public:
void static compress(istreambin &streamin, ostreambin &streamout) {
// read the input
std::string input(std::istreambuf_iterator<char>(*(streamin.getStream())), {});
// tabulate frequency counts
std::vector<int> freq(R);
for (int i = 0; i < input.size(); i++)
{
assert(((UCHAR)input[i]) >= 0);
freq[((UCHAR)input[i])]++;
}
// build Huffman trie
Node_ptr root(buildTrie(freq));
// build code table
std::vector<std::string> st(R);
buildCode(st, root, "");
// print trie for decoder
writeTrie(root, streamout);
// print number of bytes in original uncompressed message
int input_size = input.size();
streamout.write(reinterpret_cast<const char*>(&input_size), sizeof(input_size));
// use Huffman code to encode input
for (int i = 0; i < input.size(); i++) {
std::string code = st[((UCHAR)input[i])];
for (int j = 0; j < code.size(); j++) {
if (code.at(j) == '0') {
streamout.write(false);
}
else if (code.at(j) == '1') {
streamout.write(true);
}
else
{
std::cout << "Code:" << code << std::endl;
throw std::invalid_argument("Illegal state");
}
}
}
// close output stream
streamout.flush();
}
private:
// build the Huffman trie given frequencies
static Node *buildTrie(const std::vector<int> &freq) {
// initialze priority queue with singleton trees
std::priority_queue<Node_ptr, std::vector<Node_ptr>, std::greater<Node_ptr>> pq;
for (int i = 0; i < R; i++)
if (freq[i] > 0)
pq.emplace(new Node(i, freq[i]));
// special case in case there is only one character with a nonzero frequency
if (pq.size() == 1) {
if (freq['\0'] == 0) pq.emplace(new Node('\0', 0));
else pq.emplace(new Node('\1', 0));
}
// merge two smallest trees
while (pq.size() > 1) {
Node_ptr left(std::move(const_cast<Node_ptr&>(pq.top()))); pq.pop();
Node_ptr right(std::move(const_cast<Node_ptr&>(pq.top()))); pq.pop();
auto freq = left->freq + right->freq;
pq.emplace(new Node('\0', freq, left, right));
}
Node_ptr ret = std::move(const_cast<Node_ptr&>(pq.top())); pq.pop();
return ret.release();
}
// write bitstring-encoded trie to standard output
void static writeTrie(const Node_ptr &x, ostreambin &streamout) {
if (x->isLeaf()) {
//BinaryStdOut.write(true);
//BinaryStdOut.write(x.ch, 8);
streamout.write(true);
streamout.write((char)x->ch);
return;
}
streamout.write(false);
writeTrie(x->left, streamout);
writeTrie(x->right, streamout);
}
// make a lookup table from symbols and their encodings
void static buildCode(std::vector<std::string> &st, Node_ptr const &x, const std::string &s) {
if (!x->isLeaf()) {
buildCode(st, x->left, s + "0");
buildCode(st, x->right, s + "1");
}
else {
st[x->ch] = s;
}
}
/**
* Reads a sequence of bits that represents a Huffman-compressed message from
* standard input; expands them; and writes the results to standard output.
**/
public:
void static expand(istreambin &streamin, ostreambin &streamout) {
streamin.fillbuffer();
// read in Huffman trie from input stream
Node_ptr root(readTrie(streamin));
// number of bytes to write
int length;
streamin.read(reinterpret_cast<char *>(&length), sizeof(length));
std::stringstream ss;
// decode using the Huffman trie
for (int i = 0; i < length; i++) {
Node *x = root.get();
while (!x->isLeaf()) {
bool bit;
streamin.read(bit);
if (bit) x = x->right.get();
else x = x->left.get();
}
//streamout.getStream()->put(x->ch);
ss.put(x->ch);
}
(*streamout.getStream()) << ss.rdbuf();
streamout.flush();
}
private:
static Node *readTrie(istreambin &streamin) {
bool isLeaf;
streamin.read(isLeaf);
Node_ptr ret;
if (isLeaf) {
char c;
streamin.read(c);
ret.reset(new Node(c, -1));
}
else {
Node_ptr left(readTrie(streamin));
Node_ptr right(readTrie(streamin));
ret.reset(new Node('\0', -1, left, right));
}
return ret.release();
}
};
}
#endif