public
Description: PPM (Predict by Partial Matching) compression algorithm implementation for Python
Homepage:
Clone URL: git://github.com/pluskid/pyppm.git
pluskid (author)
Mon Nov 03 08:51:29 -0800 2008
commit  e94a82cbef67437ebc932276b2db7150459a12d8
tree    639a12e471e8742eb937ae2ea68e2d241ba3876b
parent  ea2cf61c65976ae2bc407d3c7b7e40ae6b1c317e
pyppm / ppm_model.h
100644 211 lines (174 sloc) 4.677 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#ifndef _PPM_MODEL_H_
#define _PPM_MODEL_H_
 
#include <cstring>
 
#include "config.h"
#include "buffer.h"
#include "trie.h"
#include "arithmetic_encoder.h"
#include "arithmetic_decoder.h"
 
struct PPMModel
{
    Trie m_contexts[Max_no_contexts+1];
    Buffer m_buffer;
    int m_refcount;
 
    PPMModel()
        :m_refcount(1) {
    }
 
    void incref() {
        ++m_refcount;
    }
    void decref() {
        if (--m_refcount == 0) {
            delete this;
        }
    }
    
    void update_contexts(symbol_t sym) {
        int ictx = 1;
        int offset = m_buffer.length() - 1;
        while (offset >= 0) {
            m_contexts[ictx].update_model(m_buffer, offset, sym);
            offset--;
            ictx++;
        }
    }
 
    static void dump(PPMModel *model, FILE *f) {
        for (int i = 0; i < Max_no_contexts+1; ++i) {
            model->m_contexts[i].dump(f);
        }
    }
    static PPMModel *load(FILE *f) {
        PPMModel *model = new PPMModel();
        for (int i = 0; i < Max_no_contexts+1; ++i) {
            model->m_contexts[i].load(f);
        }
        return model;
    }
};
 
class DefaultContextUpdater
{
public:
    void do_context_update(PPMModel *model, symbol_t sym) {
        model->update_contexts(sym);
    }
};
class NopeContextUpdater
{
public:
    void do_context_update(PPMModel *, symbol_t) {
    }
};
 
template<typename Adapter, typename ContextUpdater>
class PPMEncoder: public ContextUpdater
{
private:
    ArithmeticEncoder<Adapter> *m_encoder;
    PPMModel *m_model;
    
    void uni_encode(wsymbol_t sym) {
        m_encoder->encode(sym, sym+1, No_of_symbols);
    }
 
public:
    PPMEncoder(Adapter &ad)
        :m_encoder(new ArithmeticEncoder<Adapter>(ad)),
         m_model(new PPMModel()) {
    }
 
    PPMEncoder(Adapter &ad, PPMModel *model)
        :m_encoder(new ArithmeticEncoder<Adapter>(ad)),
         m_model(model) {
        m_model->incref();
    }
 
    ~PPMEncoder() {
        delete m_encoder;
        m_model->decref();
    }
 
    PPMModel *model() {
        return m_model;
    }
 
    void start_encoding() {
        // Do nothing
    }
    
    void encode(wsymbol_t sym) {
        int ictx = m_model->m_buffer.length();
 
        if (ictx == 0) {
            uni_encode(sym);
        } else {
            
            for (int i = 0; ictx > 0; --ictx, ++i) {
                if (m_model->m_contexts[ictx].encode(m_encoder,
                                                     m_model->m_buffer, i, sym)) {
                    break; // predict success
                }
            }
 
            if (ictx == 0)
                uni_encode(sym);
        }
 
        if (sym != EOF_symbol) {
            this->do_context_update(m_model, sym);
            m_model->m_buffer << sym;
        }
    }
 
    void finish_encoding() {
        encode(EOF_symbol);
        m_encoder->finish_encoding();
    }
};
 
template<typename Adapter, typename ContextUpdater>
class PPMDecoder: public ContextUpdater
{
private:
    ArithmeticDecoder<Adapter> *m_decoder;
    PPMModel *m_model;
    
    wsymbol_t uni_decode() {
        code_value cum;
        wsymbol_t sym;
        cum = m_decoder->get_cum_freq(No_of_symbols);
        assert(cum >= 0 && cum < No_of_symbols);
        m_decoder->pop_symbol(cum, cum+1, No_of_symbols);
        sym = cum;
 
        return sym;
    }
    
public:
    PPMDecoder(Adapter &ad)
        :m_decoder(new ArithmeticDecoder<Adapter>(ad)),
         m_model(new PPMModel()) {
    }
 
    PPMDecoder(Adapter &ad, PPMModel *model)
        :m_decoder(new ArithmeticDecoder<Adapter>(ad)),
         m_model(model) {
        m_model->incref();
    }
 
    ~PPMDecoder() {
        delete m_decoder;
        m_model->decref();
    }
 
    PPMModel *model() {
        return m_model;
    }
    
    void start_decoding() {
        m_decoder->start_decoding();
    }
    
    wsymbol_t decode() {
        int ictx = m_model->m_buffer.length();
        wsymbol_t symbol;
 
        if (ictx == 0) {
            symbol = uni_decode();
        } else {
            for (int i = 0; ictx > 0; --ictx, ++i) {
                symbol = m_model->m_contexts[ictx].decode(m_decoder,
                                                          m_model->m_buffer, i);
                if (symbol != ESC_symbol) {
                    break;
                }
            }
 
            if (ictx == 0)
                symbol = uni_decode();
        }
 
        if (symbol != EOF_symbol) {
            this->do_context_update(m_model, symbol);
            m_model->m_buffer << symbol;
        }
        return symbol;
    }
 
    void finish_decoding() {
        // Do nothing
    }
};
 
 
#endif /* _PPM_MODEL_H_ */