PPMC

In [1]:
from tabulate import tabulate

def init_table(K_max):
    """
    Create an empty PPM context table for orders 0..K_max.

    The result is a list indexed by context order. Entry 0 holds the single
    empty context '' with an escape count ('$') of 0; every higher order
    starts as its own empty dict.
    """
    order_zero = {'': {'$': 0}}
    return [order_zero] + [{} for _ in range(K_max)]



def append_symbol_to_table(text, table, s):
    """
    Record one occurrence of symbol `s` after the history `text` in every order.

    text  - symbols seen so far (the new symbol is NOT included)
    table - [{context: {symbol: count}}], indexed by context order
    s     - the symbol that follows `text`

    The table is modified in place and also returned.
    """
    for order, contexts in enumerate(table):
        # Order 0 always uses the empty context; higher orders use the
        # last `order` symbols of the history.
        context = text[-order:] if order else ''
        counts = contexts.get(context)

        if counts is None:
            # Open a new context only when the history is long enough to
            # supply a full-length context for this order.
            if len(context) == order:
                contexts[context] = {s: 1, '$': 1}
            continue

        if s in counts:
            counts[s] += 1
        else:
            # First time `s` follows this context: the escape count grows
            # together with the number of distinct symbols.
            counts[s] = 1
            counts['$'] += 1

    return table



def encode_symbol(text, table, s):
    """
    Produce the list of (symbol, order) pairs emitted when coding `s`.

    text  - symbols seen so far (the history preceding `s`)
    table - [{context: {symbol: count}}], indexed by context order
    s     - the symbol to encode

    Orders are tried from the highest down to 0. Each order where `s` is not
    recorded for the current context (or the context itself is missing)
    contributes an escape ('$', order). The first order that knows `s`
    contributes (s, order) and ends the search. If no order knows `s`, the
    symbol is emitted at order -1.
    """
    emitted = []
    for order in reversed(range(len(table))):
        context = text[-order:] if order > 0 else ''

        # A missing context is treated like a context with no symbols.
        if s in table[order].get(context, ()):
            emitted.append((s, order))
            return emitted

        emitted.append(('$', order))

    # Every order escaped: fall back to the order -1 model.
    emitted.append((s, -1))
    return emitted


def encode(text, K_max):
    """
    PPMC Encoding

    Input:
        text  - the text to encode
        K_max - the highest context order to use

    Output: a tuple of
        - the encoded text: all emitted symbols (escapes shown as '$')
          concatenated in order,
        - the PPM table after the whole text has been processed,
        - a `tabulate`-rendered table of the (symbol, order) pairs emitted.
    """
    table = init_table(K_max)

    # The helpers build contexts from at most this many trailing symbols
    # of the history (the highest order present in the table).
    max_order = len(table) - 1

    output_table = []
    for i, current in enumerate(text):
        # Pass only the trailing window that can form a context instead of
        # copying the whole prefix text[:i] for every symbol; the resulting
        # contexts are identical.
        history = text[max(0, i - max_order):i]

        # encode
        output_table.extend(encode_symbol(history, table, current))

        # update table
        table = append_symbol_to_table(history, table, current)

    # Join once at the end rather than growing a string inside the loop.
    encoded_text = "".join(emitted for emitted, _ in output_table)

    return encoded_text, table, tabulate(output_table, headers=["Encoded symbol", "Order"], tablefmt="pretty")

In [2]:
# Example for PDF 9 slide 15:
# Build the context table for "ACCBACCACBA" with orders 0..2, without encoding.
K_max = 2
table = init_table(K_max)
text = "ACCBACCACBA"
# Feed the text one symbol at a time: text[:i] is the history seen so far,
# text[i] is the symbol that follows it.
for i in range(len(text)):
    table = append_symbol_to_table(text[:i], table, text[i])

print(table)

[{'': {'$': 3, 'A': 4, 'C': 5, 'B': 2}}, {'A': {'C': 3, '$': 1}, 'C': {'C': 2, '$': 3, 'B': 2, 'A': 1}, 'B': {'A': 2, '$': 1}}, {'AC': {'C': 2, '$': 2, 'B': 1}, 'CC': {'B': 1, '$': 2, 'A': 1}, 'CB': {'A': 2, '$': 1}, 'BA': {'C': 1, '$': 1}, 'CA': {'C': 1, '$': 1}}]


In [3]:
# Example from PDF 9 slide 16:
# Encode "abracadabra" with orders 0..2 and show every emitted (symbol, order) pair.
K_max = 2
text = "abracadabra"
# The second return value (the final PPM table) is not needed here.
encoded_text, _, output_table = encode(text, K_max)
print(output_table)
print("Encoded text: {}".format(encoded_text))

+----------------+-------+
| Encoded symbol | Order |
+----------------+-------+
|       $        |   2   |
|       $        |   1   |
|       $        |   0   |
|       a        |  -1   |
|       $        |   2   |
|       $        |   1   |
|       $        |   0   |
|       b        |  -1   |
|       $        |   2   |
|       $        |   1   |
|       $        |   0   |
|       r        |  -1   |
|       $        |   2   |
|       $        |   1   |
|       a        |   0   |
|       $        |   2   |
|       $        |   1   |
|       $        |   0   |
|       c        |  -1   |
|       $        |   2   |
|       $        |   1   |
|       a        |   0   |
|       $        |   2   |
|       $        |   1   |
|       $        |   0   |
|       d        |  -1   |
|       $        |   2   |
|       $        |   1   |
|       a        |   0   |
|       $        |   2   |
|       b        |   1   |
|       r        |   2   |
|       a        |   2   |
+----------------+-------+
Encoded text: $$$a$$$b$$$r$$a$$$c$$a$$$d$$a$bra

In [4]:
# Example from PDF 9 slide 18:
# Encode "aabbaabb" with orders 0..2 and show every emitted (symbol, order) pair.
K_max = 2
text = "aabbaabb"
# The second return value (the final PPM table) is not needed here.
encoded_text, _, output_table = encode(text, K_max)
print(output_table)
print("Encoded text: {}".format(encoded_text))

+----------------+-------+
| Encoded symbol | Order |
+----------------+-------+
|       $        |   2   |
|       $        |   1   |
|       $        |   0   |
|       a        |  -1   |
|       $        |   2   |
|       $        |   1   |
|       a        |   0   |
|       $        |   2   |
|       $        |   1   |
|       $        |   0   |
|       b        |  -1   |
|       $        |   2   |
|       $        |   1   |
|       b        |   0   |
|       $        |   2   |
|       $        |   1   |
|       a        |   0   |
|       $        |   2   |
|       a        |   1   |
|       b        |   2   |
|       b        |   2   |
+----------------+-------+
Encoded text: $$$a$$a$$$b$$b$$a$abb


In [5]:
# Example from PDF 9 slide 19:
# Show the PPM table left behind after encoding "abracadabra" with orders 0..2.
K_max = 2
text = "abracadabra"
# Only the table (second return value) is kept.
# NOTE(review): `inisde_table` looks like a typo for `inside_table`; left unchanged here.
_, inisde_table, _ = encode(text, K_max)
print(inisde_table)

[{'': {'$': 5, 'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1}}, {'a': {'b': 2, '$': 3, 'c': 1, 'd': 1}, 'b': {'r': 2, '$': 1}, 'r': {'a': 2, '$': 1}, 'c': {'a': 1, '$': 1}, 'd': {'a': 1, '$': 1}}, {'ab': {'r': 2, '$': 1}, 'br': {'a': 2, '$': 1}, 'ra': {'c': 1, '$': 1}, 'ac': {'a': 1, '$': 1}, 'ca': {'d': 1, '$': 1}, 'ad': {'a': 1, '$': 1}, 'da': {'b': 1, '$': 1}}]


In [6]:
# Example from PDF 9 slide 20:
# Encode "aabb" with orders 0..1 and show every emitted (symbol, order) pair.
K_max = 1
text = "aabb"
# The second return value (the final PPM table) is not needed here.
encoded_text, _, output_table = encode(text, K_max)
print(output_table)
print("Encoded text: {}".format(encoded_text))

+----------------+-------+
| Encoded symbol | Order |
+----------------+-------+
|       $        |   1   |
|       $        |   0   |
|       a        |  -1   |
|       $        |   1   |
|       a        |   0   |
|       $        |   1   |
|       $        |   0   |
|       b        |  -1   |
|       $        |   1   |
|       b        |   0   |
+----------------+-------+
Encoded text: $$a$a$$b$b


In [7]:
# Example from PDF 9 slide 25:
# Show the PPM table left behind after encoding "aabb" with orders 0..1.
K_max = 1
text = "aabb"
# Only the table (second return value) is kept.
# NOTE(review): `inisde_table` looks like a typo for `inside_table`; left unchanged here.
_, inisde_table, _ = encode(text, K_max)
print(inisde_table)

[{'': {'$': 2, 'a': 2, 'b': 2}}, {'a': {'a': 1, '$': 2, 'b': 1}, 'b': {'b': 1, '$': 1}}]


In [8]:
# Example from PDF 9 slide 27:
# Encode "aabbc" with orders 0..1; print the encoded text and the final PPM table.
K_max = 1
text = "aabbc"
# The rendered (symbol, order) table (third return value) is discarded.
# NOTE(review): `inisde_table` looks like a typo for `inside_table`; left unchanged here.
encoded_text, inisde_table, _ = encode(text, K_max)
print("Encoded text: {}".format(encoded_text))
print(inisde_table)

Encoded text: $$a$a$$b$b$$c
[{'': {'$': 3, 'a': 2, 'b': 2, 'c': 1}}, {'a': {'a': 1, '$': 2, 'b': 1}, 'b': {'b': 1, '$': 2, 'c': 1}}]
