In [6]:
import numpy as np
import pandas as pd

In [7]:
def init_ranges(data,cumulative):
    ranges = []
    for i in range(len(data)):
        symbol_range = []
        symbol_range.append(data[i][0])
        if i > 0:
            min_range = cumulative[i-1]
        else:
            min_range = 0

        max_range = cumulative[i]
        symbol_range.append(min_range)
        symbol_range.append(max_range)
        ranges.append(symbol_range)
    return ranges



In [8]:
def show_ranges(ranges):
    ranges_df = pd.DataFrame(ranges,columns=['Symbol','start range', 'end range'])
    print("------------ RANGES ------------")
    print(ranges_df)
    


In [9]:
def update_ranges(old_ranges,new_start, new_end,DECIMALS=5):
    #new val(C(s)) = new_start + C(s) * delta
    delta = float(new_end) - float(new_start)
    ranges = np.copy(old_ranges)
    for x in ranges:
        # symbol = x[0]
        old_min = np.round(float(x[1]),decimals= DECIMALS)
        old_max = np.round(float(x[2]),decimals= DECIMALS)

        new_min = float(new_start) + float(old_min) * delta
        new_min = np.round(new_min, decimals= DECIMALS)
        new_max = float(new_start) + float(old_max) * delta
        new_max = np.round(new_max, decimals= DECIMALS)

        x[1] = new_min
        x[2] = new_max

    return ranges

In [10]:
def encode_arithmetic(file, ranges, DECIMALS = 5):
    current_min_code = -1
    current_max_code = -1
    
    init_ranges = np.copy(ranges)
    current_ranges = init_ranges
    for char in file:
        for x in current_ranges:
            symbol = x[0]
            min_range = np.round(float(x[1]),decimals= DECIMALS)
            max_range = np.round(float(x[2]),decimals= DECIMALS)
            if symbol == char:
                print(f"symbol: {symbol} \nmin= {min_range}\nmax = {max_range}")
                current_min_code = min_range
                current_max_code = max_range
                current_ranges = update_ranges(init_ranges, min_range, max_range)
                show_ranges(current_ranges)
                print("==============================================")
    print("END")
    return current_min_code, current_max_code


In [11]:
def arithmetic_to_binary(num, min_range, max_range):
    binary_string = "0."
    x = 0
    counter = -1
    while(True):
        num = num * 2
        int_part = int(num)
        num -= int_part
        x += int_part * 2**(counter)
        binary_string += str(int_part)
        if (x > min_range and x < max_range):
            break
        counter -= 1
        print(x)
    return binary_string

### Usage

In [12]:
# data = [(symbol, count), ... ()]

#data = [('A',100), ('B', 100), ('C',100), ('D',500), ('E',200), ('F',100), ('G',50), ('H',50)]
#data = [('A',0.2), ('B', 0.3), ('C',0.25), ('D',0.25)]
data = [('A',0.05), ('B', 0.1), ('C',0.1), ('D',0.25), ('E',0.5)]

# 

In [13]:
data.sort(key=lambda a: a[1])
print(data)

[('A', 0.05), ('B', 0.1), ('C', 0.1), ('D', 0.25), ('E', 0.5)]


In [14]:
freq = [count for symbol,count in data]
prob = freq / np.sum(freq)
cumulative = np.cumsum(prob)
#cumulative = [0.4, 0.7, 1]


In [15]:
init_r = init_ranges(data,cumulative)
show_ranges(init_r)

------------ RANGES ------------
  Symbol  start range  end range
0      A         0.00       0.05
1      B         0.05       0.15
2      C         0.15       0.25
3      D         0.25       0.50
4      E         0.50       1.00


In [16]:
file = "ABD"
min_code, max_code = encode_arithmetic(file,init_r)

symbol: A 
min= 0.0
max = 0.05
------------ RANGES ------------
  Symbol start range end range
0      A         0.0    0.0025
1      B      0.0025    0.0075
2      C      0.0075    0.0125
3      D      0.0125     0.025
4      E       0.025      0.05
symbol: B 
min= 0.0025
max = 0.0075
------------ RANGES ------------
  Symbol start range end range
0      A      0.0025   0.00275
1      B     0.00275   0.00325
2      C     0.00325   0.00375
3      D     0.00375     0.005
4      E       0.005    0.0075
symbol: D 
min= 0.00375
max = 0.005
------------ RANGES ------------
  Symbol start range end range
0      A     0.00375   0.00381
1      B     0.00381   0.00394
2      C     0.00394   0.00406
3      D     0.00406   0.00438
4      E     0.00438     0.005
END


In [17]:
print(f"Arithmetic code for \"{file}\": \n\tminimum: {min_code}, \n\tmaximum: {max_code}")
code = (min_code + max_code) / 2
print(f"Code (average) = {code}")

Arithmetic code for "ABD": 
	minimum: 0.00375, 
	maximum: 0.005
Code (average) = 0.004375


### converting Float to binary code

In [18]:
binary_code = arithmetic_to_binary(code, min_code, max_code)
print(f"binary code = {binary_code}")

0.0
0.0
0.0
0.0
0.0
0.0
0.0
binary code = 0.00000001


## Decoding

In [19]:
binary_code = '0.01101'

In [20]:
str_code = binary_code.split('.')[1]
list(str_code)
str_code

'01101'

In [21]:
#works only on fractions
def bincode_to_decimal(code):
    str_code = binary_code.split('.')[1]
    number=0
    power = -1
    for digit in str_code:
        if int(digit) == 1:
            number += 2** power
        power -= 1
    return number
decimal_code = bincode_to_decimal(str_code)
print(f"number in decimals = {decimal_code}")

number in decimals = 0.40625


In [22]:
def decode_arithmetic(code,ranges):
    init_ranges = np.copy(ranges)
    current_ranges = init_ranges

    decoded_string = ""
    MAX_ITER = 100
    COUNT = 0
    while True:
        for x in current_ranges:
            symbol = x[0]
            min_range = float(x[1])
            max_range = float(x[2])
            if  ((code >= min_range) and (code < max_range)):
                print(f"decoded_symbol: {symbol} \nmin= {min_range}\nmax = {max_range}")
                decoded_string += symbol
                current_ranges = update_ranges(init_ranges, min_range, max_range)
                show_ranges(current_ranges)
                print("==============================================")
                if np.round(((min_range + max_range) / 2), decimals= 5) == np.round(code,decimals=5):
                    print("END")
                    return decoded_string
            COUNT += 1
            if COUNT == MAX_ITER:
                return "Forced Exit"



In [23]:
decoded_string = decode_arithmetic(decimal_code,init_r)
decoded_string

decoded_symbol: D 
min= 0.25
max = 0.5
------------ RANGES ------------
  Symbol start range end range
0      A        0.25    0.2625
1      B      0.2625    0.2875
2      C      0.2875    0.3125
3      D      0.3125     0.375
4      E       0.375       0.5
decoded_symbol: E 
min= 0.375
max = 0.5
------------ RANGES ------------
  Symbol start range end range
0      A       0.375   0.38125
1      B     0.38125   0.39375
2      C     0.39375   0.40625
3      D     0.40625    0.4375
4      E      0.4375       0.5
decoded_symbol: D 
min= 0.40625
max = 0.4375
------------ RANGES ------------
  Symbol start range end range
0      A     0.40625   0.40781
1      B     0.40781   0.41094
2      C     0.41094   0.41406
3      D     0.41406   0.42188
4      E     0.42188    0.4375
decoded_symbol: A 
min= 0.40625
max = 0.40781
------------ RANGES ------------
  Symbol start range end range
0      A     0.40625   0.40633
1      B     0.40633   0.40648
2      C     0.40648   0.40664
3      D     0.4

'Forced Exit'

### To DO

handling Forced Exit problem in Decoding