In [1]:
from ANS import *
from arithmetic_coding import *
from huffman import *
from sANS import *
import time
from utils.utils import get_symbols

65


In [2]:
# Alphabet shared by every experiment.
symbols = ['a', 'b', 'c', 'd']

# Frequency table per named distribution; each list has one entry per symbol.
dist = {
    "Uniform": [4, 4, 4, 4],  # natural powers of two
    "Natural": [8, 4, 2, 2],
    "Random": [6, 2, 8, 4],
    # Disabled distributions, kept for reference:
    # "left": [16,16,6,4],
    # "right": [4,4,16,20],
    # "middle": [6, 24, 24, 6]
}

# Third argument handed to get_symbols for each benchmark message
# (presumably the message length -- confirm against utils.get_symbols).
symbol_len = [5, 10, 20, 30]

# For every distribution: one get_symbols(...) result per entry of symbol_len,
# generated in the same order as symbol_len.
experiment_symbols = {}
for name, freq in dist.items():
    experiment_symbols[name] = [
        get_symbols(symbols, freq, slen) for slen in symbol_len
    ]

# Result tables filled in by the benchmark cells below, keyed by
# "<coder>@<distribution>".
encoding_time = {}
decoding_time = {}
compression_size = {}

In [3]:
## Huffman 
def huffan():
    """Benchmark the Huffman coder on every distribution in ``dist``.

    A ``Huffman`` coder is built from ``symbols`` and each distribution's
    frequencies, and every prepared message in ``experiment_symbols`` is
    encoded and then decoded. The elapsed nanoseconds of each call and
    ``len()`` of the encoded value are stored in the module-level
    ``encoding_time``, ``decoding_time`` and ``compression_size`` dicts under
    the key ``'Huffman@<distribution>'``.
    """
    for dist_name, frequencies in dist.items():
        coder = Huffman(symbols, frequencies)
        encode_ns, decode_ns, encoded_len = [], [], []

        for message in experiment_symbols[dist_name]:
            t_begin = time.perf_counter_ns()
            encoded, root = coder.encode(message)
            t_end = time.perf_counter_ns()
            encode_ns.append(t_end - t_begin)

            encoded_len.append(len(encoded))

            t_begin = time.perf_counter_ns()
            # The decoded message is not inspected; only the duration is kept.
            roundtrip = coder.decode(encoded, root)
            t_end = time.perf_counter_ns()
            decode_ns.append(t_end - t_begin)

        key = 'Huffman@' + dist_name
        encoding_time[key] = encode_ns
        compression_size[key] = encoded_len
        decoding_time[key] = decode_ns


huffan()


In [4]:
def rangeans():
    """Benchmark the rANS coder on every distribution in ``dist``.

    A ``rANS`` coder is built from ``symbols`` and each distribution's
    frequencies. Every prepared message is encoded with ``encode(message, 0)``
    and decoded again with the two values that ``encode`` returned. Elapsed
    nanoseconds and ``len()`` of the encoded value are written to the
    module-level result dicts under ``'rANS@<distribution>'``.
    """
    for dist_name, frequencies in dist.items():
        coder = rANS(symbols, frequencies)
        encode_ns, decode_ns, encoded_len = [], [], []

        for message in experiment_symbols[dist_name]:
            t_begin = time.perf_counter_ns()
            encoded, length = coder.encode(message, 0)
            t_end = time.perf_counter_ns()
            encode_ns.append(t_end - t_begin)

            encoded_len.append(len(encoded))

            t_begin = time.perf_counter_ns()
            # The decoded message is not inspected; only the duration is kept.
            roundtrip = coder.decode(encoded, length)
            t_end = time.perf_counter_ns()
            decode_ns.append(t_end - t_begin)

        key = 'rANS@' + dist_name
        encoding_time[key] = encode_ns
        compression_size[key] = encoded_len
        decoding_time[key] = decode_ns


rangeans()

In [5]:
def streaming():
    """Benchmark the sANS coder on every distribution in ``dist``.

    A ``sANS`` coder is built from ``symbols`` and each distribution's
    frequencies. ``encode`` returns two values; the recorded size is the sum of
    their ``len()``, and both are passed back to ``decode``. Elapsed
    nanoseconds and sizes are written to the module-level result dicts under
    ``'sANS@<distribution>'``.
    """
    for dist_name, frequencies in dist.items():
        coder = sANS(symbols, frequencies)
        encode_ns, decode_ns, encoded_len = [], [], []

        for message in experiment_symbols[dist_name]:
            t_begin = time.perf_counter_ns()
            encoded, bits = coder.encode(message)
            t_end = time.perf_counter_ns()
            encode_ns.append(t_end - t_begin)

            # Both returned parts count towards the compressed size.
            encoded_len.append(len(encoded) + len(bits))

            t_begin = time.perf_counter_ns()
            # The decoded message is not inspected; only the duration is kept.
            roundtrip = coder.decode(encoded, bits)
            t_end = time.perf_counter_ns()
            decode_ns.append(t_end - t_begin)

        key = 'sANS@' + dist_name
        encoding_time[key] = encode_ns
        compression_size[key] = encoded_len
        decoding_time[key] = decode_ns


streaming()

In [6]:
def arith():
    """Benchmark ``ArithmeticCoding`` on every distribution in ``dist``.

    For each distribution a coder is built from ``symbols`` and the
    distribution's frequencies, and every prepared message in
    ``experiment_symbols`` is encoded and then decoded. Elapsed nanoseconds
    and ``len()`` of the encoded value are stored in the module-level
    ``encoding_time``, ``decoding_time`` and ``compression_size`` dicts under
    ``'Arithmetic@<distribution>'``.

    If encoding or decoding a message raises, that message is recorded as
    ``math.inf`` in all three lists. Measurements are appended only after the
    whole round trip succeeded, so the three lists always hold exactly one
    entry per message and stay index-aligned.
    """
    for name, freq in dist.items():
        coder = ArithmeticCoding(symbols, freq)
        enc_time = []
        dec_time = []
        size = []
        for s in experiment_symbols[name]:
            try:
                start = time.perf_counter_ns()
                enc_value, bit = coder.encode(msg=s)
                stop = time.perf_counter_ns()

                encoded_len = len(enc_value)

                dec_start = time.perf_counter_ns()
                # The decoded message is not inspected; only the duration is kept.
                decoded_val = coder.decode(enc_value, bit)
                dec_stop = time.perf_counter_ns()
            except Exception:
                # Exception (not a bare except) so Ctrl-C / kernel interrupts
                # still stop the benchmark instead of being logged as inf.
                enc_time.append(math.inf)
                dec_time.append(math.inf)
                size.append(math.inf)
            else:
                enc_time.append(stop - start)
                size.append(encoded_len)
                dec_time.append(dec_stop - dec_start)
        encoding_time['Arithmetic@' + name] = enc_time
        compression_size['Arithmetic@' + name] = size
        decoding_time['Arithmetic@' + name] = dec_time


arith()

In [7]:
def aritrang():
    """Benchmark ``RangeCoding`` on every distribution in ``dist``.

    For each distribution a coder is built from ``symbols`` and the
    distribution's frequencies, and every prepared message in
    ``experiment_symbols`` is encoded and then decoded. Elapsed nanoseconds
    and ``len()`` of the encoded value are stored in the module-level
    ``encoding_time``, ``decoding_time`` and ``compression_size`` dicts under
    ``'Range@<distribution>'``.

    If encoding or decoding a message raises, the message and frequencies are
    printed and the message is recorded as ``inf`` in all three lists. This
    matches how ``arith`` records failures and keeps every list at one entry
    per message, which the averaging cells rely on when stacking the lists
    into a rectangular array.
    """
    failed = float("inf")
    for name, freq in dist.items():
        coder = RangeCoding(symbols, freq)
        enc_time = []
        dec_time = []
        size = []
        for s in experiment_symbols[name]:
            try:
                start = time.perf_counter_ns()
                enc_value, bit = coder.encode(msg=s)
                stop = time.perf_counter_ns()

                encoded_len = len(enc_value)

                dec_start = time.perf_counter_ns()
                # The decoded message is not inspected; only the duration is kept.
                decoded_val = coder.decode(enc_value, bit)
                dec_stop = time.perf_counter_ns()
            except Exception:
                # Exception (not a bare except) so Ctrl-C / kernel interrupts
                # still stop the benchmark.
                print(s, freq)
                enc_time.append(failed)
                dec_time.append(failed)
                size.append(failed)
            else:
                # Append only after the full round trip succeeded so the three
                # lists never get out of step with each other.
                enc_time.append(stop - start)
                size.append(encoded_len)
                dec_time.append(dec_stop - dec_start)
        encoding_time['Range@' + name] = enc_time
        compression_size['Range@' + name] = size
        decoding_time['Range@' + name] = dec_time


aritrang()

In [8]:
# Turn each result table into a list of (key, values) pairs, the shape the
# aggregation cells below consume. Going through dict() first makes this cell
# safe to re-run: dict() accepts both the original dict and an
# already-converted list of pairs, whereas calling .items() directly on the
# list raised AttributeError on a second execution.
compression_size = list(dict(compression_size).items())
encoding_time = list(dict(encoding_time).items())
decoding_time = list(dict(decoding_time).items())
compression_size

[('Huffman@Uniform', [10, 20, 40, 60]),
 ('Huffman@Natural', [10, 13, 35, 53]),
 ('Huffman@Random', [10, 17, 43, 58]),
 ('rANS@Uniform', [12, 23, 43, 62]),
 ('rANS@Natural', [13, 16, 36, 52]),
 ('rANS@Random', [12, 21, 43, 61]),
 ('sANS@Uniform', [17, 37, 77, 137]),
 ('sANS@Natural', [17, 30, 65, 118]),
 ('sANS@Random', [17, 33, 71, 131]),
 ('Arithmetic@Uniform', [11, 21, 41, inf]),
 ('Arithmetic@Natural', [11, 14, 36, 54]),
 ('Arithmetic@Random', [12, 18, 40, inf]),
 ('Range@Uniform', [8, 17, 38, 58]),
 ('Range@Natural', [9, 7, 34, 52]),
 ('Range@Random', [10, 15, 39, 56])]

In [9]:
# Average each coder's compressed sizes across distributions, position by
# position (axis 0 stacks the per-distribution lists).
# Entries are grouped by the coder name in front of '@' instead of by a fixed
# stride of 3, so changing the number of distributions in `dist` neither
# overwrites a coder's entry nor indexes past the end of the list.
# dict(...) accepts the list of (key, values) pairs as well as a plain dict.
sizes_by_coder = {}
for key, values in dict(compression_size).items():
    sizes_by_coder.setdefault(key.split('@')[0], []).append(values)

comp_df_data = {
    coder: list(np.mean(np.array(rows), axis=0))
    for coder, rows in sizes_by_coder.items()
}
        

In [10]:
# Average each coder's encoding times across distributions, position by
# position (axis 0 stacks the per-distribution lists).
# Entries are grouped by the coder name in front of '@' instead of by a fixed
# stride of 3, so changing the number of distributions in `dist` neither
# overwrites a coder's entry nor indexes past the end of the list.
# dict(...) accepts the list of (key, values) pairs as well as a plain dict.
enc_times_by_coder = {}
for key, values in dict(encoding_time).items():
    enc_times_by_coder.setdefault(key.split('@')[0], []).append(values)

enc_df_data = {
    coder: list(np.mean(np.array(rows), axis=0))
    for coder, rows in enc_times_by_coder.items()
}

In [11]:
# Average each coder's decoding times across distributions, position by
# position (axis 0 stacks the per-distribution lists).
# Entries are grouped by the coder name in front of '@' instead of by a fixed
# stride of 3, so changing the number of distributions in `dist` neither
# overwrites a coder's entry nor indexes past the end of the list.
# dict(...) accepts the list of (key, values) pairs as well as a plain dict.
dec_times_by_coder = {}
for key, values in dict(decoding_time).items():
    dec_times_by_coder.setdefault(key.split('@')[0], []).append(values)

dec_df_data = {
    coder: list(np.mean(np.array(rows), axis=0))
    for coder, rows in dec_times_by_coder.items()
}

In [15]:
# One DataFrame per metric: a column per coder, a row per position in the
# averaged per-coder lists.
comp_df = pd.DataFrame(comp_df_data)
ex_time_df = pd.DataFrame(enc_df_data)
dec_time_df = pd.DataFrame(dec_df_data)
comp_df

Unnamed: 0,Huffman,rANS,sANS,Arithmetic,Range
0,10.0,12.333333,17.0,11.333333,9.0
1,16.666667,20.0,33.333333,17.666667,13.0
2,39.333333,40.666667,71.0,39.0,37.0
3,57.0,58.333333,128.666667,inf,55.333333


In [18]:
import plotly.express as px
# Grouped histogram of the averaged compressed sizes in comp_df.
df = comp_df
row_labels = ['0', '1', '2', '3']
coder_columns = ['Huffman', 'rANS', 'sANS', 'Arithmetic', 'Range']
fig = px.histogram(
    df,
    x=row_labels,
    y=coder_columns,
    barmode='group',
    title="Comparision: Average Size of Compressed Data",
)
fig.update_layout(yaxis_title="Average Compressed Data Size")

fig.show()

In [None]:
import plotly.express as px
# Grouped histogram of the averaged encoding times.
# ex_time_df has one column per coder and a positional row index, so the coder
# columns are plotted as wide-form series against the stringified row labels.
# The earlier call asked for "Compression", "Average Encoding Time" and
# "Distribuiton" columns, none of which exist in this frame.
df = ex_time_df
fig = px.histogram(
    df,
    x=[str(label) for label in df.index],
    y=list(df.columns),
    barmode='group',
    title="Comparision: Average Encoding Time",
)
fig.update_layout(yaxis_title="Average Encoding Time (ns)")

fig.show()

In [None]:
import plotly.express as px
# Grouped histogram of the averaged decoding times.
# dec_time_df has one column per coder and a positional row index, so the coder
# columns are plotted as wide-form series against the stringified row labels.
# The earlier call asked for "Compression", "Average Decoding Time" and
# "Distribuiton" columns, none of which exist in this frame.
df = dec_time_df
fig = px.histogram(
    df,
    x=[str(label) for label in df.index],
    y=list(df.columns),
    barmode='group',
    title="Comparision: Average Decoding Time",
)
fig.update_layout(yaxis_title="Average Decoding Time (ns)")

fig.show()