# Imports 

In [13]:
from typing import List
from typing import Union
from typing import Tuple
from IPython.display import display
from IPython.display import Markdown
import hashlib
import os 
import numpy as np
from datetime import datetime
from tqdm import tqdm
from lz78 import LZ78

render = lambda x: display(Markdown(x))

# Reading Files 

In [9]:
base_path = './data/'
outputs_path = './output/'
files = os.listdir(base_path)
files.sort()
files

['a_room_with_a_view.txt',
 'alices_adventures_in_wonderland.txt',
 'dracula.txt',
 'mob_dick.txt',
 'pride_and_prejudice.txt',
 'romeo_and_juliet.txt',
 'the_odyssey.txt',
 'the_picture_of_dorian_gray.txt',
 'twenty_years_after.txt',
 'ulysses.txt']

In [10]:
files_mapper_name = {
    'a_room_with_a_view.txt':'A Room with a View',
    'alices_adventures_in_wonderland.txt': "Alice's Adventures in Wonderland",
    'dracula.txt':'Dracula',
    'mob_dick.txt':'Mob Dick',
    'pride_and_prejudice.txt':'Pride and Prejudice',
    'romeo_and_juliet.txt':'Romeo and Juliet',
    'the_odyssey.txt':'The Odyssey',
    'the_picture_of_dorian_gray.txt':'The Picture of Dorian Gray',
    'twenty_years_after.txt':'Twenty Years After',
    'ulysses.txt':'Ulysses',
}

## Algorithm

In [14]:
lz78:LZ78 = LZ78()

In [19]:
for en, each_file in enumerate(files):

    original_path = base_path+each_file
    compressed_path = outputs_path + each_file[:-4] + '.lz78' 
    descompressed_path = outputs_path + each_file[:-4] + '_dec.txt'

    start_time = datetime.now()
    encoding_start_time = start_time
    
    original_md5 = hashlib.md5(lz78.read_file(original_path).encode()).hexdigest()
    lz78.encode(original_path, compressed_path, 36)
    
    encoding_end_time = datetime.now()
    decoding_start_time = datetime.now()

    compressed_md5 = hashlib.md5(lz78.read_file(compressed_path).encode()).hexdigest()
    lz78.decode(compressed_path, descompressed_path, 36)
    
    decoding_end_time = datetime.now()

    descompressed_md5 = hashlib.md5(lz78.read_file(descompressed_path).encode()).hexdigest()

    original_size = os.path.getsize(original_path)
    compressed_size = os.path.getsize(compressed_path)

    encoding_time = (encoding_end_time - encoding_start_time).total_seconds()
    decoding_time = (decoding_end_time - decoding_start_time).total_seconds()
    total_time = decoding_time + encoding_time

    render("""
    ***

    * ID                         : {}
    * Book                       : {}
    * File Name                  : {}
    * Compressed File Name       : {}
    * Original file MD5 Hash     : `{}`
    * Decompressed file MD5 Hash : `{}`
    * Compressed file MD5 Hash   : `{}`
    * Correct Descompression?    : {}
    * Original Size              : {}B
    * Compressed Size            : {}B
    * Difference                 : {}B
    * Reduction Percentage       : {:.2f}%
    * Encoding time              : {:4f}s
    * Decoding time              : {:4f}s
    * Total time                 : {:4f}s
    """.format(
        en,
        files_mapper_name[each_file],
        original_path,
        compressed_path,
        original_md5,
        descompressed_md5,
        compressed_md5,
        'Yes' if original_md5 == descompressed_md5 else 'No',
        original_size,
        compressed_size,
        original_size-compressed_size,
        100*((original_size-compressed_size)/original_size),
        encoding_time,
        decoding_time,
        total_time
    ))
    


    ***

    * ID                         : 0
    * Book                       : A Room with a View
    * File Name                  : ./data/a_room_with_a_view.txt
    * Compressed File Name       : ./output/a_room_with_a_view.lz78
    * Original file MD5 Hash     : `3c60e35445d2a92a45542e208e3622a3`
    * Decompressed file MD5 Hash : `3c60e35445d2a92a45542e208e3622a3`
    * Compressed file MD5 Hash   : `58630fb240da593fe86a7ac56fbb9cdd`
    * Correct Descompression?    : Yes
    * Original Size              : 406490B
    * Compressed Size            : 309278B
    * Difference                 : 97212B
    * Reduction Percentage       : 23.91%
    * Encoding time              : 114.086839s
    * Decoding time              : 2.804593s
    * Total time                 : 116.891432s
    


    ***

    * ID                         : 1
    * Book                       : Alice's Adventures in Wonderland
    * File Name                  : ./data/alices_adventures_in_wonderland.txt
    * Compressed File Name       : ./output/alices_adventures_in_wonderland.lz78
    * Original file MD5 Hash     : `52c1597f26c5f35afbcdd9c0dfa81057`
    * Decompressed file MD5 Hash : `52c1597f26c5f35afbcdd9c0dfa81057`
    * Compressed file MD5 Hash   : `633fe0221f3626784e883ed31d861064`
    * Correct Descompression?    : Yes
    * Original Size              : 170517B
    * Compressed Size            : 127124B
    * Difference                 : 43393B
    * Reduction Percentage       : 25.45%
    * Encoding time              : 17.552642s
    * Decoding time              : 0.490239s
    * Total time                 : 18.042881s
    