In [38]:
import cv2
import numpy as np
import gzip
import pickle
import bisect

In [5]:
def dct(path):
    """Run a DCT round trip on an image and save both stages as JPEG files.

    The image at ``path`` is loaded as grayscale, converted to float32,
    transformed with ``cv2.dct`` and transformed back with ``cv2.idct``.
    The coefficient matrix is written to ``dct_image.jpg`` and the
    reconstruction to ``idct_image.jpg`` in the current working directory.

    Args:
        path: Path of the image file to read.

    Raises:
        FileNotFoundError: If OpenCV could not load an image from ``path``.
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an obscure error inside cv2.dct.
    if img is None:
        raise FileNotFoundError("could not read image: {!r}".format(path))

    coefficients = cv2.dct(np.float32(img))
    restored = cv2.idct(coefficients)

    cv2.imwrite('dct_image.jpg', coefficients)
    cv2.imwrite('idct_image.jpg', restored)

In [6]:
# Run the DCT round trip on cat.jpg; writes dct_image.jpg and idct_image.jpg.
dct("cat.jpg")

In [23]:
def compression(path, output_path="file.gz"):
    """Gzip-compress the file at ``path``.

    The input is read as raw bytes and written unchanged into the archive,
    so decompressing the result yields the original file content.

    Args:
        path: Path of the file to compress.
        output_path: Where to write the gzip archive. Defaults to
            ``"file.gz"`` in the current working directory.
    """
    with open(path, "rb") as source:
        payload = source.read()

    # Write the bytes exactly as read. Routing them through str() would
    # store their textual repr (b'...') instead of the actual content.
    with gzip.GzipFile(output_path, 'wb') as gzip_file:
        gzip_file.write(payload)

In [26]:
# Gzip 1.txt; the archive is written to file.gz in the working directory.
compression("1.txt")

In [33]:
# Length of the per-symbol tables: count_file_symbols and calc_haffman_code
# allocate lists of this size and index them by character code.
# NOTE(review): 255 slots cover codes 0..254 only, so a character with code
# 255 or above indexes past the end — confirm whether 256 was intended.
AlphPower = 255
# First character code included when init_model builds the model.
FirstCode = 0
# NOTE(review): not referenced in the visible part of this notebook.
CodingLength = 5

In [34]:
def count_file_symbols(filename):
    """Count how often each character occurs in a text file.

    Args:
        filename: Path of the text file to scan (opened in text mode).

    Returns:
        A tuple ``(symbol_count, ents)``: ``symbol_count`` is the total
        number of characters read and ``ents`` is a list of ``AlphPower``
        counters indexed by ``ord(char)``.

    Raises:
        IndexError: If the file contains a character whose code is
            ``AlphPower`` or greater.
    """
    symbol_count = 0
    ents = [0] * AlphPower
    # The context manager closes the file even if counting fails part-way.
    with open(filename, 'r') as input_file:
        for line in input_file:
            for char in line:
                ents[ord(char)] += 1
            symbol_count += len(line)
    return symbol_count, ents

In [39]:
def init_model(inputFilename, outputFilename):
    """Compute the probability of each symbol in a file and pickle the result.

    The model is needed again for decoding, so it is saved to a binary file
    with the pickle module. It is stored as a list of
    ``(probability, [char_code])`` tuples, one per character code from
    ``FirstCode`` up to the size of the counter table.

    Args:
        inputFilename: Path of the text file to analyse.
        outputFilename: Path of the binary model file to write.
    """
    (symbol_count, ents) = count_file_symbols(inputFilename)

    probability_to_char = []
    for char_code in range(FirstCode, len(ents)):
        if symbol_count:
            probability = 1.0 * ents[char_code] / symbol_count
        else:
            # An empty input has no symbols at all; record 0.0 for every
            # code instead of dividing by zero.
            probability = 0.0
        probability_to_char.append((probability, [char_code]))

    # The context manager closes the file even if pickling fails.
    with open(outputFilename, 'wb') as model_file:
        pickle.dump(probability_to_char, model_file)

def read_model(modelFilename):
    """Load a pickled model from ``modelFilename`` and return it.

    Args:
        modelFilename: Path of the binary file to unpickle.

    Returns:
        Whatever object was pickled into the file; ``init_model`` writes a
        list of ``(probability, [char_code])`` tuples.
    """
    # SECURITY: pickle.load can execute arbitrary code while unpickling;
    # only open model files that come from a trusted source.
    with open(modelFilename, 'rb') as input_file:
        return pickle.load(input_file)

def add_code_for_sequences(char_to_code, sequence, code):
    """Prefix ``code`` onto the stored code of every symbol in ``sequence``.

    ``char_to_code`` is updated in place and nothing is returned.
    """
    for symbol in sequence:
        existing = char_to_code[symbol]
        char_to_code[symbol] = code + existing

def calc_haffman_code(probability_to_sequence):
    """Build a Huffman code table from symbol probabilities.

    Args:
        probability_to_sequence: Iterable of ``(probability, [char_code])``
            tuples. It is not modified and does not need to be sorted.

    Returns:
        A list of ``AlphPower`` strings in which entry ``i`` is the bit
        string assigned to character code ``i``. Codes that never appear in
        the input keep the empty string, as does the only symbol when the
        input has a single entry.
    """
    char_to_code = [''] * AlphPower

    # Work on a sorted copy: the merge loop needs ascending order, and the
    # caller's list must not be consumed as a side effect.
    queue = sorted(probability_to_sequence)

    while len(queue) > 1:
        # Take the two least probable groups and merge them into one node.
        (first_prob, first_seq) = queue.pop(0)
        (second_prob, second_seq) = queue.pop(0)

        # Each merge adds one leading bit to every symbol below the new
        # node, so codes are built from the leaves towards the root.
        add_code_for_sequences(char_to_code, first_seq, '0')
        add_code_for_sequences(char_to_code, second_seq, '1')

        # Re-insert the merged node while keeping the queue sorted.
        bisect.insort(queue, (first_prob + second_prob, first_seq + second_seq))
    return char_to_code

def print_char_codes(char_to_code):
    """Print one ``<char_code> <code>`` line for each entry of the table."""
    for symbol, code in enumerate(char_to_code):
        print(' '.join((str(symbol), str(code))))

def create_code_to_char(char_to_code):
    """Invert the code table into a ``{code: char_code}`` dictionary.

    When several positions hold the same code, the highest position wins.
    """
    return {code: symbol for symbol, code in enumerate(char_to_code)}

def haffman_code(input_filename, char_to_code, output_filename):
    """Encode a text file by replacing each character with its code string.

    Args:
        input_filename: Path of the text file to encode.
        char_to_code: Table indexed by ``ord(char)`` giving the code string
            for each character.
        output_filename: Path of the text file that receives the
            concatenated code strings.
    """
    # One `with` manages both files so they are closed even when a character
    # is missing from the table and the lookup raises.
    with open(input_filename, 'r') as input_file, \
            open(output_filename, 'w') as output_file:
        for line in input_file:
            output_file.write(''.join(char_to_code[ord(char)] for char in line))

def haffman_decode(input_filename, code_to_char, output_filename):
    """Decode a file of code characters back into text.

    Characters are read one at a time and accumulated until the accumulated
    string is a key of ``code_to_char``; the matching character is then
    written and the accumulator is reset. Any characters left over at the
    end of the input that do not form a complete code are dropped.

    Args:
        input_filename: Path of the encoded text file.
        code_to_char: Mapping from code string to character code.
        output_filename: Path of the text file that receives the decoded
            characters.
    """
    # One `with` manages both files so they are closed even if decoding
    # raises part-way through.
    with open(input_filename, 'r') as input_file, \
            open(output_filename, 'w') as output_file:
        sequence = ''
        while True:
            char = input_file.read(1)
            if not char:
                break

            sequence += char
            if sequence in code_to_char:
                output_file.write(chr(code_to_char[sequence]))
                sequence = ''

# Build the symbol-probability model for in.txt and store it in ents.txt.
init_model('in.txt', 'ents.txt')
# Reload the model and order it by ascending probability before building
# the code table.
probability_to_char = read_model('ents.txt')
probability_to_char = sorted(probability_to_char)
print(probability_to_char)

# Derive the code table (character code -> code string) and its inverse.
char_to_code = calc_haffman_code(probability_to_char)
#print_char_codes(char_to_code)
code_to_char = create_code_to_char(char_to_code)

# Encode in.txt into code.txt, then decode code.txt into decode.txt.
haffman_code('in.txt', char_to_code, 'code.txt')
haffman_decode('code.txt', code_to_char, 'decode.txt')

print(code_to_char)

[(0.0, [0]), (0.0, [1]), (0.0, [2]), (0.0, [3]), (0.0, [4]), (0.0, [5]), (0.0, [6]), (0.0, [7]), (0.0, [8]), (0.0, [9]), (0.0, [11]), (0.0, [12]), (0.0, [13]), (0.0, [14]), (0.0, [15]), (0.0, [16]), (0.0, [17]), (0.0, [18]), (0.0, [19]), (0.0, [20]), (0.0, [21]), (0.0, [22]), (0.0, [23]), (0.0, [24]), (0.0, [25]), (0.0, [26]), (0.0, [27]), (0.0, [28]), (0.0, [29]), (0.0, [30]), (0.0, [31]), (0.0, [32]), (0.0, [33]), (0.0, [34]), (0.0, [35]), (0.0, [36]), (0.0, [37]), (0.0, [38]), (0.0, [39]), (0.0, [40]), (0.0, [41]), (0.0, [42]), (0.0, [43]), (0.0, [44]), (0.0, [45]), (0.0, [46]), (0.0, [47]), (0.0, [48]), (0.0, [55]), (0.0, [56]), (0.0, [57]), (0.0, [58]), (0.0, [59]), (0.0, [60]), (0.0, [61]), (0.0, [62]), (0.0, [63]), (0.0, [64]), (0.0, [65]), (0.0, [66]), (0.0, [67]), (0.0, [68]), (0.0, [69]), (0.0, [70]), (0.0, [71]), (0.0, [72]), (0.0, [73]), (0.0, [74]), (0.0, [75]), (0.0, [76]), (0.0, [77]), (0.0, [78]), (0.0, [79]), (0.0, [80]), (0.0, [81]), (0.0, [82]), (0.0, [83]), (0.0, [8