In [2]:
class node:
    """A binary-tree node carrying a label and a numeric weight.

    name  -- label stored on the node
    num   -- weight stored on the node
    right -- right child, or None
    left  -- left child, or None
    """

    def __init__(self, name, num, right=None, left=None):
        self.right, self.left = right, left
        self.name, self.num = name, num

    def __str__(self):
        # Rendered as the tuple (name, num, left, right).
        fields = (self.name, self.num, self.left, self.right)
        return str(fields)


class HuffmanCoding:
    """Huffman-compress a text file and decode it again.

    ``compress()`` reads ``path``, builds a Huffman tree out of ``node``
    objects (the ``node`` class defined in this file), and writes the packed
    bit stream to ``<base>.bin``.  ``decompress()`` reads ``<base>.bin`` and
    writes the recovered text to ``<base>_decoded.txt``.  ``<base>`` is
    ``path`` with its final extension removed.

    The code table is NOT stored in the ``.bin`` file: ``decompress()`` uses
    ``self.codes`` as left in memory by a previous ``compress()`` on the same
    instance.

    Bit-stream layout: one leading ``'0'`` bit, the concatenated symbol codes,
    a terminating ``'1'`` bit, then ``'0'`` bits up to the next byte boundary.
    """

    def __init__(self, path):
        """Remember ``path`` and initialise the working state."""
        self.path = path
        self.frequency = {}     # becomes a weight-sorted list of nodes
        self.codes = {}         # symbol -> bit string
        self.temp_code = ''
        self.write = '0'        # bit string most recently written
        self.i = 0              # counter used to name internal nodes
        self.read = ''          # payload bits most recently read
        self.final_string = ''  # text produced by the last decompress()

    def _base_path(self):
        """Return ``self.path`` without its final extension."""
        # splitext only strips the last extension of the last path component,
        # so dots in directory names (or a leading "./") are left alone.
        import os
        return os.path.splitext(self.path)[0]

    def compress(self):
        """Read the input file, build the code table and write ``<base>.bin``."""
        self.to_list()
        self.get_frequency()
        self.create_tree()
        # Start from a clean table so codes from an earlier run cannot leak in.
        self.codes = {}
        if self.frequency:  # an empty input file has no tree to walk
            self.create_codes(self.frequency[0], '')
        self.write_file()

    def to_list(self):
        """Load the input file into ``self.list`` as a list of characters."""
        with open(self.path, 'r') as f:
            self.list = list(f.read())

    def get_frequency(self):
        """Set ``self.frequency`` to leaf nodes sorted by ascending count.

        Symbols with equal counts keep their first-occurrence order.
        """
        from collections import Counter
        # Counter keeps first-insertion order and sorted() is stable, so ties
        # are ordered by first occurrence in the text.
        counts = Counter(self.list)
        ordered = sorted(counts.items(), key=lambda item: item[1])
        self.frequency = [node(symbol, count) for symbol, count in ordered]

    def create_tree(self):
        """Reduce ``self.frequency`` to a single root by merging the two
        lightest nodes until at most one node remains."""
        while len(self.frequency) > 1:
            self.i += 1  # one fresh name per internal node
            first, second = self.frequency[0], self.frequency[1]
            parent = node("P" + str(self.i), first.num + second.num, first, second)
            del self.frequency[:2]

            # Keep the list sorted: place the parent before the first node
            # whose weight is not smaller, or at the end if there is none.
            for index, candidate in enumerate(self.frequency):
                if candidate.num >= parent.num:
                    self.frequency.insert(index, parent)
                    break
            else:
                self.frequency.append(parent)

    def create_codes(self, subtree, code):
        """Fill ``self.codes`` for every leaf below ``subtree``.

        ``code`` is the bit string accumulated on the way down; the right
        branch appends ``'0'`` and the left branch appends ``'1'``.
        """
        if subtree.right is None and subtree.left is None:  # found a symbol
            # A tree made of a single leaf would otherwise get the empty
            # code and encode to nothing; give it one bit instead.
            self.codes[subtree.name] = code or '0'
            return
        self.create_codes(subtree.right, code + '0')
        self.create_codes(subtree.left, code + '1')

    def write_file(self):
        """Encode ``self.list`` with ``self.codes`` and write ``<base>.bin``.

        Raises KeyError if a symbol in ``self.list`` has no code.
        """
        # Rebuilt from scratch on every call so repeated calls do not append
        # to the previous bit string.
        bits = '0' + ''.join(self.codes[c] for c in self.list)
        # Terminating '1' plus zero padding up to a whole number of bytes.
        bits += '1' + (7 - len(bits) % 8) * '0'
        self.write = bits

        self.b = bytearray(int(bits[i:i + 8], 2) for i in range(0, len(bits), 8))
        with open(self._base_path() + ".bin", "wb") as f:
            f.write(bytes(self.b))

    def decompress(self):
        """Decode ``<base>.bin`` with ``self.codes``, print the text and
        write it to ``<base>_decoded.txt``."""
        with open(self._base_path() + ".bin", "rb") as f:
            raw = f.read()
        bits = ''.join(format(byte, '08b') for byte in raw)

        # Drop the zero padding and the terminating '1', then the leading '0'.
        self.read = bits.rstrip('0')[:-1][1:]

        self.decodes = {value: key for (key, value) in self.codes.items()}
        pieces = []
        word = ''
        for bit in self.read:
            word += bit
            if word in self.decodes:
                pieces.append(self.decodes[word])
                word = ''
        # Assigned, not appended, so calling decompress() twice does not
        # duplicate the text.
        self.final_string = ''.join(pieces)
        print("Decoded:", self.final_string)

        # Write to text
        with open(self._base_path() + "_decoded.txt", "w") as f:
            f.write(self.final_string)




In [3]:
# Compress file3.txt; compress() builds the code table on `h` and writes the
# encoded bits to a .bin file.
h = HuffmanCoding('file3.txt')
h.compress()

In [5]:
# Decode the .bin file with the code table still held on `h`; prints the text
# and writes it to a _decoded.txt file.
h.decompress()

Decoded: 
I call our world Flatland, not because we call it so, but to make its
nature clearer to you, my happy readers, who are privileged to live in
Space.

Imagine a vast sheet of paper on which straight Lines, Triangles,
Squares, Pentagons, Hexagons, and other figures, instead of remaining
fixed in their places, move freely about, on or in the surface, but
without the power of rising above or sinking below it, very much like
shadows -- only hard and with luminous edges -- and you will then have a
pretty correct notion of my country and countrymen. Alas, a few years
ago, I should have said "my universe": but now my mind has been opened to
higher views of things.

In such a country, you will perceive at once that it is impossible that
there should be anything of what you call a "solid" kind; but I dare say
you will suppose that we could at least distinguish by sight the
Triangles Squares and other figures moving about as I have described
them. On the contrary, we could see nothing of