In [1]:
import sys

class Node:
    """A single binary-tree node carrying a key and a value.

    Both child links start out as None; callers attach children by
    assigning to ``left`` and ``right`` directly.
    """

    def __init__(self, key, value):
        self.key, self.value = key, value
        # No children until a caller links them in.
        self.left = self.right = None

    def __str__(self):
        """Return a one-line, human-readable description of the node."""
        return 'Key is {} and value is {}'.format(self.key, self.value)

class Tree:
    """Thin wrapper that holds the root node of a tree."""

    def __init__(self, root):
        # root: the top node of the tree, or None when there is no tree.
        self.root = root

    def __str__(self):
        """Describe the tree via its root node.

        A Tree has no ``key``/``value`` attributes of its own (only
        ``root``), so the description is delegated to the root node's
        string form instead of reading attributes that do not exist.
        """
        if self.root is None:
            return 'Empty tree'
        return str(self.root)

def insertNode(newNode, nodeList):
    """Insert ``newNode`` into ``nodeList`` in place, ordered by ``value``.

    The node goes directly before the first existing node whose value is
    greater than or equal to its own; if there is no such node it is placed
    at the end. Nothing is returned.
    """
    # First index whose node does not have a smaller value; falls back to
    # the list length, where insert() behaves like append().
    position = next(
        (
            idx
            for idx, existing in enumerate(nodeList)
            if newNode.value <= existing.value
        ),
        len(nodeList),
    )
    nodeList.insert(position, newNode)

def createTree(nodeList):
    """Combine the nodes of ``nodeList`` into one tree and return its root.

    While at least two nodes remain, the two at the front of the list are
    detached and joined under a new key-less parent whose value is the sum
    of theirs; the parent is then put back with insertNode. The list passed
    in is not modified, because each round works on a fresh slice.

    Returns the single remaining node, or None when ``nodeList`` is empty.
    """
    remaining = nodeList
    while len(remaining) > 1:
        first, second = remaining[0], remaining[1]

        parent = Node(None, first.value + second.value)
        parent.left, parent.right = first, second

        # Drop the two merged nodes, then re-insert their parent by value.
        rest = remaining[2:]
        insertNode(parent, rest)
        remaining = rest

    return remaining[0] if len(remaining) == 1 else None

def getTreeDict(currentNode, code = ""):
    """Map the key of every leaf below ``currentNode`` to its bit-string code.

    A step to a left child appends "0" to the code and a step to a right
    child appends "1"; ``code`` is the prefix already accumulated for
    ``currentNode``. A leaf reached with an empty code (the tree is just
    that one node) is given the code "0".

    Leaves are visited left to right, so the returned dict is in that order.
    """
    table = {}
    # Explicit stack instead of recursion; the right child is pushed first
    # so the left subtree is popped (and recorded) first.
    pending = [(currentNode, code)]
    while pending:
        node, prefix = pending.pop()

        if node.left is None and node.right is None:
            table[node.key] = prefix if prefix != "" else "0"
            continue

        if node.right:
            pending.append((node.right, prefix + "1"))
        if node.left:
            pending.append((node.left, prefix + "0"))
    return table
    
def huffman_encoding(data):
    """Encode ``data`` as a string of '0'/'1' characters.

    Returns a pair ``(encoded_string, tree)`` where ``tree`` is the Tree
    whose leaves carry the symbols of ``data``. For ``None`` or an empty
    string the result is ``'0'`` together with a tree holding one node with
    an empty key.

    Progress information (the sorted frequency list, the first node and the
    code table) is printed along the way.
    """
    if data == None or data == "":
        return '0', Tree(Node("",""))

    # Count how often each symbol occurs, in first-seen order.
    counts = {}
    for symbol in data:
        counts[symbol] = counts.get(symbol, 0) + 1

    # Ascending by frequency; the sort is stable, so ties stay in
    # first-seen order.
    orderedList = sorted(counts.items(), key=lambda entry: entry[1])
    print("ordered list ", orderedList)

    # One node per distinct symbol, keyed by symbol and valued by frequency.
    leaves = [Node(symbol, weight) for symbol, weight in orderedList]
    print("nodelist", leaves[0])

    encodedTree = Tree(createTree(leaves))
    codebook = getTreeDict(encodedTree.root)
    print("encoded dict ", codebook)

    encodedData = "".join(codebook[symbol] for symbol in data)
    return encodedData, encodedTree


def huffman_decoding(data,tree):
    """Decode a string of '0'/'1' characters using ``tree``.

    Args:
        data: the encoded bit string ('0' means go left, '1' means go right).
        tree: an object whose ``root`` attribute is the top node of the
            code tree; leaves carry the decoded symbol in ``key``.

    Returns:
        The decoded string. Empty or ``None`` data decodes to "".

    Characters other than '0' and '1' are ignored.
    """
    if not data:
        return ""

    root = tree.root

    # A tree that is a single leaf encodes its one symbol as "0", so every
    # '0' bit stands for one occurrence of that symbol (e.g. "000" -> "aaa").
    if root.left is None and root.right is None:
        return root.key * data.count('0')

    symbols = []
    node = root
    for bit in data:
        if bit == '0':
            node = node.left
        elif bit == '1':
            node = node.right
        else:
            continue

        # Reaching a leaf completes one symbol; restart from the root.
        # Leaf-ness is tested structurally so a falsy key still decodes.
        if node.left is None and node.right is None:
            symbols.append(node.key)
            node = root

    return "".join(symbols)

if __name__ == "__main__":
    # Demo driver: encode one sample string, decode it again, and print the
    # size and content at each stage. Exactly one ``a_great_sentence``
    # assignment must be active, otherwise the prints below raise NameError.

    ## Test Case 1
    # Where the data provided is empty
    # a_great_sentence = ""

    ## Test Case 2
    # Data provided is not alphabetical
    a_great_sentence = "1231 23123"

    ## Test Case 3
    # Data provided consists of only 1 character
    # a_great_sentence = "a"

    # a_great_sentence = "The bird is the word"
    # a_great_sentence = "AAAAAAABBBCCCCCCCDDEEEEEE"

    print ("The size of the data is: {}\n".format(sys.getsizeof(a_great_sentence)))
    print ("The content of the data is: {}\n".format(a_great_sentence))

    encoded_data, tree = huffman_encoding(a_great_sentence)

    # The bit string is converted to an int so the reported size reflects
    # the packed bits rather than one character per bit.
    print ("The size of the encoded data is: {}\n".format(sys.getsizeof(int(encoded_data, base=2))))
    print ("The content of the encoded data is: {}\n".format(encoded_data))

    decoded_data = huffman_decoding(encoded_data, tree)

    print ("The size of the decoded data is: {}\n".format(sys.getsizeof(decoded_data)))
    print ("The content of the decoded data is: {}\n".format(decoded_data))



The size of the data is: 59

The content of the data is: 1231 23123

ordered list  [(' ', 1), ('1', 3), ('2', 3), ('3', 3)]
nodelist Key is   and value is 1
encoded dict  {' ': '00', '1': '01', '2': '10', '3': '11'}
The size of the encoded data is: 28

The content of the encoded data is: 01101101001011011011

The size of the decoded data is: 59

The content of the decoded data is: 1231 23123

