In [1]:
from platform import python_version

# Print the version of the Python interpreter that executes this notebook.
print(python_version())

3.6.4


In [2]:
from collections import Counter
from collections import namedtuple
import heapq
import gevent
from multiprocessing import Pool 
import random
import string
import os
import sys

In [3]:
class Node(namedtuple('Node', ['left', 'right'])):
    """Internal node of the Huffman tree: a pair of subtrees."""

    def walk(self, code, symcode):
        """Visit both children, extending the bit prefix on the way down.

        ``code`` is the dict that the leaves fill in; ``symcode`` is the bit
        string accumulated on the path to this node.  The left child is
        visited first with "0" appended, then the right child with "1".
        """
        for bit, child in (("0", self.left), ("1", self.right)):
            child.walk(code, symcode + bit)
        
class Leaf(namedtuple('Leaf', ['char'])):
    """Terminal node of the Huffman tree carrying a single symbol."""

    def walk(self, code, symcode):
        """Record this leaf's bit string in ``code`` under its symbol.

        An empty ``symcode`` (nothing was accumulated on the way here) is
        stored as "0" so that the symbol still gets a non-empty code.
        """
        code[self.char] = symcode or "0"

def crt_heap(s):
    """Turn ``s`` into a list of ``(frequency, index, Leaf)`` entries.

    One entry is produced per distinct symbol of ``s``; ``index`` is the
    entry's position in the returned list.  Entries appear in ``Counter``
    iteration order; no heap ordering is applied here.
    """
    return [
        (freq, position, Leaf(symbol))
        for position, (symbol, freq) in enumerate(Counter(s).items())
    ]
        
def huffman_enc(h):
    """Build a Huffman code table from weighted tree entries.

    Parameters
    ----------
    h : list
        ``(weight, tie_breaker, node)`` tuples, for example the output of
        ``crt_heap``.  The list is modified in place.  It does not have to be
        heap-ordered: the heap invariant is established here.

    Returns
    -------
    dict
        Whatever the root's ``walk`` method stored (symbol -> bit string for
        the tree classes of this notebook); an empty dict when ``h`` is empty.
    """
    # heappop only returns the smallest entry on a valid heap.  Without this,
    # an unordered list silently yields a non-optimal tree.  On an already
    # valid heap this call changes nothing.
    heapq.heapify(h)
    count = len(h)  # tie-breaker for merged nodes, continuing after the leaves
    while len(h) > 1:
        pr1, _, left = heapq.heappop(h)
        pr2, _, right = heapq.heappop(h)
        heapq.heappush(h, (pr1 + pr2, count, Node(left, right)))
        count += 1
    coding = {}
    if h:
        # Exactly one entry is left at this point: the root of the tree.
        _pr, _count, root = h[0]
        root.walk(coding, "")
    return coding

def huffman_out(h, codec):
    """Encode ``h`` by concatenating the code of each of its symbols.

    Parameters
    ----------
    h : iterable
        Symbols to encode, e.g. a string.
    codec : mapping
        Symbol -> bit string, as returned by ``huffman_enc``.

    Returns
    -------
    str
        The concatenated bit strings; ``''`` when ``h`` is empty.

    Raises
    ------
    KeyError
        If a symbol of ``h`` has no entry in ``codec``.
    """
    # join builds the result in one pass instead of re-copying the growing
    # string on every iteration.
    return ''.join(codec[symbol] for symbol in h)
    

# Create a bunch of files filled with random gibberish.
def create_rand_fls(workpath: str, count_f: int) -> None:
    """Create ``count_f`` small text files of random characters in ``workpath``.

    The files are named ``rand0.txt`` ... ``rand<count_f - 1>.txt``.  Each one
    holds between 1 and 25 random ASCII letters and digits.  Files that
    already exist under these names are overwritten.

    Parameters
    ----------
    workpath : str
        Directory in which the files are created.
    count_f : int
        Number of files to create; nothing is created for values <= 0.
    """
    alphabet = string.ascii_letters + string.digits
    for i in range(count_f):
        target = os.path.join(workpath, 'rand' + str(i) + '.txt')
        gib = ''.join(random.choices(alphabet, k=random.randint(1, 25)))
        # The context manager closes the handle even if the write fails.
        with open(target, "w") as file:
            file.write(gib)


#### Algorithm for a single string

In [4]:
# Ask for the number of files until the answer parses as an integer.
count_f = None
while count_f is None:
    try:
        count_f = int(input('How many text files need to be created?\n'))
    except ValueError:
        print('This is not a number. Enter a value that can be converted to a numeric\n')

# Re-prompt until an existing directory is given: create_rand_fls and
# os.listdir below cannot work with a missing path or a regular file.
working_path = input('Specify working directory\n')
while not os.path.isdir(working_path):
    print('The specified directory does not exist')
    working_path = input('Specify working directory\n')

create_rand_fls(working_path, count_f)
file_names = os.listdir(working_path)
print(file_names)

How many text files need to be created?
2
Specify working directory
C:\\Users\\Ася\\huffman files txt
['rand0.txt', 'rand1.txt']


In [5]:
def main(workpath):
    """Huffman-encode the first line of every file in ``workpath`` and print it.

    A separate code table is built for each file from that file's first line
    only.  For every file the text read, its code table and the encoded bit
    string are printed.

    Parameters
    ----------
    workpath : str
        Directory whose entries are processed; every entry is opened as a
        text file.
    """
    for every in os.listdir(workpath):
        # The context manager closes the file even if reading raises.
        with open(os.path.join(workpath, every), "r") as curfile:
            s = curfile.readline()
        new_heap = crt_heap(s)
        heapq.heapify(new_heap)
        coding = huffman_enc(new_heap)
        output = huffman_out(s, coding)
        print('Content of file', every, ':', s, '\n', coding, '\n', output)

In [6]:
%%time
if __name__ == '__main__':
    # TODO: replace the loop inside main() with Pool.map over the entries of
    # file_names; multiprocessing may turn out to be faster that way.
    # No worker pool is created yet because nothing would be dispatched to it,
    # so the timing covers the sequential algorithm only.
    main(working_path)
    # Clean up only the files generated for this run (rand<i>.txt).  Anything
    # else in the user-chosen directory, including subdirectories, is kept.
    for i in range(count_f):
        generated = os.path.join(working_path, 'rand' + str(i) + '.txt')
        if os.path.isfile(generated):
            os.remove(generated)

Content of file rand0.txt : hT09wWDCYaAA7poOcNiP80 
 {'a': '0000', '7': '0001', 'p': '0010', 'o': '0011', 'O': '0100', 'c': '0101', 'N': '0110', 'i': '0111', 'P': '1000', '8': '1001', '0': '1010', 'A': '1011', 'h': '11000', 'T': '11001', '9': '11010', 'w': '11011', 'W': '11100', 'D': '11101', 'C': '11110', 'Y': '11111'} 
 110001100110101101011011111001110111110111110000101110110001001000110100010101100111100010011010
Content of file rand1.txt : 8CPYA97WWo2xTHEJgj 
 {'P': '0000', 'Y': '0001', 'A': '0010', '9': '0011', '7': '0100', 'o': '0101', '2': '0110', 'x': '0111', 'T': '1000', 'H': '1001', 'E': '1010', 'J': '1011', 'g': '1100', 'j': '1101', 'W': '1110', '8': '11110', 'C': '11111'} 
 11110111110000000100100011010011101110010101100111100010011010101111001101
Wall time: 223 ms


#### Algorithm for several string sources

In [34]:
def main():
    """Placeholder for the multi-source variant; does nothing yet.

    NOTE(review): this definition replaces the earlier ``main(workpath)`` in
    the kernel namespace, so a call such as ``main(working_path)`` raises
    TypeError once this cell has been run.
    """
    return None

Building a separate code table for each file does not quite match the assignment. All the data should be concatenated into a single string, and the Huffman code should be built from that string. After that, for every file, call `print('Content of file', every.name, ':', s, '\n', coding111, '\n', output)` using the shared code table. I will implement this in another `main` right below.