In [9]:
from platform import python_version

# Show which Python version the running kernel uses.
print(python_version())

3.6.4


In [10]:
from collections import Counter
from collections import namedtuple
import heapq
import gevent
from multiprocessing import Pool 
import random
import string
import os
import sys

In [11]:
class Node(namedtuple('Node', ['left', 'right'])):
    """Internal node of the Huffman tree: a pair of subtrees."""

    def walk(self, code, symcode):
        """Pass the prefix down to both children.

        ``code`` is the dict the leaves write into and ``symcode`` is the
        bit string collected so far. The left child receives the prefix
        extended with "0", the right child the prefix extended with "1".
        """
        for bit, child in (("0", self.left), ("1", self.right)):
            child.walk(code, symcode + bit)
        
class Leaf(namedtuple('Leaf', ['char'])):
    """Terminal node of the Huffman tree carrying one symbol."""

    def walk(self, code, symcode):
        """Store this leaf's code word in ``code`` under its symbol.

        An empty prefix means the leaf is the whole tree (only one distinct
        symbol), in which case the code word "0" is used.
        """
        code[self.char] = symcode or "0"

def crt_heap(s):
    """Return the list of leaf entries for the symbols of ``s``.

    Every entry is ``(frequency, position, Leaf(symbol))`` where ``position``
    is the entry's index in the returned list. The list is returned in
    counting order and is not heapified here.
    """
    frequencies = Counter(s)
    return [
        (freq, position, Leaf(symbol))
        for position, (symbol, freq) in enumerate(frequencies.items())
    ]
        
def huffman_enc(h):
    """Build the symbol -> bit-string table from a heap of tree entries.

    ``h`` holds ``(weight, tie_breaker, tree)`` tuples and is consumed in
    place: the two smallest entries are repeatedly popped and pushed back
    merged under a new ``Node`` until one entry remains. Merged entries get
    tie-breakers counting up from the initial length of ``h``.

    Returns an empty dict for an empty ``h``.
    """
    next_id = len(h)
    while len(h) >= 2:
        weight_a, _, lighter = heapq.heappop(h)
        weight_b, _, heavier = heapq.heappop(h)
        merged = Node(lighter, heavier)
        heapq.heappush(h, (weight_a + weight_b, next_id, merged))
        next_id += 1

    table = {}
    if not h:
        return table

    # Exactly one entry is left: the root of the finished tree.
    [(_, _, root)] = h
    root.walk(table, "")
    return table

def huffman_out(h, codec):
    """Encode the symbols of ``h`` with the code table ``codec``.

    Args:
        h: iterable of symbols (here: the input string).
        codec: mapping from symbol to its bit string.

    Returns:
        The concatenation of the code words, in input order.

    Raises:
        KeyError: if a symbol of ``h`` is missing from ``codec``.
    """
    # join builds the result in one pass instead of re-copying the growing
    # string on every iteration.
    return ''.join(codec[symbol] for symbol in h)
    

# create bunch of gibberish filled files
def create_rand_fls(workpath : str, count_f : int):
    """Create ``count_f`` text files with random content in ``workpath``.

    The files are named ``rand0.txt`` .. ``rand<count_f - 1>.txt``. Each one
    holds a single line of 1 to 25 random ASCII letters and digits. Existing
    files with the same names are overwritten.

    Args:
        workpath: directory the files are written to; it must already exist.
        count_f: number of files to create.
    """
    alphabet = string.ascii_letters + string.digits
    for i in range(count_f):
        newfile = 'rand' + str(i) + '.txt'
        gib = ''.join(random.choices(alphabet, k=random.randint(1, 25)))
        # The context manager closes the file even if the write fails.
        with open(os.path.join(workpath, newfile), "w") as file:
            file.write(gib)


### Algorithm for a single string

In [12]:
def main():
    """Read one string from the user and print its Huffman encoding.

    Prints the symbol -> code table followed by the encoded input.
    """
    text = input('Input string\n')
    heap = crt_heap(text)
    heapq.heapify(heap)
    table = huffman_enc(heap)
    encoded = huffman_out(text, table)
    print('\n', table, '\n', encoded)


if __name__ == '__main__':
    main()

Input string
fsf

 {'s': '0', 'f': '1'} 
 101


### Algorithm for several string sources

In [20]:
# Ask for the number of files until the answer parses as an integer.
count_f = None
while count_f is None:
    try:
        count_f = int(input('How many text files need to be created?\n'))
    except ValueError:
        print('This is not a number. Enter a value that can be converted to a numeric\n')

# Ask for the working directory until an existing directory is given.
# Continuing with a missing path would make the file creation and the
# directory listing below fail.
working_path = input('Specify working directory\n')
while not os.path.isdir(working_path):
    print('The specified directory does not exist')
    working_path = input('Specify working directory\n')

# Fill the directory with random files and show what it contains now.
create_rand_fls(working_path, count_f)
file_names = os.listdir(working_path)
print(file_names)

How many text files need to be created?
5
Specify working directory
C:\\Users\\Ася\\huffman files txt
['rand0.txt', 'rand1.txt', 'rand2.txt', 'rand3.txt', 'rand4.txt']


In [44]:
def main(workpath):
    """Huffman-encode the first line of every file in ``workpath``.

    One code table is built from the concatenation of all first lines, so
    every file is encoded with the same table. The table is printed first,
    then the encoded content of each file.

    Args:
        workpath: directory whose entries are all opened as text files.
    """
    content = {}
    for every in os.listdir(workpath):
        # The context manager closes the file even if reading fails.
        with open(os.path.join(workpath, every), "r") as curfile:
            # Only the first line of each file is read.
            content[every] = curfile.readline()

    conc = ''.join(content.values())
    new_heap = crt_heap(conc)
    heapq.heapify(new_heap)
    coding111 = huffman_enc(new_heap)
    print(coding111, '\n')

    # The inputs arrived as separate strings, so each one is encoded and
    # reported separately instead of as one concatenated result.
    for name, text in content.items():
        output = huffman_out(text, coding111)
        print('Content of file', name, ':',  output, '\n')

In [45]:
# Call main with the working directory stored in working_path.
if __name__ == '__main__':
    main(working_path)

{'6': '00000', 'W': '00001', 'H': '00010', 'l': '00011', 'c': '00100', 'Q': '00101', 'G': '00110', '9': '00111', '2': '01000', 'w': '01001', '0': '01010', 'Y': '01011', 'T': '0110', 'h': '0111', 'o': '1000', 'u': '1001', 'P': '1010', 'f': '10110', 'J': '10111', 'v': '11000', 'a': '11001', '1': '11010', 'B': '110110', '4': '110111', 'y': '111000', 'C': '111001', 'm': '111010', 't': '111011', 'K': '111100', 'F': '111101', 'x': '111110', 'R': '111111'} 

Content of file rand0.txt : 1101101011011011111100010111011001111110010111 

Content of file rand1.txt : 111010111011111100 

Content of file rand2.txt : 1100011110111111011111111001100010011011110100000010100111000011000 

Content of file rand3.txt : 1011000010000110010011010 

Content of file rand4.txt : 00101101001100011011000001110100001001110100101011001100110010110010111000 



### Multiprocess execution of the code

In [47]:
%%time
if __name__ == '__main__':
    # NOTE: the pool is created but no work is submitted to it yet, so
    # main() still runs entirely in the parent process.
    with Pool(2) as p:
        # TODO: replace the loop inside main with a map over the entries of
        # file_names; multiprocessing might be faster that way.
        main(working_path)
        #map(main, file_names)
    # Clean up only the regular files that follow the rand<i>.txt naming used
    # by create_rand_fls, so unrelated files and subdirectories in the
    # user-supplied directory are left alone.
    for f in os.listdir(working_path):
        target = os.path.join(working_path, f)
        if f.startswith('rand') and f.endswith('.txt') and os.path.isfile(target):
            os.remove(target)

{} 

Wall time: 158 ms
