#In [None]:
#%debug
import os
import types
import re
import zipfile
from tqdm.auto import tqdm
from functools import partial

#useful for debugging:
#from IPython.core.debugger import set_trace

#module-level byte counters that progress_write() updates through `global`:
#progress_file is the count for the file being zipped, progress_total the overall count
progress_file, progress_total = 0, 0

def get_foldersize(source_path, pattern=r'.*', recursive=False):
    """Return the combined size in bytes of the matching files under source_path.

    source_path -- folder handed to os.walk.
    pattern     -- regular expression applied to each file name with re.match,
                   so it is anchored at the start of the name only.
    recursive   -- when equal to False only the top folder is scanned,
                   otherwise every sub-folder is scanned as well.
    """
    size_in_bytes = 0

    for dirpath, subdirs, filenames in os.walk(source_path, topdown=True):
        #the `== False` comparison is kept on purpose: only values equal to
        #False (False, 0) stop the walk from descending
        if recursive == False:
            #emptying the list in place prevents os.walk from visiting sub-folders
            del subdirs[:]

        #add up the sizes of the files whose name matches the pattern
        size_in_bytes += sum(
            os.path.getsize(os.path.join(dirpath, name))
            for name in filenames
            if re.match(pattern, name)
        )

    return size_in_bytes

def progress_write(write, self, buf):
    """Wrapper around a file object's write() that keeps two progress bars current.

    write -- the original write callable; it receives buf unchanged.
    self  -- the object the wrapper is bound to (not used here).
    buf   -- the buffer about to be written.

    Reads and updates the module-level counters progress_file and
    progress_total and the module-level bars pbar_file and pbar_total.
    Returns whatever write(buf) returns.

    The number of source bytes credited per call is an estimate, not a
    measurement: it assumes one call per 8 KiB chunk copied by zipfile
    (`shutil.copyfileobj(src, dest, 1024*8)` in
    https://github.com/python/cpython/blob/master/Lib/zipfile.py)
    -- TODO confirm for the Python version in use.
    """
    global progress_file, progress_total

    chunk_size = 8 * 1024
    #buffers starting with b'PK' are treated as zip record headers, not file data
    is_header = buf[0:2] == b'PK'

    if is_header and progress_file == 0:
        #header before any data of the current file: nothing read yet
        step = 0
    elif is_header and progress_file > 0:
        #header after data was counted: treat the current file as finished
        step = pbar_file.total - pbar_file.n
    else:
        #a data chunk: never count past the end of the file (also covers files < 8K)
        step = min(chunk_size, pbar_file.total - progress_file)

    progress_file += step
    progress_total += step

    pbar_file.n = progress_file
    pbar_total.n = progress_total

    #update(0) only triggers a redraw, the counts were set directly above
    pbar_file.update(0)
    pbar_total.update(0)

    #hand the untouched buffer to the real write() function
    return write(buf)

#Creates a zip file with files from a specific path. Uses a wrapper function to get
#progress status from the write function that is (indirectly) called by ZipFile.
#Displays progress using two tqdm progress bars.

recursive = False
pattern = r'.*\.csv' #retrieves all csv-files, regular expression
source_path = '/research/data/AmsterdamUMCdb'
destination_path = '~/AmsterdamUMCdb-v1.0.2.zip'

#expands ~ for *nix environments
source_path = os.path.expanduser(source_path)
destination_path = os.path.expanduser(destination_path)

#extract file name of zip file
zfilename = os.path.basename(destination_path)

#display options shared by both progress bars
pbar_options = dict(dynamic_ncols=True, unit_scale=1, unit='Bytes', leave=True)

#create zip file at destination path. The with-statements guarantee that the zip
#file and both progress bars are closed, even when zipping fails halfway.
with zipfile.ZipFile(destination_path, 'w',
                     compression=zipfile.ZIP_DEFLATED,
                     allowZip64=True) as zfile:

    #keep records of total progress.
    foldersize = get_foldersize(source_path, pattern, recursive)

    #create two tqdm progress bars: overall progress and progress of the current file.
    #On exit the file bar is closed first, then the total bar.
    with tqdm(total=foldersize, desc='Creating ' + zfilename, **pbar_options) as pbar_total, \
         tqdm(total=0, desc='Zipping ', **pbar_options) as pbar_file:

        #Replace original write() function with a wrapper to track progress
        zfile.fp.write = types.MethodType(partial(progress_write, zfile.fp.write), zfile.fp)

        arcname = ""
        for root, folders, files in os.walk(source_path, topdown=True):
            if not recursive:
                #remove folders from set to prevent traversing to sub-folders.
                folders[:] = []

            #only select files based on pattern
            files[:] = [file for file in files if re.match(pattern, file)]
            for file in files:
                #restart the per-file counter read by progress_write
                progress_file = 0

                #zipping filepath as arcname in the zipfile (using relative paths)
                filepath = os.path.join(root, file)
                arcname = os.path.relpath(filepath, source_path)
                filesize = os.path.getsize(filepath)

                pbar_file.set_description('Zipping ' + arcname)
                pbar_file.reset(total=filesize)

                #writes the csv file to the zip file
                zfile.write(filepath, arcname)

#only reached when zipping completed without an exception
print('Done.')