In [None]:
import os
import zipfile
from pathlib import Path

import numpy as np
from tqdm import tqdm

In [None]:
# Resolutions to process. Each value is matched against file names as '<res>r'.
# The full sweep would be [64, 96, 128]; only 128 is currently enabled.
resolution = [128]

# File extensions (before the trailing '.zip'), grouped by data type.
extensions = [
    ['.bing', '.vox', '.rle', '.qstack'],
    ['.ply', '.xyz', '.binp'],
    ['.obj', '.stl', '.binm'],
]

# Display names for the table rows, index-aligned with `extensions`.
headers = [
    ['Binary raw grid', 'MagicaVoxel', 'RLE', 'QuadStack'],
    ['PLY', 'XYZ', 'Compressed binary'],
    ['OBJ', 'STL', 'Binary'],
]

In [None]:
# Root folder that is scanned recursively for the zipped files.
# Set the FRAGMENTS_FOLDER environment variable to point the notebook at another
# location without editing this cell; the original path remains the default.
folder = os.environ.get('FRAGMENTS_FOLDER', 'D:/allopezr/Fragments/Vessels_200/')

In [None]:
# Every table is indexed as [resolution, data type, extension]. The last axis is sized
# for the longest extension group, so shorter groups leave their trailing slots unused.
table_shape = (len(resolution), len(extensions), max(map(len, extensions), default=0))
footprint_compressed = np.zeros(table_shape)
footprint_uncompressed = np.zeros(table_shape)
count_files = np.zeros(table_shape)

# Bytes per MiB: all sizes below are accumulated in units of 1024**2 bytes.
unit_divisor = 1024**2

for res_idx, res in enumerate(resolution):
    for data_type_idx, data_type in enumerate(extensions):
        for extension_idx, extension in enumerate(data_type):
            # Search recursively for archives named '*<res>r*<extension>.zip'
            files = list(Path(folder).rglob('*' + str(res) + 'r*' + extension + '.zip'))
            print(f'Found {len(files)} files with extension {extension}')

            for file in tqdm(files):
                # Ignore files that match the name pattern but are not valid zip archives
                if not zipfile.is_zipfile(file):
                    continue

                # Uncompressed footprint: sum of the original sizes of all archive members
                with zipfile.ZipFile(file, 'r') as zip_ref:
                    uncompressed_size = sum(info.file_size for info in zip_ref.infolist())
                # Compressed footprint: size of the archive itself on disk
                compressed_size = file.stat().st_size

                footprint_compressed[res_idx, data_type_idx, extension_idx] += compressed_size / unit_divisor
                footprint_uncompressed[res_idx, data_type_idx, extension_idx] += uncompressed_size / unit_divisor
                count_files[res_idx, data_type_idx, extension_idx] += 1

# Per-file averages. Slots without any file (unused trailing slots, or extensions for which
# nothing was found) are left as NaN instead of evaluating 0/0, which would emit a
# RuntimeWarning.
has_files = count_files > 0
average_footprint_compressed = np.divide(
    footprint_compressed, count_files, out=np.full(table_shape, np.nan), where=has_files
)
average_footprint_uncompressed = np.divide(
    footprint_uncompressed, count_files, out=np.full(table_shape, np.nan), where=has_files
)

In [None]:
# Display the accumulated compressed sizes, indexed as [resolution, data type, extension].
footprint_compressed

In [None]:
# Build the rows of a LaTeX table: one row per file format, and for every resolution two
# cells, "total compressed (total uncompressed)" and "average compressed (average
# uncompressed)". The last row of each data-type group is followed by a \cmidrule.
latex_rows = []

for data_type_idx, data_type in enumerate(headers):
    for extension_idx, header in enumerate(data_type):
        # Raw string on purpose: in a regular literal '\t' is a TAB character,
        # which would corrupt the \textbf command.
        cells = [r'\textbf{' + header + '}']

        for res_idx, res in enumerate(resolution):
            cells.append(f'{footprint_compressed[res_idx, data_type_idx, extension_idx]:.2f}MB ({footprint_uncompressed[res_idx, data_type_idx, extension_idx]:.2f}MB)')
            cells.append(f'{average_footprint_compressed[res_idx, data_type_idx, extension_idx]:.2f}MB ({average_footprint_uncompressed[res_idx, data_type_idx, extension_idx]:.2f}MB)')

        # Every row must end with a double backslash (the LaTeX row terminator)
        latex_rows.append(' & '.join(cells) + r' \\')

    # Separate data-type groups with a rule; skip empty groups so the rule is never
    # attached to a row of the previous group.
    if data_type:
        latex_rows[-1] += r' \cmidrule{1-9}'

latex_table_str = '\n'.join(latex_rows)

In [None]:
import threading

def calculate_footprint(files_t, footprint_compressed_t, footprint_uncompressed_t, count_files_t, index=None, divisor=None):
    """Accumulate the size statistics of a batch of zip archives into the given arrays.

    For every valid zip archive in ``files_t`` the size of the archive on disk and the
    summed original size of its members are added to the accumulators at ``index``,
    and the file counter at ``index`` is incremented. Files that are not valid zip
    archives are skipped.

    The results are written to the arrays passed as arguments, not to module-level
    state, so each worker thread can be given its own private accumulators.

    Parameters
    ----------
    files_t : iterable of path-like
        Candidate archive paths.
    footprint_compressed_t : numpy.ndarray
        Accumulator for the on-disk (compressed) sizes, modified in place.
    footprint_uncompressed_t : numpy.ndarray
        Accumulator for the uncompressed sizes, modified in place.
    count_files_t : numpy.ndarray
        Accumulator for the number of processed archives, modified in place.
    index : tuple of int, optional
        Position inside the accumulators to update. Defaults to the module-level
        ``(res_idx, data_type_idx, extension_idx)`` at the time of the call.
    divisor : number, optional
        Sizes in bytes are divided by this value before being accumulated.
        Defaults to the module-level ``unit_divisor``.
    """
    # Resolve the fallbacks once, up front, so the whole batch uses a consistent slot
    if index is None:
        index = (res_idx, data_type_idx, extension_idx)
    index = tuple(index)
    if divisor is None:
        divisor = unit_divisor

    for file in files_t:
        if not zipfile.is_zipfile(file):
            continue

        # Uncompressed footprint: sum of the original sizes of all archive members
        with zipfile.ZipFile(file, 'r') as zip_ref_t:
            uncompressed_size = sum(info.file_size for info in zip_ref_t.infolist())
        # Compressed footprint: size of the archive itself on disk
        compressed_size = Path(file).stat().st_size

        footprint_compressed_t[index] += compressed_size / divisor
        footprint_uncompressed_t[index] += uncompressed_size / divisor
        count_files_t[index] += 1

In [None]:
# Upper bound on worker threads per extension. threading.active_count() only reports how
# many threads happen to be alive in the kernel, so the CPU count is used instead.
max_n_threads = os.cpu_count() or 1

# Every table is indexed as [resolution, data type, extension]. The last axis is sized
# for the longest extension group, so shorter groups leave their trailing slots unused.
table_shape = (len(resolution), len(extensions), max(map(len, extensions), default=0))
footprint_compressed = np.zeros(table_shape)
footprint_uncompressed = np.zeros(table_shape)
count_files = np.zeros(table_shape)

# Bytes per MiB: all sizes below are accumulated in units of 1024**2 bytes.
unit_divisor = 1024**2

# NOTE: the loop variables res_idx / data_type_idx / extension_idx must keep these names;
# calculate_footprint may read them from module scope to locate the slot to update.
for res_idx, res in enumerate(resolution):
    for data_type_idx, data_type in enumerate(extensions):
        for extension_idx, extension in enumerate(data_type):
            # Search recursively for archives named '*<res>r*<extension>.zip'
            files = list(Path(folder).rglob('*' + str(res) + 'r*' + extension + '.zip'))
            print(f'Found {len(files)} files with extension {extension}')

            # Nothing to do; also avoids splitting the work into zero chunks
            if not files:
                continue

            # Split the files into n_threads interleaved chunks
            n_threads = min(max_n_threads, len(files))
            files_chunks = [files[i::n_threads] for i in range(n_threads)]

            threads = []
            footprint_compressed_list = []
            footprint_uncompressed_list = []
            count_files_list = []

            print(f'Calculating footprint for {len(files)} files with {n_threads} threads')
            for i in range(n_threads):
                # Each worker gets zero-initialised accumulators. Handing out copies of the
                # running totals would add those totals again when the partial results
                # are summed below.
                footprint_compressed_list.append(np.zeros(table_shape))
                footprint_uncompressed_list.append(np.zeros(table_shape))
                count_files_list.append(np.zeros(table_shape))

                threads.append(threading.Thread(
                    target=calculate_footprint,
                    args=(files_chunks[i], footprint_compressed_list[i], footprint_uncompressed_list[i], count_files_list[i]),
                ))
                threads[i].start()

            print('Waiting for threads to finish')
            # All workers are joined before the loop indices advance to the next extension
            for i in tqdm(range(n_threads)):
                threads[i].join()

                # Sum the partial results
                footprint_compressed += footprint_compressed_list[i]
                footprint_uncompressed += footprint_uncompressed_list[i]
                count_files += count_files_list[i]

# Per-file averages. Slots without any file (unused trailing slots, or extensions for which
# nothing was found) are left as NaN instead of evaluating 0/0, which would emit a
# RuntimeWarning.
has_files = count_files > 0
average_footprint_compressed = np.divide(
    footprint_compressed, count_files, out=np.full(table_shape, np.nan), where=has_files
)
average_footprint_uncompressed = np.divide(
    footprint_uncompressed, count_files, out=np.full(table_shape, np.nan), where=has_files
)

In [None]:
# Display the upper bound on worker threads used when splitting the files into chunks.
max_n_threads