In [None]:
!apt-get install libvips-dev -y --no-install-recommends --download-only -o dir::cache='./'

!mkdir ./libvips
!mv ./archives/* ./libvips
!rm -rf ./archives
!ls ./libvips

!yes | dpkg -i ./libvips/*.deb

!pip install pyvips
!pip wheel pyvips
!mkdir pyvips
!mv *.whl ./pyvips

In [None]:
!pip install numpy pandas tqdm joblib pillow scikit-image GPUtil

In [1]:
import os
import glob
import multiprocessing as mp
from tqdm import tqdm
from worker_utils import extract_prune_tiles # Import worker functions
from worker_utils import get_available_gpu_memory

DATASET_IMAGES = "/home/input"

os.environ['VIPS_DISC_THRESHOLD'] = '9gb'  # Use disk caching for large images
os.environ['VIPS_ACCESS'] = 'sequential'
os.environ['VIPS_LIMIT_MEMORY'] = '16384'  # 16GB memory limit
os.environ['VIPS_CACHE_SIZE'] = '8192'  # 6GB cache size
os.environ['VIPS_CONCURRENCY'] = '6'  # 6 threads

print(f"Pyvips ENV Values: \n VIPS_DISC_THRESHOLD: {os.environ['VIPS_DISC_THRESHOLD']} \n VIPS_ACCESS: {os.environ['VIPS_ACCESS']} \n VIPS_LIMIT_MEMORY: {os.environ['VIPS_LIMIT_MEMORY']} \n VIPS_CACHE_SIZE: {os.environ['VIPS_CACHE_SIZE']} \n VIPS_CONCURRENCY: {os.environ['VIPS_CONCURRENCY']}") 
# Get the list of image paths
ls = sorted(glob.glob(os.path.join(DATASET_IMAGES, '*.png')))
print(f"\n found images: {len(ls)}")

Using device: cuda
Pyvips ENV Values: 
 VIPS_DISC_THRESHOLD: 9gb 
 VIPS_ACCESS: sequential 
 VIPS_LIMIT_MEMORY: 16384 
 VIPS_CACHE_SIZE: 8192 
 VIPS_CONCURRENCY: 6

 found images: 3


In [None]:
def main():
    # Get image paths
    ls = sorted(glob.glob(os.path.join(DATASET_IMAGES, '*.png')))

    memory_per_process = 3000  # MB

    with tqdm(total=len(ls), desc="Overall Progress") as pbar:
        while ls:
            available_memory = get_available_gpu_memory()
            num_processes = max(1, int(available_memory // memory_per_process))
            print(f"Available GPU memory: {available_memory:.2f} MB, Using {num_processes} processes")

            with mp.get_context("spawn").Pool(processes=num_processes) as pool:
                results = []
                for _ in range(min(num_processes, len(ls))):
                    image_path = ls.pop(0)
                    results.append(pool.apply_async(extract_prune_tiles,
                                                    args=((len(ls), image_path), "/home/output", 512, 2.0, 0.7, None, 0.2)))

                for result in results:
                    result.get()
                    pbar.update(1)


if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    main()

Overall Progress:   0%|                                                                           | 0/3 [00:00<?, ?it/s]

Available GPU memory: 15240.00 MB, Using 5 processes
