# MinIO manager

> Manage MinIO connectivity: initialize buckets and upload, list, and download files.

In [1]:
#| default_exp minio_manager
#%load_ext autoreload
#%autoreload 2

In [2]:
#| export
import nbdev
from testcontainers.minio import MinioContainer
from pathlib import Path
from loguru import logger
import json
import random
from datasets import Dataset, DatasetDict
import cv2
from random import shuffle

  from .autonotebook import tqdm as notebook_tqdm


## MinioManager

In [3]:
#| export
def initialize_bucket(client, # MinIO client
                      bucket_name: str, # Name of the bucket to initialize
                      create_if_not_exist: bool = False # Whether to create the bucket if it doesn't exist
                      ) -> str: # Name of the bucket initialized
    """Initializes a bucket with the given name.

    Checks whether `bucket_name` exists. If it does not and
    `create_if_not_exist` is true, the bucket is created; otherwise a warning
    is logged and nothing is created.

    The bucket name is returned on every path, including the one where the
    bucket is missing and was not created.
    """
    if client.bucket_exists(bucket_name):
        logger.info(f"{bucket_name} exists")
    elif create_if_not_exist:
        logger.info(f"{bucket_name} does not exist. Creation...")
        client.make_bucket(bucket_name)
        logger.info(f"{bucket_name} created.")
    else:
        logger.warning(f"{bucket_name} does not exist.")

    # Return from the function body, not from the `else` branch: previously the
    # "exists" and "created" paths fell through and returned None.
    return bucket_name

In [4]:
#| export
def export_files_to_bucket(client, # MinIO client
                           path_files: str, # The path to the local directory containing the files to be exported.
                           bucket_name: str, # The name of the destination bucket.
                           prefix: str = None # The prefix to be added to the file names in the bucket. Default is None.
                           ):
    """Exports files from a local directory to a specified bucket.

    Every entry directly inside `path_files` is uploaded with
    `client.fput_object`. The object name depends on `prefix`:

    * with a prefix: `"{prefix}/{name}"`, where `[` and `]` are removed from
      the file name;
    * without a prefix: the file name, with `%5B` / `%5D` decoded to `[` / `]`.

    A warning is logged and nothing is uploaded when the directory is empty.
    Always returns None.
    """
    data_path = Path(path_files)

    # List the directory exactly once with the standard library, instead of
    # re-listing it for every index through the patched `Path.ls()`.
    entries = list(data_path.iterdir())

    if not entries:  # If the directory is empty
        logger.warning(f"No files found in {data_path}")
        return None

    for file_path in entries:
        if prefix:
            filename = file_path.name.replace("[", "").replace("]", "")
            object_name = f'{prefix}/{filename}'
        else:
            object_name = file_path.name.replace("%5B", "[").replace("%5D", "]")
        client.fput_object(bucket_name, object_name, file_path)

    destination = f"{bucket_name}/{prefix}" if prefix else bucket_name
    logger.success(f"{len(entries)} files uploaded to {destination}")

    return None

In [5]:
#| export
def list_all_files_in_bucket(client, # MinIO client
                            bucket_name: str, # The name of the bucket to list files from.
                            bucket_prefix: str = "", # The prefix to filter the files in the bucket. Default is an empty string.
                            include_directories: bool = False, # Whether to include directories in the list. Default is False.
                            absolute_paths: bool = False # Whether to return absolute paths or relative paths. Default is False.
                            ) -> list[str]: # A list of file names in the bucket that match the given prefix and include_directories criteria.
    """Lists all files in the specified bucket.

    With a non-empty `bucket_prefix`, objects are listed recursively under that
    prefix. Unless `absolute_paths` is true, a leading `"{bucket_prefix}/"` is
    stripped from each returned name.

    Without a prefix, `client.list_objects` is called with its defaults (no
    prefix, no `recursive` flag) and the object names are returned unchanged.

    Entries flagged `is_dir` are skipped unless `include_directories` is true.
    """
    if bucket_prefix:
        objects = client.list_objects(bucket_name, prefix=bucket_prefix, recursive=True)
        leading = f"{bucket_prefix}/"
    else:
        objects = client.list_objects(bucket_name)
        # Object names do not carry the bucket name, so there is nothing to strip.
        leading = ""

    list_files = []
    for obj in objects:
        if obj.is_dir and not include_directories:
            continue
        file_path = obj.object_name
        # Strip the prefix only where it leads the name. `str.replace` removed
        # every occurrence and mangled keys that repeat the prefix deeper in
        # the path (e.g. "a/data/a/x" -> "datx").
        if leading and not absolute_paths and file_path.startswith(leading):
            file_path = file_path[len(leading):]
        list_files.append(file_path)
    return list_files

In [6]:
#| export
def import_files_from_bucket(client, # MinIO client
                            bucket_name, # The name of the bucket to import files from.
                            bucket_prefix:str = None # The prefix to filter the files in the bucket. Default is None.
                            ) -> dict: # A dictionary containing the imported files and their content {file_name: [file_content]}.
    """Downloads the files of a bucket into memory.

    The names come from `list_all_files_in_bucket` (relative to
    `bucket_prefix` when one is given). Each object is fetched with
    `client.get_object` and its full body is stored as a one-element list
    under its relative name.

    Exceptions raised by `get_object` or `read` propagate to the caller.
    """
    list_of_path = list_all_files_in_bucket(client, bucket_name, bucket_prefix)
    data = {}
    for file in list_of_path:
        # An empty or None prefix means the listed name is already the object
        # key; `is None` alone turned "" into a key with a leading "/".
        object_name = f"{bucket_prefix}/{file}" if bucket_prefix else file

        # Fetch outside the `try`: if the request itself fails there is no
        # response to clean up, and the original error must not be masked by
        # an unbound (or stale) `response` in `finally`.
        response = client.get_object(bucket_name, object_name)
        try:
            data[file] = [response.read()]
        finally:
            response.close()
            response.release_conn()
    return data

#|hide
with MinioContainer(image="quay.io/minio/minio:latest") as minio:
    # NOTE(review): this cell exercises the same scenario as the assert-based
    # test cell that follows it; consider keeping only one of the two.
    test_bucket = "test-bucket-2"
    test_folder = Path("../data/test")
    test_bucket_prefix = "test-prefix"

    ##### TEST initialize_bucket #####
    client = minio.get_client()
    initialize_bucket(client, test_bucket, True)
    # Assert instead of only logging an error, so a regression fails the run.
    assert client.bucket_exists(test_bucket), "Test: Bucket initialization - Not passed"
    logger.success("Test: Bucket initialization - Passed")

    ##### TEST export_files_to_bucket and list_all_files_in_bucket #####
    path_test_file_to_export = test_folder.joinpath("pdf_to_export")

    # Upload the fixtures once at the bucket root and once under the prefix.
    export_files_to_bucket(client, path_test_file_to_export, test_bucket)
    export_files_to_bucket(client, path_test_file_to_export, test_bucket, prefix=test_bucket_prefix)

    # Names of the fixture files on disk, read with a single directory listing.
    list_of_file_to_export_in_disk = [
        path_file.name for path_file in path_test_file_to_export.iterdir()
    ]

    # Sets are compared because the listing order is not relevant.
    list_of_file_in_bucket = list_all_files_in_bucket(client, test_bucket)
    assert set(list_of_file_in_bucket) == set(list_of_file_to_export_in_disk), \
        "Test: Export_files_to_bucket or list_all_files_to_bucket - Not passed"
    logger.success("Test: List_all_files_to_bucket - Passed")

    list_of_file_in_bucket_with_prefix = list_all_files_in_bucket(client, test_bucket, test_bucket_prefix)
    assert set(list_of_file_in_bucket_with_prefix) == set(list_of_file_to_export_in_disk), \
        "Test: List_all_files_to_bucket_bucket_prefix - Not passed"
    logger.success("Test: List_all_files_to_bucket_with_prefix - Passed")

    ##### TEST import_files_from_bucket #####
    files = import_files_from_bucket(client, test_bucket, test_bucket_prefix)

    # The keys of the returned dict are the imported file names.
    list_of_file_imported_in_disk = list(files)
    assert set(list_of_file_imported_in_disk) == set(list_of_file_to_export_in_disk), \
        "Test: Import files from bucket - Not passed"
    logger.success("Test: Import files from bucket - Passed")

In [7]:
#|hide
with MinioContainer(image="quay.io/minio/minio:latest") as minio:
    # End-to-end check of the bucket helpers against a throwaway MinIO container.
    test_bucket = "test-bucket-2"
    test_folder = Path("../data/test")
    test_bucket_prefix = "test-prefix"

    ##### TEST initialize_bucket #####
    client = minio.get_client()
    initialize_bucket(client, test_bucket, True)
    assert client.bucket_exists(test_bucket), "Test: Bucket initialization - Not passed"
    logger.success("Test: Bucket initialization - Passed")

    ##### TEST export_files_to_bucket and list_all_files_in_bucket #####
    path_test_file_to_export = test_folder.joinpath("pdf_to_export")

    # One upload at the bucket root and one under the prefix; the second
    # prefixed upload that used to follow was an exact repeat of the first.
    export_files_to_bucket(client, path_test_file_to_export, test_bucket)
    export_files_to_bucket(client, path_test_file_to_export, test_bucket, prefix=test_bucket_prefix)

    # Expected names: the fixture files on disk, listed once.
    list_of_file_to_export_in_disk = [
        path_file.name for path_file in path_test_file_to_export.iterdir()
    ]
    expected_names = set(list_of_file_to_export_in_disk)

    # Order is irrelevant, so every comparison is done on sets.
    list_of_file_in_bucket = list_all_files_in_bucket(client, test_bucket)
    assert set(list_of_file_in_bucket) == expected_names, "Test: Export_files_to_bucket or list_all_files_to_bucket - Not passed"
    logger.success("Test: List_all_files_to_bucket - Passed")

    list_of_file_in_bucket_with_prefix = list_all_files_in_bucket(client, test_bucket, test_bucket_prefix)
    assert set(list_of_file_in_bucket_with_prefix) == expected_names, "Test: List_all_files_to_bucket_with_prefix - Not passed"
    logger.success("Test: List_all_files_to_bucket_with_prefix - Passed")

    ##### TEST import_files_from_bucket #####
    files = import_files_from_bucket(client, test_bucket, test_bucket_prefix)
    # The dict keys are the imported file names.
    list_of_file_imported_in_disk = list(files)
    assert set(list_of_file_imported_in_disk) == expected_names, "Test: Import files from bucket - Not passed"
    logger.success("Test: Import files from bucket - Passed")


Pulling image testcontainers/ryuk:0.8.1
Container started: b0a45317c2bd
Waiting for container <Container: b0a45317c2bd> with image testcontainers/ryuk:0.8.1 to be ready ...
Pulling image quay.io/minio/minio:latest
Container started: 2318a8448c1f
Waiting for container <Container: 2318a8448c1f> with image quay.io/minio/minio:latest to be ready ...
Waiting for container <Container: 2318a8448c1f> with image quay.io/minio/minio:latest to be ready ...
Waiting for container <Container: 2318a8448c1f> with image quay.io/minio/minio:latest to be ready ...
Waiting for container <Container: 2318a8448c1f> with image quay.io/minio/minio:latest to be ready ...
[32m2024-11-08 16:43:16.503[0m | [1mINFO    [0m | [36m__main__[0m:[36minitialize_bucket[0m:[36m12[0m - [1mtest-bucket-2 does not exist. Creation...[0m
[32m2024-11-08 16:43:16.507[0m | [1mINFO    [0m | [36m__main__[0m:[36minitialize_bucket[0m:[36m14[0m - [1mtest-bucket-2 created.[0m
[32m2024-11-08 16:43:16.509[0m | [32

In [8]:
#| hide
# Run nbdev's export step, which writes `#| export` cells to the module named
# by `default_exp` (declared as `minio_manager` at the top of this notebook).
nbdev.nbdev_export()