In [4]:
from minio import Minio
from minio.error import S3Error
import os
from PIL import Image, ImageChops
import io

# Configuration
# The endpoint and credentials can be supplied through environment variables so
# they do not have to be edited (or committed) in this file. When a variable is
# not set, the original development value is used, so existing runs behave the
# same as before.
MINIO_ENDPOINT = os.environ.get(
    "MINIO_ENDPOINT",
    "minio-api-minio.apps.ai-dev01.kni.syseng.devcluster.openshift.com",
)  # Change to your MinIO endpoint
ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "minio")
SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "minio1234!")
BUCKET_NAME = "edb-aidb"  # Bucket that holds the images
REMOTE_FOLDER = "recommender_images/"  # Object-name prefix listed in the bucket
LOCAL_FOLDER = "downloaded_images"  # Local directory that receives the files

# MinIO client used by the functions in this file
client = Minio(
    MINIO_ENDPOINT,
    secure=True,  # Change to False if not using HTTPS
    secret_key=SECRET_KEY,
    access_key=ACCESS_KEY,
)

# Create the local output directory up front (no error if it already exists)
os.makedirs(LOCAL_FOLDER, exist_ok=True)

def download_images_from_minio_folder():
    """Download every image object under REMOTE_FOLDER into LOCAL_FOLDER.

    Objects are listed recursively and filtered by file extension
    (case-insensitive). Each match is saved under its base name only, so the
    remote folder structure is not reproduced locally. A progress line is
    printed after every 100 downloads. An S3Error is printed and ends the
    run; nothing is returned.
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp")
    try:
        downloaded = 0
        listing = client.list_objects(BUCKET_NAME, prefix=REMOTE_FOLDER, recursive=True)
        for entry in listing:
            remote_name = entry.object_name
            if not remote_name.lower().endswith(image_extensions):
                continue
            # Keep only the file name; the remote folder path is dropped
            target = os.path.join(LOCAL_FOLDER, os.path.basename(remote_name))
            client.fget_object(BUCKET_NAME, remote_name, target)
            downloaded += 1
            if downloaded % 100 == 0:
                print(f"{downloaded} images downloaded.")
    except S3Error as e:
        print("S3 Error:", e)

In [5]:
def crop_white_background(img):
    """Trim the pure-white border around an image.

    The image is converted to RGB when it is in another mode, then compared
    against a solid white canvas of the same size. The bounding box of the
    pixels that differ from white becomes the crop region. When no pixel
    differs, the (RGB) image is returned uncropped.
    """
    rgb = img if img.mode == 'RGB' else img.convert('RGB')
    white_canvas = Image.new("RGB", rgb.size, (255, 255, 255))
    bounds = ImageChops.difference(rgb, white_canvas).getbbox()
    if not bounds:
        # Nothing differs from white: there is no region to crop to
        return rgb
    return rgb.crop(bounds)

def compress_and_save_image(image, save_path, quality=50):
    """Write *image* to *save_path* as an optimized JPEG.

    ``quality`` is forwarded unchanged to the image's ``save`` method.
    """
    jpeg_options = {"format": "JPEG", "quality": quality, "optimize": True}
    image.save(save_path, **jpeg_options)

def process_images():
    """Crop and compress every JPEG/PNG under REMOTE_FOLDER into LOCAL_FOLDER.

    Each matching object is read into memory, has its white border cropped,
    and is written to LOCAL_FOLDER under its base name as a JPEG with
    quality 50. A progress line is printed after every 100 images actually
    processed; objects with other extensions are skipped and not counted.

    Errors are printed rather than raised: the first S3Error or other
    exception ends the run. Nothing is returned.
    """
    image_extensions = (".jpg", ".jpeg", ".png")
    try:
        objects = client.list_objects(BUCKET_NAME, prefix=REMOTE_FOLDER, recursive=True)
        number_images = 0
        for obj in objects:
            if not obj.object_name.lower().endswith(image_extensions):
                continue

            # Download image into memory; always hand the connection back,
            # even when the read fails.
            response = client.get_object(BUCKET_NAME, obj.object_name)
            try:
                image_data = response.read()
            finally:
                response.close()
                response.release_conn()
            image = Image.open(io.BytesIO(image_data))

            # Crop and compress
            cropped = crop_white_background(image)
            filename = os.path.basename(obj.object_name)
            local_path = os.path.join(LOCAL_FOLDER, filename)
            # NOTE(review): the output is always JPEG data, but the original
            # file name is kept, so a ".png" input yields a ".png" file that
            # contains a JPEG. Confirm this is intended.
            compress_and_save_image(cropped, local_path, quality=50)

            # Count only objects that were really processed as images
            number_images += 1
            if number_images % 100 == 0:
                print(f"{number_images} images processed.")

    except S3Error as err:
        print("MinIO error:", err)
    except Exception as e:
        print("General error:", e)

# Entry point: run the crop-and-compress pass only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    process_images()

100 images processed.
200 images processed.
300 images processed.
400 images processed.
500 images processed.
600 images processed.
700 images processed.
800 images processed.
900 images processed.
1000 images processed.
1100 images processed.
1200 images processed.
1300 images processed.
1400 images processed.
1500 images processed.
1600 images processed.
1700 images processed.
1800 images processed.
1900 images processed.
2000 images processed.
2100 images processed.
2200 images processed.
2300 images processed.
2400 images processed.
2500 images processed.
2600 images processed.
2700 images processed.
2800 images processed.
2900 images processed.
3000 images processed.
3100 images processed.
3200 images processed.
3300 images processed.
3400 images processed.
3500 images processed.
3600 images processed.
3700 images processed.
3800 images processed.
3900 images processed.
4000 images processed.
4100 images processed.
4200 images processed.
4300 images processed.
4400 images processe