## Clean images to avoid the error "Corrupt JPEG data: 27 extraneous bytes before marker 0xdb" 

In [None]:
import os 
from PIL import Image

def check_and_fix_image(file_path):
    """Report whether the image at ``file_path`` passes Pillow's integrity check.

    Despite the name, this function only checks the file; it never modifies
    or repairs it.

    Args:
        file_path: Path of the image file to open and verify.

    Returns:
        True when the file opens and ``verify()`` raises nothing; False when
        opening or verifying raises ``OSError`` (``IOError``) or
        ``SyntaxError``, after printing a diagnostic line.
    """
    try:
        with Image.open(file_path) as img:
            img.verify()
    except (OSError, SyntaxError) as e:  # IOError is an alias of OSError
        print(f"Corrupted file : {file_path} - {e}")
        return False
    else:
        return True

def reprocess_image(file_path):
    """Re-encode the image at ``file_path`` as an RGB JPEG, replacing it in place.

    The image is decoded, converted to RGB and written back as JPEG with
    ``quality=95``. The new data is first written to a sibling temporary file
    (``<file_path>.tmp``) and only swapped over the original once the save
    has completed, so a failure while encoding cannot leave a truncated file
    in place of the original.

    Note that this is a lossy re-encode: only the RGB pixel data is passed to
    the encoder.

    Args:
        file_path: Path of the image file to rewrite.

    Returns:
        None. Errors are reported with ``print`` and are not re-raised, so a
        single bad file does not abort a batch run.
    """
    # An existing "<file_path>.tmp" will be overwritten.
    tmp_path = f"{file_path}.tmp"
    try:
        with Image.open(file_path) as img:
            rgb_img = img.convert("RGB")
            # The format is given explicitly because the ".tmp" suffix
            # cannot be used to infer it.
            rgb_img.save(tmp_path, "JPEG", quality=95)
        # Swap only after the source handle is closed and the save succeeded.
        os.replace(tmp_path, file_path)
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        # Best-effort cleanup of a partially written temporary file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

# Walk the dataset directory tree and re-encode every JPEG file found in it.
base_path = "../data/Cytologia/images/"
jpeg_suffixes = (".jpg", ".jpeg")
for root, dirs, files in os.walk(base_path):
    for file in files:
        # Extension match is case-insensitive; skip anything that is not a JPEG.
        if not file.lower().endswith(jpeg_suffixes):
            continue
        reprocess_image(os.path.join(root, file))