In [1]:
import os
from PIL import Image  # this will now use Pillow-SIMD
from tqdm import tqdm
from multiprocessing import Pool, cpu_count

# Input and output directories
input_dir = "/kaggle/input/grand-xray-slam-division-b/train2"
output_dir = "/kaggle/working/train2_resized"

os.makedirs(output_dir, exist_ok=True)

# Collect image files
img_files = [f for f in os.listdir(input_dir) if f.lower().endswith((".png", ".jpg", ".jpeg"))]

def resize_and_save(fname):
    try:
        in_path = os.path.join(input_dir, fname)
        out_path = os.path.join(output_dir, fname)

        # Open with Pillow-SIMD and resize
        img = Image.open(in_path).convert("RGB")
        img = img.resize((600, 600), Image.BILINEAR)

        # Save as JPEG (quality=95 = good tradeoff)
        img.save(out_path, "JPEG", quality=95, optimize=True)
    except Exception as e:
        print(f"❌ Error processing {fname}: {e}")

# Use all available CPU cores for speed
with Pool(processes=cpu_count()) as pool:
    list(tqdm(pool.imap_unordered(resize_and_save, img_files), total=len(img_files)))

print("✅ All images resized to 600×600 and saved in:", output_dir)

  3%|▎         | 2968/108494 [02:19<1:25:24, 20.59it/s]

❌ Error processing 00043046_001_001.jpg: cannot identify image file '/kaggle/input/grand-xray-slam-division-b/train2/00043046_001_001.jpg'


 37%|███▋      | 39926/108494 [30:35<37:33, 30.43it/s]

❌ Error processing 00052495_001_001.jpg: cannot identify image file '/kaggle/input/grand-xray-slam-division-b/train2/00052495_001_001.jpg'


 72%|███████▏  | 77620/108494 [58:49<21:41, 23.71it/s]

❌ Error processing 00056890_001_001.jpg: cannot identify image file '/kaggle/input/grand-xray-slam-division-b/train2/00056890_001_001.jpg'


100%|██████████| 108494/108494 [1:21:42<00:00, 22.13it/s]

✅ All images resized to 600×600 and saved in: /kaggle/working/train2_resized





In [2]:
# Input and output directories
input_dir = "/kaggle/input/grand-xray-slam-division-b/test2"
output_dir = "/kaggle/working/test2_resized"

os.makedirs(output_dir, exist_ok=True)

# Collect image files
img_files = [f for f in os.listdir(input_dir) if f.lower().endswith((".png", ".jpg", ".jpeg"))]

def resize_and_save(fname):
    try:
        in_path = os.path.join(input_dir, fname)
        out_path = os.path.join(output_dir, fname)

        # Open with Pillow-SIMD and resize
        img = Image.open(in_path).convert("RGB")
        img = img.resize((600, 600), Image.BILINEAR)

        # Save as JPEG (quality=95 = good tradeoff)
        img.save(out_path, "JPEG", quality=95, optimize=True)
    except Exception as e:
        print(f"❌ Error processing {fname}: {e}")

# Use all available CPU cores for speed
with Pool(processes=cpu_count()) as pool:
    list(tqdm(pool.imap_unordered(resize_and_save, img_files), total=len(img_files)))

print("✅ All images resized to 600×600 and saved in:", output_dir)

100%|██████████| 47927/47927 [35:37<00:00, 22.43it/s]

✅ All images resized to 600×600 and saved in: /kaggle/working/test2_resized



