In [3]:
import pandas as pd
import shutil
import os
from PIL import Image
import imagehash 
import re
import random
import numpy as np
import matplotlib.pyplot as plt 
from tqdm import tqdm
from pathlib import Path

In [None]:
def resize_and_crop_images(input_directory, output_directory, min_size=450):
    """Resize every image under ``input_directory`` and center-crop it to a square.

    The directory tree is walked recursively. Each file whose name ends in
    ``.jpg``, ``.jpeg`` or ``.png`` is scaled (aspect ratio preserved) so that
    its *shorter* side equals ``min_size``, then the central
    ``min_size`` x ``min_size`` region is cropped out and saved under
    ``output_directory`` at the same relative path and file name.

    Args:
        input_directory: Root folder that is searched for images.
        output_directory: Root folder that receives the processed images;
            created if it does not exist.
        min_size: Edge length, in pixels, of the square output image.

    Returns:
        None. Files that raise ``IOError`` while being processed are reported
        on stdout and skipped.
    """
    os.makedirs(output_directory, exist_ok=True)

    for root, _, files in os.walk(input_directory):
        for filename in files:
            if not filename.endswith(('.jpg', '.jpeg', '.png')):
                continue

            img_path = os.path.join(root, filename)
            try:
                # The context manager releases the source file handle promptly.
                with Image.open(img_path) as img:
                    width, height = img.size

                    # Scale by the SHORTER side so both sides end up >= min_size.
                    # Scaling the longer side instead would leave the crop box
                    # partly outside the image, so the result would be padded
                    # rather than cropped.
                    scale = min_size / min(width, height)
                    # max() guards against rounding pushing a side below min_size.
                    new_width = max(min_size, round(width * scale))
                    new_height = max(min_size, round(height * scale))

                    img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

                # Centered square crop box, guaranteed to lie inside the resized image.
                left = (new_width - min_size) // 2
                top = (new_height - min_size) // 2
                right = left + min_size
                bottom = top + min_size
                img_cropped = img_resized.crop((left, top, right, bottom))

                # Mirror the input sub-directory layout below the output root.
                relative_path = os.path.relpath(root, input_directory)
                output_subdir = os.path.join(output_directory, relative_path)
                os.makedirs(output_subdir, exist_ok=True)
                output_path = os.path.join(output_subdir, filename)
                img_cropped.save(output_path)

            except IOError as e:
                print(f"Error processing image {filename}: {e}")


# Source folders and the single destination folder that both are written into.
input_train_dir = "img/ISIC_2019_Training_Input"
input_test_dir = "img/ISIC_2019_Test_Input"
output_dir = "img/Image_Dataset"

# Remove any previous output so stale files from an earlier run do not survive.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)

# Training images first, then test images, into the same destination.
for _source_dir in (input_train_dir, input_test_dir):
    resize_and_crop_images(_source_dir, output_dir)


In [16]:
def find_min_dimension(directory):
    """Return the smallest pixel dimension (width or height) of the images in ``directory``.

    Only files directly inside ``directory`` whose names end in ``.jpg``,
    ``.jpeg`` or ``.png`` are inspected. As a diagnostic, the size of every
    image whose width and height both differ from 450 is printed, followed by
    the number of images that were opened successfully.

    Args:
        directory: Folder containing the images to inspect.

    Returns:
        The minimum over all widths and heights of the readable images, or
        ``None`` when no image could be read.
    """
    min_width = float('inf')
    min_height = float('inf')
    image_count = 0

    for filename in os.listdir(directory):
        if not filename.endswith(('.jpg', '.jpeg', '.png')):
            continue

        img_path = os.path.join(directory, filename)
        try:
            # Only the size is needed; close the file as soon as it is read.
            with Image.open(img_path) as img:
                width, height = img.size
        except IOError:
            print(f"Lỗi mở ảnh: {img_path}")
            continue

        # NOTE(review): this reports an image only when BOTH sides differ from
        # 450; if the intent is to flag every non-450x450 image, `or` is needed.
        if width != 450 and height != 450:
            print(width, height, sep=", ")

        # Track the running minima (previously never updated).
        min_width = min(min_width, width)
        min_height = min(min_height, height)
        image_count += 1

    print(image_count)

    if image_count == 0:
        return None
    return min(min_width, min_height)

# Run the dimension check over the resized dataset folder and keep the result.
directory_path = "img/Image_Dataset"
min_pixel = find_min_dimension(directory=directory_path)

33569


In [17]:
def image_to_numpy_array(image_path):
    """Load the image at ``image_path`` into a NumPy array.

    A 2-D result (no channel axis) gets a trailing axis of length 1 appended,
    so the returned array always has at least three dimensions.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The pixel data as a NumPy array, or ``None`` if reading the image
        raised ``IOError`` (the error is reported on stdout).
    """
    try:
        pixels = np.array(Image.open(image_path))
    except IOError as err:
        print(f"Error opening image {image_path}: {err}")
        return None

    # Give channel-less arrays an explicit trailing channel axis.
    if pixels.ndim == 2:
        pixels = np.expand_dims(pixels, axis=-1)
    return pixels


def process_images_to_numpy(input_dir, output_dir):
    """Convert every image in ``input_dir`` to a ``.npy`` file in ``output_dir``.

    Only files directly inside ``input_dir`` whose names end in ``.jpg``,
    ``.jpeg`` or ``.png`` are converted. Each array is saved as
    ``<original name without extension>.npy``. Images for which
    ``image_to_numpy_array`` returns ``None`` are skipped.

    Args:
        input_dir: Folder containing the source images.
        output_dir: Folder that receives the ``.npy`` files; created if missing.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)  # Create output directory if it doesn't exist

    for filename in os.listdir(input_dir):
        if not filename.endswith(('.jpg', '.jpeg', '.png')):
            continue

        img_array = image_to_numpy_array(os.path.join(input_dir, filename))
        if img_array is None:
            continue

        # splitext handles extensions of any length; slicing off a fixed four
        # characters turned "x.jpeg" into "x..npy".
        stem, _ = os.path.splitext(filename)
        np.save(os.path.join(output_dir, stem + ".npy"), img_array)


# Convert the resized images into .npy files, one array per image.
input_directory = "img/Image_Dataset"   # folder holding the resized images
output_directory = "numpy_arrays"       # folder that receives the .npy files

process_images_to_numpy(input_dir=input_directory, output_dir=output_directory)

print("Conversion to NumPy arrays complete.")

Conversion to NumPy arrays complete.
