In [None]:
# import hashlib
# hash_list = []

# # Check if the image is not already copied
# image_hash = hashlib.sha256(file_path.encode('utf-8'))
# if image_hash.hexdigest() not in hash_list:

#     hash_list.append(image_hash.hexdigest())

In [None]:
pip install pillow

In [102]:
import os, csv
import numpy as np
from PIL import Image
import shutil
import random
from torch.utils.data import Dataset

In [None]:
# Function for visualization
def visualize_img(image: np.ndarray):
    img = image.copy()
    img = np.squeeze(img)
    img = Image.fromarray(img)
    return img

In [64]:
def validate_images(input_dir):
    # Get absolute path
    input_dir = os.path.abspath(input_dir)
    
    try:
        # Check if the directory exists 
        os.listdir(input_dir)
    except FileNotFoundError:
        raise ValueError(f"{input_dir} is not an existing directory")
    
    # Save the path of files 
    files = []
    for folder, subfolder, file in os.walk(input_dir):
        for f in file:
            files.append(os.path.join(folder, f))

    # Check the images requisite
    for idx, file_path in enumerate(files):
        # Check the extension
        if file_path.lower().endswith(('.jpg', '.jpeg')):
            try:
                # Open the image
                my_image = Image.open(file_path)

                # Check the image dimension (min 10kB, max 250 kB)
                if os.path.getsize(file_path) > 10240 and os.path.getsize(file_path) <= 250000:
                    
                    # Check if the image is in RGB mode
                    if my_image.mode == "RGB":

                        #Convert my_image in numpy array
                        image_data = np.array(my_image)
                        
                        # Check if the image has the height and width min request (100 pixel)
                        h, w, c = image_data.shape
                        if h >= 224 and w >= 224:
                            
                            # Check if the variance of pixels is greater than 10 to avoid monotone images
                            if np.var(image_data) > 10:
                                continue  # Valid image, check the next one
                            else:
                                print(f"Image {file_path} is invalid: Variance is less than 10.")
                        else:
                            print(f"Image {file_path} is invalid: Dimensions are too small ({h}x{w}).")
                    else:
                        print(f"Image {file_path} is invalid: Not in RGB mode.")
                else:
                    print(f"Image {file_path} is invalid: File size is not correct.")
            
            except Exception as e:
                print(f"Image {file_path} could not be processed: {e}")
        
            # Delete the image is not valid
            try:
                os.remove(file_path)
                print(f"Deleted invalid image: {file_path}")
            except Exception as e:
                print(f"Failed to delete {file_path}: {e}")

        else:
            print(f"File {file_path} is not an image.")
        
        
validate_images("food_13/images")

File c:\Users\miche.LAPTOP-KKEENNGV\OneDrive\Desktop\Università\2° Anno\Advanced Programming and Deep Learning for AI\Part 2 (Lin)\Project\food_13\images\.DS_Store is not an image.
Image c:\Users\miche.LAPTOP-KKEENNGV\OneDrive\Desktop\Università\2° Anno\Advanced Programming and Deep Learning for AI\Part 2 (Lin)\Project\food_13\images\chicken_wings\1009927.jpg is invalid: Dimensions are too small (140x512).
Deleted invalid image: c:\Users\miche.LAPTOP-KKEENNGV\OneDrive\Desktop\Università\2° Anno\Advanced Programming and Deep Learning for AI\Part 2 (Lin)\Project\food_13\images\chicken_wings\1009927.jpg
Image c:\Users\miche.LAPTOP-KKEENNGV\OneDrive\Desktop\Università\2° Anno\Advanced Programming and Deep Learning for AI\Part 2 (Lin)\Project\food_13\images\hot_dog\1114633.jpg is invalid: Dimensions are too small (512x193).
Deleted invalid image: c:\Users\miche.LAPTOP-KKEENNGV\OneDrive\Desktop\Università\2° Anno\Advanced Programming and Deep Learning for AI\Part 2 (Lin)\Project\food_13\imag

Now we are going to reduce the number of images in each folder. The 13th folders have 1000 images each one of them. We are going to reduce them of the 80%.

In [68]:
def reduce_images(input_dir, output_dir, num_images):
    # List all the files in the selected folder
    all_files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]
    
    # Random selection of the files that will be mantained
    files_to_mantain = random.sample(all_files, num_images)
    
    # Check that the output directory exists
    os.makedirs(output_dir, exist_ok=True)
    
    # CCopy selected files in the output directory
    for file in files_to_mantain:
        shutil.copy(os.path.join(input_dir, file), os.path.join(output_dir, file))
    
    print(f"Folder '{input_dir}' reduced to {num_images} images.")



input_dir = "food_13/images"
output_dir = "food_13_small"
# Percentage of images to mantain
num_images = 200
# Set the seed
random.seed(42)

# Apply the function to all the folders
for folder in os.listdir(input_dir):
    folder_dir = os.path.join(input_dir, folder)
    output_folder_dir = os.path.join(output_dir, folder)
    if os.path.isdir(folder_dir):
        reduce_images(folder_dir, output_folder_dir, num_images)

Folder 'food_13/images\beef_tartare' reduced to 200 images.
Folder 'food_13/images\bruschetta' reduced to 200 images.
Folder 'food_13/images\caesar_salad' reduced to 200 images.
Folder 'food_13/images\cannoli' reduced to 200 images.
Folder 'food_13/images\chicken_wings' reduced to 200 images.
Folder 'food_13/images\chocolate_cake' reduced to 200 images.
Folder 'food_13/images\club_sandwich' reduced to 200 images.
Folder 'food_13/images\dumplings' reduced to 200 images.
Folder 'food_13/images\hot_dog' reduced to 200 images.
Folder 'food_13/images\ice_cream' reduced to 200 images.
Folder 'food_13/images\pizza' reduced to 200 images.
Folder 'food_13/images\ramen' reduced to 200 images.


Now we create a csv file for the lables. Each images in the dataset will have its label.

In [75]:
# Create csv file

input_dir = "food_13_small"
csv_file_path = "labels.csv"

with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    writer = csv.writer(csv_file, delimiter=';')
    # Head
    writer.writerow(["id", "name", "label"])
    # Itera attraverso ogni cartella
    idx = 0
    for folder in os.listdir(input_dir):
        folder_path = os.path.join(input_dir, folder)
        if os.path.isdir(folder_path):
            # Itera attraverso i file nella cartella
            for file in os.listdir(folder_path):
                if os.path.isfile(os.path.join(folder_path, file)):
                    # Scrivi nome del file e label (nome della cartella)
                    writer.writerow([idx, file, folder])
                    idx += 1

Converts all the images in grayscale

In [71]:
def to_grayscale(pil_image: np.ndarray):
    if pil_image.ndim == 2: #.ndim check the dimension of the pil array
        return pil_image.copy()[None] #Image already B&W
    """
    None --> Aggiunge una nuova dimensione al primo asse. (H, W) --> (1, H, W)
    Nei framework come PyTorch, i modelli spesso richiedono input con una dimensione di batch, 
    anche se hai solo una singola immagine. [None] aggiunge questa dimensione di batch.
    Same is pil_image.copy()[np.newaxis, :, :]. Qui viene esplicitato l'utilizzo di np.newaxis e si specificano 
    tre dimensioni: una nuova dimensione (aggiunta come primo asse) seguita dagli assi originali (:, :).
    Questo è utile se sai esattamente quante dimensioni ha il tuo array.
    """
    if pil_image.ndim != 3:
        raise ValueError("image must have either shape (H, W) or (H, W, 3)")
    if pil_image.shape[2] == 3:
        image_copy = pil_image.copy()
    else:
        raise ValueError(f"image has shape (H, W, {pil_image.shape[2]}), but it should have (H, W, 3)")
    
    # Normalize the image to [0, 1] range
    rgb = image_copy / 255

    # Colorimetric conversion to grayscale
    C_linear = np.where(
        rgb < 0.04045,
        rgb / 12.92,
        ((rgb + 0.055) / 1.055) ** 2.4
    )

    Y_linear = 0.2126 * C_linear[:,:, 0] + 0.7152 * C_linear[:,:, 1] + 0.0722 * C_linear[:,:,2]
    
    Y = np.where(
        Y_linear < 0.0031308,
        12.92 * Y_linear,
        1.055 * Y_linear ** (1 / 2.4) - 0.055
    )
    grayscale = Y * 255
    
    # Mantain coherence between values in grayscale image and values original pil_image
    if np.issubdtype(pil_image.dtype, np.integer):
        grayscale = np.round(grayscale)

    # Converts the grayscale array to the data type (dtype) of the original pil_image
    grayscale = grayscale.astype(pil_image.dtype)

    # Add tnew dimension, as at the beginning with [None]
    grayscale = np.expand_dims(grayscale, axis=0)

    return grayscale

Now we prepare the images with the same dimension. So resize all the images at th esame dimension.

In [76]:
def prepare_image(image: np.ndarray, height: int, width: int):
    
    image = image.copy()

    # Cropping Height
    if image.shape[1] > height:
        crop_size = image.shape[1] - height
        crop_top = crop_size // 2
        crop_bottom = crop_size - crop_top
        image = image[:, crop_bottom:-crop_top, :]
    # Padding Height
    else: 
        padding_size = height - image.shape[1]      # Number of pixels to add
        top_pad = padding_size // 2                 # How many pad pixels on top
        bottom_pad = padding_size - top_pad         # How many pad pixels on bottom
        image = np.pad(image, ((0, 0), (top_pad, bottom_pad), (0, 0)), mode='edge')
    
    # Cropping Width
    if image.shape[2] > width:
        crop_size = image.shape[2] - width
        left_crop = crop_size // 2
        right_crop = crop_size - left_crop
        image = image[:, :, left_crop:-right_crop]
    # Padding Width
    else:
        padding_size = width - image.shape[2]
        left_pad = padding_size // 2
        right_pad = padding_size - left_pad
        image = np.pad(image, ((0, 0), (0, 0), (left_pad, right_pad)), mode='edge')
        """
        ((0, 0), (top_pad, bottom_pad), (0, 0)): Questo è l'argomento pad_width, che specifica quanto 
        padding deve essere aggiunto a ciascun lato dell'array lungo ciascuna delle sue dimensioni.
        Qui, stiamo specificando il padding solo per l'asse verticale (dimensione dell'immagine), 
        quindi (0, 0) significa che non viene aggiunto padding all'array lungo la prima e la terza 
        dimensione (l'asse dei canali dei colori per un'immagine a colori e la larghezza).
        mode='edge': Questo specifica la modalità di padding, che in questo caso è 'edge'. 
        La modalità 'edge' significa che il valore del bordo dell'immagine verrà utilizzato 
        per estendere il padding. In altre parole, i pixel lungo i bordi dell'immagine vengono replicati 
        per riempire lo spazio di padding
        
        https://numpy.org/doc/stable/reference/generated/numpy.pad.html
        """
    
    return image


In [77]:
my_image = Image.open("Prova/18511.jpg")
pil_image = np.array(my_image)
grayscale_img = to_grayscale(pil_image)
final_img = prepare_image(grayscale_img, height=224, width=224)

In [99]:
def ImagesDataset(Dataset):
    
    def __init__(self, dataset_dir, width = 224, height = 224, dtype = None):
        """
        image_dir: direcotry of the images
        dtype: 
        """
        if width < 224 or height < 224:
            raise ValueError("Width and height must be at least 224.")
        self.width = width
        self.height = height
        
        # Get absolute paths of image files and sort them
        abs_path = os.path.join(dataset_dir) #Take absolute path
        # all_files = os.listdir(abs_path) #List of files in abs path

        files = []
        idx_classes = {}

        for f in os.listdir(abs_path):
            if f.split('.')[-1] == "jpg":
                files.append(os.path.join(abs_path, f))

            if f.split('.')[-1] == "csv":
                # Load class names from CSV file and assign class IDs
                self.df = pd.read_csv(os.path.join(abs_path, f), sep=';', header=0)
                #Take one time all the classes in column label, and convert them to a list 
                classes = self.df["label"].unique().tolist() 
                classes = sorted(classes)
                for idx, class_i in enumerate(classes):
                    class2id[class_i] = idx
                self.df['classes_idx'] = self.df['label'].map(class2id)
        
        self.image_filepath = files
        
        self.dtype = dtype
    
    def __getitem__(self, index):
        # Load i-th image
        image = Image.open(self.files_paths[index])
    
        #Convert the image in an numpy array with specified dtype if dtype is not None
        image = np.array(image, dtype=self.dtype)
     
        #Convert the image to grayscale
        image = to_grayscale(image)
        
        #Used the prepare_image function to rescale the image to the width and height specified in the __init__
        image = prepare_image(image, self.width, self.height)
        
        file_name = os.path.basename(self.files_paths[index])
        class_id = self.df['classes_idx'][self.df['name'] == file_name].values[0]
        class_name = self.df['label'][self.df['name'] == file_name].values[0]
        
        return image, class_name, class_id

    def __len__(self):
        return len(self.image_filepath)

In [100]:
image_dir = r"C:\Users\miche.LAPTOP-KKEENNGV\OneDrive\Desktop\Università\2° Anno\Advanced Programming and Deep Learning for AI\Part 2 (Lin)\Project\Prova"
dataset = ImagesDataset(image_dir, 224, 224, int)
for resized_image, classid, classname, _ in dataset:
    print(f'image shape: {resized_image.shape}, dtype: {resized_image.dtype}, 'f'classid: {classid}, classname: {classname}\n')
    

TypeError: ImagesDataset() takes 1 positional argument but 4 were given