In [1]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

import tensorflow as tf

import random

import cv2

2024-05-08 11:31:38.952525: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Few-shot task configuration.
# NOTE(review): presumably passed to DataLoader(num_classes, num_samples_per_class) — confirm against the calling cell.
NUM_CLASSES = 5  # N: number of classes per task (N-way)
NUM_SAMPLES_PER_CLASS = 1  # K: number of samples drawn per class (K-shot)

In [3]:
PATH = "/mnt/d/Numing/projects/aibuilders/pills_detection/dataset2/test/images"

# Recursively collect the full path of every file found under PATH
all_files = [
    os.path.join(root, fname)
    for root, _subdirs, fnames in os.walk(PATH)
    for fname in fnames
]
print(f"The dataset contains {len(all_files)} images.")

The dataset contains 90 images.


In [5]:
def read_and_propocess_image(img_path, image_dim=None):
    """
    Takes an image path and returns a grayscale, resized, inverted, and
    flattened image as array

    Args:
    * img_path: Full path to file
    * image_dim: Resized image shape (width, height); None keeps the original size

    Returns:
    a 1-D np.float32 array: the grayscale image scaled by 1/255, inverted
    (1.0 - value, so dark pixels end up close to 1.0) and flattened

    Raises:
    * FileNotFoundError: img_path does not exist
    * ValueError: the path exists but OpenCV could not read/decode it
    """
    # Read image as grayscale
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an opaque error in resize/astype.
    if img is None:
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")
        raise ValueError(f"Could not read image: {img_path}")

    # Resize image
    if image_dim is not None:
        img = cv2.resize(img, image_dim, interpolation=cv2.INTER_AREA)

    # Scale to [0, 1] (assuming 8-bit pixel data), then invert the intensities
    img = img.astype(np.float32) / 255.0
    img = 1.0 - img

    # Flatten to a single feature vector
    img = img.reshape(-1)
    return img

In [6]:
# NOTE(review): presumably the batch_size passed to DataLoader.sample_batch (tasks per batch) — confirm against the calling cell.
BATCH_SIZE = 16

class DataLoader():
    """
    Data Loader Few-Shot Learning
    Forked from https://github.com/cbfinn/maml

    Each split directory is read as two levels of sub-folders
    (<split>/<group>/<class>/<image files>); every second-level folder is
    treated as one class.
    """

    def __init__(self, num_classes, num_samples_per_class, image_dim=(28, 28)):
        """
        args:
        * num_classes: N classes in support set
        * num_samples_per_class: K samples per class in support set
        * image_dim: (width, height) every image is resized to before flattening
        """
        self.num_classes = num_classes
        self.num_samples_per_class = num_samples_per_class
        self.image_dim = image_dim
        print(f"Data Loader for {num_classes}-way, {num_samples_per_class}-shot Few-Shot Learning.")

        # Dataset specific
        self.meta_train_folders = "/mnt/d/Numing/projects/aibuilders/pills_detection/dataset2/train/images"
        self.meta_val_folders = "/mnt/d/Numing/projects/aibuilders/pills_detection/dataset2/validation/images"
        self.meta_test_folders = "/mnt/d/Numing/projects/aibuilders/pills_detection/dataset2/test/images"

    def sample_batch(self, batch_type, batch_size):
        """
        Samples batch_size independent N-way, K-shot tasks from one split.

        args:
        * batch_type: which split to sample from, one of 'train', 'val', 'test'
        * batch_size: number of tasks to sample

        returns:
        * image_batch with shape [batch_size, num_classes, num_samples_per_class, (im_size * im_size)]
        * label_batch with shape [batch_size, num_classes, num_samples_per_class, num_classes]

        raises:
        * ValueError: batch_type is not a known split, or the split holds
          fewer class folders than num_classes
        """
        # Set the folder according to purpose
        if batch_type == 'train':
            base_path = self.meta_train_folders
        elif batch_type == 'val':
            base_path = self.meta_val_folders
        elif batch_type == 'test':
            base_path = self.meta_test_folders
        else:
            # Fail fast: merely printing here left base_path unbound and
            # crashed a few lines later with an unrelated UnboundLocalError.
            raise ValueError(
                f"'{batch_type}' is not a valid value for batch_type. "
                "batch_type must be either one of 'train', 'val', 'test'."
            )

        # Collect every <group>/<class> directory; stray files at either level
        # are skipped because os.listdir would fail on them.
        class_folders = []
        for group in os.listdir(base_path):
            group_path = os.path.join(base_path, group)
            if not os.path.isdir(group_path):
                continue
            for class_name in os.listdir(group_path):
                class_path = os.path.join(group_path, class_name)
                if os.path.isdir(class_path):
                    class_folders.append(class_path)

        if len(class_folders) < self.num_classes:
            raise ValueError(
                f"Cannot sample {self.num_classes} classes: only "
                f"{len(class_folders)} class folders found under '{base_path}'."
            )

        all_image_batches, all_label_batches = [], []

        for _ in range(batch_size):
            # Sample N different classes from the folders
            sampled_class_folders = random.sample(class_folders, self.num_classes)

            # Sample K images per class and associate labels
            # (the label is the class position within this task)
            labels_and_images = [
                (label, os.path.join(class_path, image))
                for label, class_path in enumerate(sampled_class_folders)
                for image in random.sample(os.listdir(class_path), self.num_samples_per_class)
            ]

            labels = [label for label, _ in labels_and_images]
            images = [read_and_propocess_image(image_path, self.image_dim)
                      for _, image_path in labels_and_images]

            # Format images to fit [num_classes, num_samples_per_class, (im_size * im_size)]
            images = np.stack(images)
            images = np.reshape(images, (self.num_classes, self.num_samples_per_class, -1))

            # Format labels one-hot encoded to fit [num_classes, num_samples_per_class, num_classes]
            labels = np.array(labels)
            labels = np.reshape(labels, (self.num_classes, self.num_samples_per_class))
            labels = np.eye(self.num_classes)[labels]

            # Glue labels and images together so that each image stays paired
            # with its label while the class order is shuffled.
            batch = np.concatenate([labels, images], 2)

            # Shuffle classes such that classes are not associated with the order
            # (in place, independently for every sample index p)
            for p in range(self.num_samples_per_class):
                np.random.shuffle(batch[:, p])

            labels = batch[:, :, :self.num_classes]
            images = batch[:, :, self.num_classes:]

            all_image_batches.append(images)
            all_label_batches.append(labels)

        all_image_batches = np.stack(all_image_batches)
        all_label_batches = np.stack(all_label_batches)

        return all_image_batches, all_label_batches