Downloaded 'imagenet64.tar'


In [13]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split

class ImageNetDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads image batches from disk on demand.

    Each batch is produced by slicing ``image_paths`` and ``labels`` with the
    same index range, reading every image in the slice resized to 64x64, and
    stacking the results into NumPy arrays.

    Args:
        image_paths: Sliceable collection of image file paths.
        labels: Sliceable collection of labels, aligned index-for-index with
            ``image_paths``.
        batch_size: Maximum number of samples per batch; must be positive.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """

    def __init__(self, image_paths, labels, batch_size):
        # Run the base initializer; newer Keras releases warn when a Sequence
        # subclass skips it, and it is a harmless no-op on older ones.
        super().__init__()
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, including a trailing partial batch."""
        n_samples = len(self.image_paths)
        # Ceiling division: floor division would silently drop the last
        # (smaller) batch, so those samples were never trained on or evaluated.
        return (n_samples + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Load and return batch number ``idx`` as ``(images, labels)`` arrays.

        The final batch may hold fewer than ``batch_size`` samples.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.image_paths[start:stop]
        batch_labels = self.labels[start:stop]

        # presumably yields an (n, 64, 64, 3) array for RGB inputs -- TODO confirm
        batch_images = [
            img_to_array(load_img(img_path, target_size=(64, 64)))
            for img_path in batch_paths
        ]

        return np.array(batch_images), np.array(batch_labels)


Extracted 'imagenet64.tar' to 'imagenet64' directory


In [16]:
from tqdm import tqdm

def active_learning_random_sampling(image_paths, labels, model, batch_size, n_initial, n_queries, n_samples_per_query):
    """Train ``model`` in rounds while growing the labeled pool by random sampling.

    A random subset of ``n_initial`` samples forms the initial labeled pool.
    Each round trains the model for one epoch on the current labeled pool and
    then moves up to ``n_samples_per_query`` randomly chosen samples from the
    unlabeled pool into the labeled pool.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``.
        model: Compiled Keras model; it is trained in place via ``model.fit``.
        batch_size: Batch size handed to ``ImageNetDataGenerator``.
        n_initial: Number of samples in the initial labeled pool.
        n_queries: Number of train-then-sample rounds.
        n_samples_per_query: Number of samples to move per round. Fewer are
            moved once the unlabeled pool runs short.

    Returns:
        The same ``model`` object after all rounds of training.

    Raises:
        ValueError: From ``np.random.choice`` if ``n_initial`` exceeds the
            number of available samples.
    """
    image_paths = np.asarray(image_paths)
    labels = np.asarray(labels)
    n_total = len(image_paths)

    # Split data into labeled and unlabeled sets. A boolean mask gives the same
    # ascending-order split as testing `i not in labeled_indices` for every
    # index, but in linear time and without the float-typed empty index array
    # that breaks indexing when nothing is left over.
    labeled_indices = np.random.choice(n_total, size=n_initial, replace=False)
    is_unlabeled = np.ones(n_total, dtype=bool)
    is_unlabeled[labeled_indices] = False

    labeled_image_paths = image_paths[labeled_indices]
    labeled_labels = labels[labeled_indices]
    unlabeled_image_paths = image_paths[is_unlabeled]
    unlabeled_labels = labels[is_unlabeled]

    for query in range(n_queries):
        print(f"Query {query + 1}/{n_queries}")

        # Train the model on the labeled data. The generator is passed as-is:
        # wrapping it in tqdm hides the Sequence type from Keras, and fit()
        # already reports progress on its own.
        labeled_data_generator = ImageNetDataGenerator(labeled_image_paths, labeled_labels, batch_size)
        model.fit(labeled_data_generator, epochs=1)

        # Randomly sample the data for the next query, never asking for more
        # than the unlabeled pool still holds.
        n_remaining = len(unlabeled_image_paths)
        n_draw = min(n_samples_per_query, n_remaining)
        if n_draw == 0:
            # Nothing to move; later rounds keep training on the current pool.
            continue
        sampled_indices = np.random.choice(n_remaining, size=n_draw, replace=False)

        # Update the labeled and unlabeled sets
        labeled_image_paths = np.concatenate([labeled_image_paths, unlabeled_image_paths[sampled_indices]])
        labeled_labels = np.concatenate([labeled_labels, unlabeled_labels[sampled_indices]])

        keep = np.ones(n_remaining, dtype=bool)
        keep[sampled_indices] = False
        unlabeled_image_paths = unlabeled_image_paths[keep]
        unlabeled_labels = unlabeled_labels[keep]

    # NOTE(review): samples moved in the final round are never trained on,
    # because training happens before sampling in each round -- confirm intended.
    return model


In [9]:
def get_image_paths_and_labels(root_dir):
    """Collect image paths and integer class labels from a class-per-folder tree.

    Every sub-directory of ``root_dir`` is treated as one class. Classes are
    numbered ``0..K-1`` in sorted order of their directory names, so two trees
    with the same class folders (e.g. train and val) get the same mapping
    regardless of the order in which the filesystem lists entries.

    Args:
        root_dir: Directory whose immediate sub-directories each hold the
            files of one class.

    Returns:
        A ``(image_paths, labels)`` tuple of equal-length lists, where
        ``labels[i]`` is the class index of ``image_paths[i]``.
    """
    # os.listdir returns entries in arbitrary order; sort for a stable mapping
    # and skip plain files that would otherwise break the nested listdir call.
    class_names = sorted(
        entry
        for entry in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, entry))
    )

    image_paths = []
    labels = []

    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(root_dir, class_name)
        class_image_paths = [
            os.path.join(class_dir, image_name)
            for image_name in sorted(os.listdir(class_dir))
        ]
        image_paths.extend(class_image_paths)
        labels.extend([label] * len(class_image_paths))

    return image_paths, labels


In [12]:
# Locations of the training and validation splits on disk
train_root = "E:/ML_notebooks/Act_Learn/imagenet64/imagenet64/train"
val_root = "E:/ML_notebooks/Act_Learn/imagenet64/imagenet64/val"

# Gather (paths, labels) for the training split first, then the validation split
(train_image_paths, train_labels), (val_image_paths, val_labels) = (
    get_image_paths_and_labels(split_root) for split_root in (train_root, val_root)
)

# Build a ResNet50V2 without pretrained weights for 64x64 RGB inputs and
# 1000 output classes, then compile it for integer-label classification
model = tf.keras.applications.ResNet50V2(
    weights=None,
    input_shape=(64, 64, 3),
    classes=1000,
)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [17]:
# Experiment settings: batch size, initial labeled pool size, number of
# rounds, and how many samples are added to the labeled pool per round
batch_size = 64
n_initial = 240_000
n_queries = 10
n_samples_per_query = 24_000

# Run the random-sampling training loop on the training split
model = active_learning_random_sampling(
    image_paths=train_image_paths,
    labels=train_labels,
    model=model,
    batch_size=batch_size,
    n_initial=n_initial,
    n_queries=n_queries,
    n_samples_per_query=n_samples_per_query,
)

# Evaluate the model on the validation data; with the single compiled metric,
# index 1 of the evaluate() result holds the accuracy (index 0 is the loss)
val_data_generator = ImageNetDataGenerator(val_image_paths, val_labels, batch_size)
val_metrics = model.evaluate(val_data_generator)
val_accuracy = val_metrics[1]
print(f"Validation accuracy: {val_accuracy:.4f}")


KeyboardInterrupt: 