In [2]:
import numpy as np
from tensorflow.keras.datasets import cifar10, cifar100

In [3]:
def preprocess_images(images):
    """Turn a batch of colour images into blurred, normalised grayscale tensors.

    Each image goes through: grayscale conversion -> histogram equalisation
    -> scaling to [0, 1] -> 5x5 Gaussian blur.

    Args:
        images: Iterable of 3-channel images. The reshape below requires every
            grayscale image to hold exactly 32 * 32 pixels.
            (cv2.equalizeHist presumably needs 8-bit input - confirm that
            callers pass uint8 arrays.)

    Returns:
        float32 numpy array of shape (N, 32, 32, 1), where N is the number of
        input images (N may be 0).
    """
    # NOTE(review): COLOR_BGR2GRAY assumes BGR channel order. The Keras CIFAR
    # loaders imported by this notebook appear to yield RGB data - confirm
    # whether cv2.COLOR_RGB2GRAY was intended. Left unchanged so existing
    # pixel values stay identical.
    gray_images = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in images]
    # Equalize the histogram of the grayscale images for better contrast
    equalized_images = [cv2.equalizeHist(img) for img in gray_images]
    # Add the explicit single-channel axis: (N, 32, 32, 1)
    images_array = np.array(equalized_images).reshape((-1, 32, 32, 1))
    # Normalize pixel values to the range [0, 1]
    images_normalized = images_array.astype('float32') / 255.0
    # Apply Gaussian blur to the normalized images
    blurred_images = np.array(
        [cv2.GaussianBlur(img, (5, 5), 0) for img in images_normalized],
        dtype='float32',
    )

    # OpenCV's Python bindings return a 2-D array for single-channel input,
    # which would silently drop the channel axis added above. Restore it so the
    # result really is (N, 32, 32, 1); this also gives an empty batch a
    # well-formed (0, 32, 32, 1) shape.
    return blurred_images.reshape(images_normalized.shape)

In [4]:
def display_histogram(train_labels, test_labels, title="Dataset Label Distribution"):
    """Show the train and test label distributions as side-by-side histograms.

    Args:
        train_labels: Labels of the training split. May be a flat sequence or
            an array with extra singleton axes such as (N, 1); it is flattened
            before plotting.
        test_labels: Labels of the test split, same accepted forms.
        title: Main title drawn above both panels.

    Returns:
        None. The figure is displayed with plt.show().
    """
    # One figure, two panels side by side.
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))

    panels = (
        (axes[0], train_labels, 'Train Labels', 'blue'),
        (axes[1], test_labels, 'Test Labels', 'green'),
    )
    for ax, labels, panel_title, color in panels:
        # Flatten first: set() on an (N, 1) array iterates over unhashable
        # rows and raises TypeError, so the class count is taken from the
        # flattened labels instead.
        flat_labels = np.ravel(labels)
        # One bin per distinct label; at least one so empty input still plots.
        n_bins = max(1, len(np.unique(flat_labels)))

        ax.hist(flat_labels, bins=n_bins, color=color, alpha=0.7)
        ax.set_title(panel_title)
        ax.set_xlabel('Label')
        ax.set_ylabel('Frequency')

    # Set the main title for the entire figure.
    fig.suptitle(title)

    # Adjust the layout to prevent subplot overlap and display the figure.
    fig.tight_layout()
    plt.show()

In [7]:
def filter_and_combine_datasets(cifar10_train_images, cifar10_train_labels, cifar100_train_images, cifar100_train_labels):
    """Keep only the selected CIFAR-10 / CIFAR-100 classes and merge them.

    Args:
        cifar10_train_images: Sequence of CIFAR-10 images.
        cifar10_train_labels: Sequence of CIFAR-10 labels; each entry is
            indexable and its first element is the integer class id.
        cifar100_train_images: Sequence of CIFAR-100 images.
        cifar100_train_labels: Sequence of CIFAR-100 fine labels, same layout.

    Returns:
        (combined_images, combined_labels): two parallel lists. Images keep
        their input order, CIFAR-10 first and CIFAR-100 after. Labels are the
        prefixed class names ('C10_...' / 'C100_...'), so the two datasets can
        never collide.
    """
    # Class name -> class id(s). Ids follow the official label indices of each
    # dataset (CIFAR-100 fine labels are the alphabetically ordered class list).
    cifar10_classes = {'C10_automobile': 1, 'C10_bird': 2, 'C10_cat': 3, 'C10_deer': 4, 'C10_dog': 5, 'C10_horse': 7, 'C10_truck': 9}
    cifar100_classes = {
        'C100_cattle': 19, 'C100_fox': 34, 'C100_baby': 2, 'C100_boy': 11, 'C100_girl': 35, 'C100_man': 46, 'C100_woman': 98,
        'C100_rabbit': 65, 'C100_squirrel': 80,
        # "trees" is a CIFAR-100 superclass: maple, oak, palm, pine, willow.
        'C100_trees': (47, 52, 56, 59, 96),
        'C100_bicycle': 8, 'C100_bus': 13,
        'C100_motorcycle': 48, 'C100_pickup truck': 58, 'C100_train': 90, 'C100_lawn-mower': 41, 'C100_tractor': 89,
    }

    def build_id_lookup(class_mapping):
        """Invert a name -> id(s) table into id -> name, rejecting duplicate ids."""
        id_to_name = {}
        for name, ids in class_mapping.items():
            for class_id in (ids if isinstance(ids, tuple) else (ids,)):
                if class_id in id_to_name:
                    # A shared id would give one image two conflicting labels.
                    raise ValueError(
                        f"class id {class_id} is mapped to both "
                        f"{id_to_name[class_id]!r} and {name!r}"
                    )
                id_to_name[class_id] = name
        return id_to_name

    def filter_dataset(images, labels, class_mapping):
        """Return the images whose class id is in the mapping, with their names."""
        id_to_name = build_id_lookup(class_mapping)
        filtered_images = []
        filtered_labels = []
        for img, lbl in zip(images, labels):
            # Labels arrive as one-element rows; the id is the first entry.
            name = id_to_name.get(int(lbl[0]))
            if name is not None:
                filtered_images.append(img)
                filtered_labels.append(name)
        return filtered_images, filtered_labels

    cifar10_filtered_images, cifar10_filtered_labels = filter_dataset(cifar10_train_images, cifar10_train_labels, cifar10_classes)
    cifar100_filtered_images, cifar100_filtered_labels = filter_dataset(cifar100_train_images, cifar100_train_labels, cifar100_classes)

    # Combine filtered datasets
    combined_images = cifar10_filtered_images + cifar100_filtered_images
    combined_labels = cifar10_filtered_labels + cifar100_filtered_labels

    return combined_images, combined_labels
