In [1]:
import numpy as np
import os
import pickle
import cv2
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.model_selection import KFold

In [2]:
# Step 1: Load CIFAR-10 data (Noisy data)
def load_noise_cifar10(noise_level):
    X = np.load(f'{noise_level}_cifar10.npy')
    return X

In [3]:
# Step 2: Resize images to original size (32x32)
def resize_image_to_original_size(image, target_size=(32, 32)):
    """ Resize the image back to 32x32 """
    return cv2.resize(image, target_size)

In [4]:
def resize_dataset(X):
    """ Resize the entire dataset """
    return np.array([resize_image_to_original_size(img) for img in X])

In [5]:
# Step 3: Build the MLP model
def build_mlp_model(input_shape, num_classes, learning_rate, num_units, dropout_rate):
    model = Sequential([
        Flatten(input_shape=input_shape),  # Dynamically adjust input shape
        Dense(num_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(num_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [6]:
# Step 4: Define cross-validation function
def cross_validate_and_select_best_params(model, X, y, n_splits=5):
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    accuracies = []
    for train_index, test_index in kfold.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model.fit(X_train, y_train, epochs=5, batch_size=128, verbose=0)
        scores = model.evaluate(X_test, y_test, verbose=0)
        accuracies.append(scores[1])
    return np.mean(accuracies)

In [7]:
# Step 5: Load labels
def load_cifar10_labels(data_dir):
    Y = []
    for i in range(1, 6):
        with open(os.path.join(data_dir, f'data_batch_{i}'), 'rb') as file:
            batch = pickle.load(file, encoding='latin1')
            Y.extend(batch['labels'])
    with open(os.path.join(data_dir, 'test_batch'), 'rb') as file:
        test_batch = pickle.load(file, encoding='latin1')
        Y_test = test_batch['labels']
    Y.extend(Y_test)
    Y = np.array(Y)
    return Y


In [8]:
# Step 6: Process and train on noisy CIFAR-10 datasets
def process_and_train_on_noisy_data():
    # List of noise levels
    noise_levels = ['small_noise', 'medium_noise', 'large_noise']

    # Load labels (assumed to be the same for all noise levels)
    data_dir = 'cifar-10-batches-py'
    y_combined = load_cifar10_labels(data_dir)
    y_combined = to_categorical(y_combined, 10)

    # Hyperparameters
    learning_rates = [0.001, 0.0005, 0.0015, 0.01, 0.005]
    unit_sizes = [8, 16, 32, 64, 128]
    dropout_rates = [0]

    # Loop through each noise level
    for noise_level in noise_levels:
        print(f"Processing {noise_level} dataset...")
        
        # Load and resize the dataset
        X_combined = load_noise_cifar10(noise_level)
        X_combined = resize_dataset(X_combined)
        X_combined = X_combined.astype('float32') / 255.0

        highest_accuracy = 0
        highest_config = []

        # Loop through hyperparameter combinations
        for learning_rate in learning_rates:
            for num_units in unit_sizes:
                for dropout_rate in dropout_rates:
                    # Build and train model
                    model = build_mlp_model((32, 32, 3), 10, learning_rate, num_units, dropout_rate)
                    avg_accuracy = cross_validate_and_select_best_params(model, X_combined, y_combined)
                    
                    # Print current config and accuracy
                    print(f"Config: LR={learning_rate}, Units={num_units}, Dropout={dropout_rate} | Accuracy={avg_accuracy:.4f}")
                    
                    # Track the highest accuracy
                    if avg_accuracy > highest_accuracy:
                        highest_accuracy = avg_accuracy
                        highest_config = [learning_rate, num_units, dropout_rate]
                        print("New highest found:", highest_config, "with accuracy:", highest_accuracy)

        # Print final results for each noise level
        print(f"Highest configuration for {noise_level}: {highest_config}")
        print(f"Highest accuracy for {noise_level}: {highest_accuracy}")

# Call the main function to process and train on noisy datasets
process_and_train_on_noisy_data()

Processing small_noise dataset...


  super().__init__(**kwargs)


Config: LR=0.001, Units=8, Dropout=0 | Accuracy=0.2649
New highest found: [0.001, 8, 0] with accuracy: 0.26493332982063295
Config: LR=0.001, Units=16, Dropout=0 | Accuracy=0.2697
New highest found: [0.001, 16, 0] with accuracy: 0.2696666717529297
Config: LR=0.001, Units=32, Dropout=0 | Accuracy=0.4320
New highest found: [0.001, 32, 0] with accuracy: 0.43199999928474425
Config: LR=0.001, Units=64, Dropout=0 | Accuracy=0.4818
New highest found: [0.001, 64, 0] with accuracy: 0.4818000078201294
Config: LR=0.001, Units=128, Dropout=0 | Accuracy=0.4968
New highest found: [0.001, 128, 0] with accuracy: 0.4968000054359436
Config: LR=0.0005, Units=8, Dropout=0 | Accuracy=0.1696
Config: LR=0.0005, Units=16, Dropout=0 | Accuracy=0.3295
Config: LR=0.0005, Units=32, Dropout=0 | Accuracy=0.4092
Config: LR=0.0005, Units=64, Dropout=0 | Accuracy=0.4722
Config: LR=0.0005, Units=128, Dropout=0 | Accuracy=0.4996
New highest found: [0.0005, 128, 0] with accuracy: 0.49961666464805604
Config: LR=0.0015, Uni

In [9]:
# Step 6: Process and train on noisy CIFAR-10 datasets
def process_and_train_on_noisy_data():
    # List of noise levels
    noise_levels = ['small_noise', 'medium_noise', 'large_noise']

    # Load labels (assumed to be the same for all noise levels)
    data_dir = 'cifar-10-batches-py'
    y_combined = load_cifar10_labels(data_dir)
    y_combined = to_categorical(y_combined, 10)

    # Hyperparameters
    learning_rates = [0.001, 0.0005, 0.0015, 0.01, 0.005]
    unit_sizes = [256, 512]
    dropout_rates = [0]

    # Loop through each noise level
    for noise_level in noise_levels:
        print(f"Processing {noise_level} dataset...")
        
        # Load and resize the dataset
        X_combined = load_noise_cifar10(noise_level)
        X_combined = resize_dataset(X_combined)
        X_combined = X_combined.astype('float32') / 255.0

        highest_accuracy = 0
        highest_config = []

        # Loop through hyperparameter combinations
        for learning_rate in learning_rates:
            for num_units in unit_sizes:
                for dropout_rate in dropout_rates:
                    # Build and train model
                    model = build_mlp_model((32, 32, 3), 10, learning_rate, num_units, dropout_rate)
                    avg_accuracy = cross_validate_and_select_best_params(model, X_combined, y_combined)
                    
                    # Print current config and accuracy
                    print(f"Config: LR={learning_rate}, Units={num_units}, Dropout={dropout_rate} | Accuracy={avg_accuracy:.4f}")
                    
                    # Track the highest accuracy
                    if avg_accuracy > highest_accuracy:
                        highest_accuracy = avg_accuracy
                        highest_config = [learning_rate, num_units, dropout_rate]
                        print("New highest found:", highest_config, "with accuracy:", highest_accuracy)

        # Print final results for each noise level
        print(f"Highest configuration for {noise_level}: {highest_config}")
        print(f"Highest accuracy for {noise_level}: {highest_accuracy}")

# Call the main function to process and train on noisy datasets
process_and_train_on_noisy_data()

Processing small_noise dataset...
Config: LR=0.001, Units=256, Dropout=0 | Accuracy=0.5099
New highest found: [0.001, 256, 0] with accuracy: 0.5098833322525025
Config: LR=0.001, Units=512, Dropout=0 | Accuracy=0.5239
New highest found: [0.001, 512, 0] with accuracy: 0.5239499986171723
Config: LR=0.0005, Units=256, Dropout=0 | Accuracy=0.5275
New highest found: [0.0005, 256, 0] with accuracy: 0.5274666547775269
Config: LR=0.0005, Units=512, Dropout=0 | Accuracy=0.5415
New highest found: [0.0005, 512, 0] with accuracy: 0.5414500057697296
Config: LR=0.0015, Units=256, Dropout=0 | Accuracy=0.4942
Config: LR=0.0015, Units=512, Dropout=0 | Accuracy=0.5064
Config: LR=0.01, Units=256, Dropout=0 | Accuracy=0.1953
Config: LR=0.01, Units=512, Dropout=0 | Accuracy=0.0985
Config: LR=0.005, Units=256, Dropout=0 | Accuracy=0.4446
Config: LR=0.005, Units=512, Dropout=0 | Accuracy=0.4122
Highest configuration for small_noise: [0.0005, 512, 0]
Highest accuracy for small_noise: 0.5414500057697296
Process