In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import tensorflow.keras.backend as K


In [3]:
# Load and preprocess MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize and reshape the data
x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0

# Add Gaussian noise to the data (doubled noise factor)
noise_factor = 0.1  # Increased from 0.5 to 1.0
x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape)
x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape)

# Clip the noisy images to be between 0 and 1
x_train_noisy = np.clip(x_train_noisy, 0., 1.)
x_test_noisy = np.clip(x_test_noisy, 0., 1.)

# Convert labels to one-hot encoding
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Create a simple MLP model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10)
])

# Compile model with MSE loss
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['accuracy']
)

# Train the model with noisy data
history = model.fit(
    x_train_noisy, y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.1,
    verbose=1
)

# Evaluate on test set with noisy data
test_loss, test_accuracy = model.evaluate(x_test_noisy, y_test, verbose=0)
print(f"\nTest accuracy (noisy): {test_accuracy:.4f}")
print(f"Test loss (MSE, noisy): {test_loss:.4f}")

# Make predictions on noisy test data and calculate accuracy manually to verify
y_pred = model.predict(x_test_noisy)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)
manual_accuracy = np.mean(y_pred_classes == y_test_classes)
print(f"Manual verification - Test accuracy (noisy): {manual_accuracy:.4f}")

# For comparison, evaluate on clean test data
clean_test_loss, clean_test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"\nTest accuracy (clean): {clean_test_accuracy:.4f}")
print(f"Test loss (MSE, clean): {clean_test_loss:.4f}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test accuracy (noisy): 0.9753
Test loss (MSE, noisy): 0.0056
Manual verification - Test accuracy (noisy): 0.9753

Test accuracy (clean): 0.9708
Test loss (MSE, clean): 0.0101


In [4]:
# Create a custom loss function combining MSE and PCC
def custom_loss(y_true, y_pred):
    # MSE term
    mse = tf.reduce_mean(tf.square(y_pred - y_true))
    
    # PCC term
    y_true_centered = y_true - tf.reduce_mean(y_true) 
    y_pred_centered = y_pred - tf.reduce_mean(y_pred)
    
    cov = tf.reduce_sum(y_true_centered * y_pred_centered)
    std_y_true = tf.sqrt(tf.reduce_sum(tf.square(y_true_centered)))
    std_y_pred = tf.sqrt(tf.reduce_sum(tf.square(y_pred_centered)))
    
    pcc = cov / (std_y_true * std_y_pred + K.epsilon())
    
    # Calculate coefficient to match MSE range
    y_true_min = tf.reduce_min(y_true)
    y_true_max = tf.reduce_max(y_true)
    coef = tf.abs(y_true_max - y_true_min)  # Maximum possible MSE value
    
    # Combined loss with coefficient matching MSE range
    return mse + coef * (1.0 - pcc)

# Create the same MLP model
model_custom = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10)
])

# Compile with custom loss
model_custom.compile(
    optimizer='adam',
    loss=custom_loss,
    metrics=['accuracy']
)

# Train the model with noisy data
history_custom = model_custom.fit(
    x_train_noisy, y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.1,
    verbose=1
)

# Evaluate on test set with noisy data
test_loss_custom, test_accuracy_custom = model_custom.evaluate(x_test_noisy, y_test, verbose=0)
print(f"\nCustom Loss - Test accuracy (noisy): {test_accuracy_custom:.4f}")
print(f"Custom Loss - Test loss (noisy): {test_loss_custom:.4f}")

# Manual verification with noisy data
y_pred_custom = model_custom.predict(x_test_noisy)
y_pred_custom_classes = np.argmax(y_pred_custom, axis=1)
manual_accuracy_custom = np.mean(y_pred_custom_classes == y_test_classes)
print(f"Custom Loss - Manual verification accuracy (noisy): {manual_accuracy_custom:.4f}")

# For comparison, evaluate on clean test data
clean_test_loss_custom, clean_test_accuracy_custom = model_custom.evaluate(x_test, y_test, verbose=0)
print(f"\nCustom Loss - Test accuracy (clean): {clean_test_accuracy_custom:.4f}")
print(f"Custom Loss - Test loss (clean): {clean_test_loss_custom:.4f}")

# Compare results
print("\nComparison:")
print(f"Original MSE - Test accuracy (noisy): {test_accuracy:.4f}")
print(f"Custom Loss - Test accuracy (noisy): {test_accuracy_custom:.4f}")
print(f"Original MSE - Test accuracy (clean): {clean_test_accuracy:.4f}")
print(f"Custom Loss - Test accuracy (clean): {clean_test_accuracy_custom:.4f}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Custom Loss - Test accuracy (noisy): 0.9756
Custom Loss - Test loss (noisy): 0.0339
Custom Loss - Manual verification accuracy (noisy): 0.9756

Custom Loss - Test accuracy (clean): 0.9713
Custom Loss - Test loss (clean): 0.0515

Comparison:
Original MSE - Test accuracy (noisy): 0.9753
Custom Loss - Test accuracy (noisy): 0.9756
Original MSE - Test accuracy (clean): 0.9708
Custom Loss - Test accuracy (clean): 0.9713


In [5]:
# Load and preprocess MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize and reshape the data
x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0

# Add Gaussian noise to the data (doubled noise factor)
noise_factor = 3  # Increased from 0.5 to 1.0
x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape)
x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape)

# Clip the noisy images to be between 0 and 1
x_train_noisy = np.clip(x_train_noisy, 0., 1.)
x_test_noisy = np.clip(x_test_noisy, 0., 1.)

# Convert labels to one-hot encoding
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Create a simple MLP model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10)
])

# Compile model with MSE loss
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['accuracy']
)

# Train the model with noisy data
history = model.fit(
    x_train_noisy, y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.1,
    verbose=1
)

# Evaluate on test set with noisy data
test_loss, test_accuracy = model.evaluate(x_test_noisy, y_test, verbose=0)
print(f"\nTest accuracy (noisy): {test_accuracy:.4f}")
print(f"Test loss (MSE, noisy): {test_loss:.4f}")

# Make predictions on noisy test data and calculate accuracy manually to verify
y_pred = model.predict(x_test_noisy)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)
manual_accuracy = np.mean(y_pred_classes == y_test_classes)
print(f"Manual verification - Test accuracy (noisy): {manual_accuracy:.4f}")

# For comparison, evaluate on clean test data
clean_test_loss, clean_test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"\nTest accuracy (clean): {clean_test_accuracy:.4f}")
print(f"Test loss (MSE, clean): {clean_test_loss:.4f}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test accuracy (noisy): 0.2697
Test loss (MSE, noisy): 0.0842
Manual verification - Test accuracy (noisy): 0.2697

Test accuracy (clean): 0.6468
Test loss (MSE, clean): 0.0624


In [6]:
# Create a custom loss function combining MSE and PCC
def custom_loss(y_true, y_pred):
    # MSE term
    mse = tf.reduce_mean(tf.square(y_pred - y_true))
    
    # PCC term
    y_true_centered = y_true - tf.reduce_mean(y_true) 
    y_pred_centered = y_pred - tf.reduce_mean(y_pred)
    
    cov = tf.reduce_sum(y_true_centered * y_pred_centered)
    std_y_true = tf.sqrt(tf.reduce_sum(tf.square(y_true_centered)))
    std_y_pred = tf.sqrt(tf.reduce_sum(tf.square(y_pred_centered)))
    
    pcc = cov / (std_y_true * std_y_pred + K.epsilon())
    
    # Calculate coefficient to match MSE range
    y_true_min = tf.reduce_min(y_true)
    y_true_max = tf.reduce_max(y_true)
    coef = tf.abs(y_true_max - y_true_min)  # Maximum possible MSE value
    
    # Combined loss with coefficient matching MSE range
    return mse + coef * (1.0 - pcc)

# Create the same MLP model
model_custom = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10)
])

# Compile with custom loss
model_custom.compile(
    optimizer='adam',
    loss=custom_loss,
    metrics=['accuracy']
)

# Train the model with noisy data
history_custom = model_custom.fit(
    x_train_noisy, y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.1,
    verbose=1
)

# Evaluate on test set with noisy data
test_loss_custom, test_accuracy_custom = model_custom.evaluate(x_test_noisy, y_test, verbose=0)
print(f"\nCustom Loss - Test accuracy (noisy): {test_accuracy_custom:.4f}")
print(f"Custom Loss - Test loss (noisy): {test_loss_custom:.4f}")

# Manual verification with noisy data
y_pred_custom = model_custom.predict(x_test_noisy)
y_pred_custom_classes = np.argmax(y_pred_custom, axis=1)
manual_accuracy_custom = np.mean(y_pred_custom_classes == y_test_classes)
print(f"Custom Loss - Manual verification accuracy (noisy): {manual_accuracy_custom:.4f}")

# For comparison, evaluate on clean test data
clean_test_loss_custom, clean_test_accuracy_custom = model_custom.evaluate(x_test, y_test, verbose=0)
print(f"\nCustom Loss - Test accuracy (clean): {clean_test_accuracy_custom:.4f}")
print(f"Custom Loss - Test loss (clean): {clean_test_loss_custom:.4f}")

# Compare results
print("\nComparison:")
print(f"Original MSE - Test accuracy (noisy): {test_accuracy:.4f}")
print(f"Custom Loss - Test accuracy (noisy): {test_accuracy_custom:.4f}")
print(f"Original MSE - Test accuracy (clean): {clean_test_accuracy:.4f}")
print(f"Custom Loss - Test accuracy (clean): {clean_test_accuracy_custom:.4f}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Custom Loss - Test accuracy (noisy): 0.2705
Custom Loss - Test loss (noisy): 0.8319
Custom Loss - Manual verification accuracy (noisy): 0.2705

Custom Loss - Test accuracy (clean): 0.6746
Custom Loss - Test loss (clean): 0.4865

Comparison:
Original MSE - Test accuracy (noisy): 0.2697
Custom Loss - Test accuracy (noisy): 0.2705
Original MSE - Test accuracy (clean): 0.6468
Custom Loss - Test accuracy (clean): 0.6746


In [28]:
# Load and preprocess MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize and reshape the data
x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0

# Create imbalanced dataset with variable ratios
# Define imbalance ratios for each digit (0-9)
imbalance_ratios = {
    0: 1.0,    # Keep all samples
    1: 0.8,    # Keep 80% of samples
    2: 0.6,    # Keep 60% of samples
    3: 0.4,    # Keep 40% of samples
    4: 0.3,    # Keep 30% of samples
    5: 0.25,   # Keep 25% of samples
    6: 0.2,    # Keep 20% of samples
    7: 0.15,   # Keep 15% of samples
    8: 0.1,    # Keep 10% of samples
    9: 0.05    # Keep 5% of samples
}

# Create masks for imbalanced sampling
train_indices = []
for digit in range(10):
    digit_indices = np.where(y_train == digit)[0]
    num_samples = int(len(digit_indices) * imbalance_ratios[digit])
    selected_indices = np.random.choice(digit_indices, num_samples, replace=False)
    train_indices.extend(selected_indices)

train_indices = np.array(train_indices)
np.random.shuffle(train_indices)

# Select the imbalanced subset
x_train = x_train[train_indices]
y_train = y_train[train_indices]

# Add Gaussian noise to the data
noise_factor = 1.0
x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape)
x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape)

# Clip the noisy images to be between 0 and 1
x_train_noisy = np.clip(x_train_noisy, 0., 1.)
x_test_noisy = np.clip(x_test_noisy, 0., 1.)

# Convert labels to one-hot encoding
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Print class distribution
print("Class distribution in training set:")
for i in range(10):
    count = np.sum(np.argmax(y_train, axis=1) == i)
    print(f"Digit {i}: {count} samples")

# Create a simple MLP model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10)
])

# Compile model with MSE loss
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['accuracy']
)

# Train the model with noisy data
history = model.fit(
    x_train_noisy, y_train,
    batch_size=256,
    epochs=10,
    validation_split=0.1,
    verbose=1
)

# Evaluate on test set with noisy data
test_loss, test_accuracy = model.evaluate(x_test_noisy, y_test, verbose=0)
print(f"\nTest accuracy (noisy): {test_accuracy:.4f}")
print(f"Test loss (MSE, noisy): {test_loss:.4f}")

# Make predictions and calculate per-class accuracy
y_pred = model.predict(x_test_noisy)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

print("\nPer-class accuracy:")
for i in range(10):
    mask = (y_test_classes == i)
    class_acc = np.mean(y_pred_classes[mask] == y_test_classes[mask])
    print(f"Digit {i}: {class_acc:.4f}")

# For comparison, evaluate on clean test data
clean_test_loss, clean_test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"\nTest accuracy (clean): {clean_test_accuracy:.4f}")
print(f"Test loss (MSE, clean): {clean_test_loss:.4f}")


Class distribution in training set:
Digit 0: 5923 samples
Digit 1: 5393 samples
Digit 2: 3574 samples
Digit 3: 2452 samples
Digit 4: 1752 samples
Digit 5: 1355 samples
Digit 6: 1183 samples
Digit 7: 939 samples
Digit 8: 585 samples
Digit 9: 297 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test accuracy (noisy): 0.5147
Test loss (MSE, noisy): 0.0653

Per-class accuracy:
Digit 0: 0.9408
Digit 1: 0.9322
Digit 2: 0.7025
Digit 3: 0.7465
Digit 4: 0.7536
Digit 5: 0.1726
Digit 6: 0.4207
Digit 7: 0.3804
Digit 8: 0.0000
Digit 9: 0.0000

Test accuracy (clean): 0.6397
Test loss (MSE, clean): 0.0786


In [29]:
# Create a custom loss function combining MSE and PCC
def custom_loss(y_true, y_pred):
    # MSE term
    mse = tf.reduce_mean(tf.square(y_pred - y_true))
    
    # PCC term
    y_true_centered = y_true - tf.reduce_mean(y_true) 
    y_pred_centered = y_pred - tf.reduce_mean(y_pred)
    
    cov = tf.reduce_sum(y_true_centered * y_pred_centered)
    std_y_true = tf.sqrt(tf.reduce_sum(tf.square(y_true_centered)))
    std_y_pred = tf.sqrt(tf.reduce_sum(tf.square(y_pred_centered)))
    
    pcc = cov / (std_y_true * std_y_pred + K.epsilon())
    
    # Calculate coefficient to match MSE range
    y_true_min = tf.reduce_min(y_true)
    y_true_max = tf.reduce_max(y_true)
    coef = tf.abs(y_true_max - y_true_min)  # Maximum possible MSE value
    
    # Combined loss with coefficient matching MSE range
    return mse + coef * (1.0 - pcc)

# Create the same MLP model
model_custom = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10)
])

# Compile with custom loss
model_custom.compile(
    optimizer='adam',
    loss=custom_loss,
    metrics=['accuracy']
)

# Train the model with noisy data
history_custom = model_custom.fit(
    x_train_noisy, y_train,
    batch_size=256,
    epochs=10,
    validation_split=0.1,
    verbose=1
)

# Evaluate on test set with noisy data
test_loss_custom, test_accuracy_custom = model_custom.evaluate(x_test_noisy, y_test, verbose=0)
print(f"\nCustom Loss - Test accuracy (noisy): {test_accuracy_custom:.4f}")
print(f"Custom Loss - Test loss (noisy): {test_loss_custom:.4f}")

# Make predictions and calculate per-class accuracy
y_pred_custom = model_custom.predict(x_test_noisy)
y_pred_custom_classes = np.argmax(y_pred_custom, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

print("\nCustom Loss - Per-class accuracy:")
for i in range(10):
    mask = (y_test_classes == i)
    class_acc = np.mean(y_pred_custom_classes[mask] == y_test_classes[mask])
    print(f"Digit {i}: {class_acc:.4f}")

# For comparison, evaluate on clean test data
clean_test_loss_custom, clean_test_accuracy_custom = model_custom.evaluate(x_test, y_test, verbose=0)
print(f"\nCustom Loss - Test accuracy (clean): {clean_test_accuracy_custom:.4f}")
print(f"Custom Loss - Test loss (clean): {clean_test_loss_custom:.4f}")

# Compare results
print("\nComparison:")
print(f"Original MSE - Test accuracy (noisy): {test_accuracy:.4f}")
print(f"Custom Loss - Test accuracy (noisy): {test_accuracy_custom:.4f}")
print(f"Original MSE - Test accuracy (clean): {clean_test_accuracy:.4f}")
print(f"Custom Loss - Test accuracy (clean): {clean_test_accuracy_custom:.4f}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Custom Loss - Test accuracy (noisy): 0.5664
Custom Loss - Test loss (noisy): 0.4737

Custom Loss - Per-class accuracy:
Digit 0: 0.9010
Digit 1: 0.9269
Digit 2: 0.6986
Digit 3: 0.6911
Digit 4: 0.7536
Digit 5: 0.2993
Digit 6: 0.6712
Digit 7: 0.6391
Digit 8: 0.0031
Digit 9: 0.0000

Custom Loss - Test accuracy (clean): 0.6680
Custom Loss - Test loss (clean): 0.4633

Comparison:
Original MSE - Test accuracy (noisy): 0.5147
Custom Loss - Test accuracy (noisy): 0.5664
Original MSE - Test accuracy (clean): 0.6397
Custom Loss - Test accuracy (clean): 0.6680
