## MNIST original (Naive Bayes)

In [5]:
import numpy as np
import tensorflow as tf

# Load the original MNIST digits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a 784-element float32 row vector
x_train = x_train.reshape(x_train.shape[0], 28*28).astype(np.float32)
x_test = x_test.reshape(x_test.shape[0], 28*28).astype(np.float32)

# Scale pixel values in place by 1/255
# (assumes 8-bit pixel intensities, which would map to [0, 1] -- TODO confirm)
x_train /= 255.0
x_test /= 255.0

number_classes = 10
number_features = x_train.shape[1]  


# Per-class, per-pixel Gaussian parameters. They start as zeros and are filled
# in by the fitting loop further down; compute_log_likelihood reads them as
# module-level globals.
means = np.zeros((number_classes, number_features))
variances = np.zeros((number_classes, number_features))

def compute_log_likelihood(x, class_index):
    """Log-likelihood of sample ``x`` under the diagonal Gaussian of one class.

    The per-class parameters are read from the module-level ``means`` and
    ``variances`` arrays (row ``class_index`` of each), so those must have
    been filled in before this is called.

    Args:
        x: 1-D feature vector with one entry per feature.
        class_index: Row index of the class in ``means`` / ``variances``.

    Returns:
        The scalar ``-0.5 * (sum(log(2*pi*var)) + sum((x - mean)**2 / var))``.
    """
    mu = means[class_index]
    sigma_sq = variances[class_index]

    # Normalisation term of the Gaussian, summed over all features.
    log_norm = np.sum(np.log(2 * np.pi * sigma_sq))

    # Variance-scaled squared distance from the class mean.
    residual = x - mu
    scaled_sq_error = np.sum((residual ** 2) / sigma_sq)

    return -0.5 * (log_norm + scaled_sq_error)

#evaluation function
def class_acc(pred, gt):
    """Return classification accuracy as a percentage (0-100).

    Args:
        pred: Sequence of predicted labels.
        gt: Sequence of ground-truth labels, compared element-wise with ``pred``.

    Returns:
        Number of matching positions divided by ``len(gt)``, times 100.
    """
    predicted_labels = np.array(pred)
    true_labels = np.array(gt)

    # Boolean mask of hits; summing it counts the correct predictions.
    hit_mask = predicted_labels == true_labels
    n_hits = np.sum(hit_mask)

    return (n_hits / len(true_labels)) * 100
# Seed NumPy's global RNG so both noise arrays drawn below are reproducible.
# noise1 is drawn before noise2; changing that order changes both arrays.
np.random.seed(0)
# Zero-mean Gaussian noise, std 0.14, with the same shape as the training set.
# NOTE(review): presumably the noise keeps every per-pixel variance above zero
# (a zero variance would give log(0) and a division by zero in
# compute_log_likelihood); the rationale for 0.14 / 0.19 is not shown here.
noise1 = np.random.normal(loc=0.0, scale=0.14, size=x_train.shape) # noise added to the training data
x_train_noise = x_train + noise1

noise2 = np.random.normal(loc=0.0, scale=0.19, size=x_test.shape) # noise added to the test data
x_test_noise = x_test + noise2

y_pred = []

# Fit: estimate a per-pixel mean and variance for every class from the
# noise-augmented training samples of that class.
for k in range(number_classes):
    class_k_samples = x_train_noise[y_train == k]
    means[k, :] = np.mean(class_k_samples, axis=0)
    variances[k, :] = np.var(class_k_samples, axis=0)
    
# Predict on the clean test set: pick the class with the highest
# log-likelihood. No class-prior term is added to the score.
for i in range(x_test.shape[0]):
    log_likelihoods = np.array([compute_log_likelihood(x_test[i], k) for k in range(number_classes)])
    predicted_class = np.argmax(log_likelihoods) 
    y_pred.append(predicted_class)
accuracy = class_acc(y_pred, y_test)

print(f'Classification accuracy for original MNIST data with no noise in test data (Naive Bayes): {accuracy:.2f}%')  # noise drawn with np.random.seed(0)

# Same decision rule, now applied to the noise-augmented test set.
y_test_noise_pred = []
for i in range(x_test.shape[0]):
    log_likelihoods = np.array([compute_log_likelihood(x_test_noise[i], k) for k in range(number_classes)])
    predicted_class = np.argmax(log_likelihoods) 
    y_test_noise_pred.append(predicted_class)
accuracy_test_noise= class_acc(y_test_noise_pred, y_test)

print(f'Classification accuracy for original MNIST data with addesd noise in test data (Naive Bayes): {accuracy_test_noise:.2f}%')  # noise drawn with np.random.seed(0)

Classification accuracy for original MNIST data with no noise in test data (Naive Bayes): 81.50%
Classification accuracy for original MNIST data with addesd noise in test data (Naive Bayes): 84.68%


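**So, after adding noise to the test data, the accuracy is 3.18 percentage points higher in this case (84.68% vs. 81.50%), likely because the class statistics were estimated from noise-augmented training data.**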

## MNIST Fashion (Naive Bayes)

In [8]:
# Load Fashion-MNIST through the Keras dataset helper. The variable keeps the
# name `mnist` even though it now refers to the fashion dataset module.
# This cell has no imports of its own: `np` and `tf` must already be in scope.
mnist = tf.keras.datasets.fashion_mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a 784-element float32 row vector
x_train = x_train.reshape(x_train.shape[0], 28*28).astype(np.float32)
x_test = x_test.reshape(x_test.shape[0], 28*28).astype(np.float32)

# Scale pixel values in place by 1/255
# (assumes 8-bit pixel intensities, which would map to [0, 1] -- TODO confirm)
x_train /= 255.0
x_test /= 255.0

number_classes = 10
number_features = x_train.shape[1]  


# Per-class, per-pixel Gaussian parameters. They start as zeros and are filled
# in by the fitting loop further down; compute_log_likelihood reads them as
# module-level globals.
means = np.zeros((number_classes, number_features))
variances = np.zeros((number_classes, number_features))

def compute_log_likelihood(x, class_index):
    """Log-likelihood of sample ``x`` under the diagonal Gaussian of one class.

    The per-class parameters are read from the module-level ``means`` and
    ``variances`` arrays (row ``class_index`` of each), so those must have
    been filled in before this is called.

    Args:
        x: 1-D feature vector with one entry per feature.
        class_index: Row index of the class in ``means`` / ``variances``.

    Returns:
        The scalar ``-0.5 * (sum(log(2*pi*var)) + sum((x - mean)**2 / var))``.
    """
    mu = means[class_index]
    sigma_sq = variances[class_index]

    # Normalisation term of the Gaussian, summed over all features.
    log_norm = np.sum(np.log(2 * np.pi * sigma_sq))

    # Variance-scaled squared distance from the class mean.
    residual = x - mu
    scaled_sq_error = np.sum((residual ** 2) / sigma_sq)

    return -0.5 * (log_norm + scaled_sq_error)

#evaluation function
def class_acc(pred, gt):
    """Return classification accuracy as a percentage (0-100).

    Args:
        pred: Sequence of predicted labels.
        gt: Sequence of ground-truth labels, compared element-wise with ``pred``.

    Returns:
        Number of matching positions divided by ``len(gt)``, times 100.
    """
    predicted_labels = np.array(pred)
    true_labels = np.array(gt)

    # Boolean mask of hits; summing it counts the correct predictions.
    hit_mask = predicted_labels == true_labels
    n_hits = np.sum(hit_mask)

    return (n_hits / len(true_labels)) * 100
# Seed NumPy's global RNG so both noise arrays drawn below are reproducible.
# noise1 is drawn before noise2; changing that order changes both arrays.
np.random.seed(0)
# Zero-mean Gaussian noise: std 0.06 for the training set, 0.1 for the test set.
# NOTE(review): presumably the training noise keeps every per-pixel variance
# above zero for compute_log_likelihood; the rationale for these particular
# scales is not shown here.
noise1 = np.random.normal(loc=0.0, scale=0.06, size=x_train.shape)
x_train_noise = x_train + noise1

noise2 = np.random.normal(loc=0.0, scale=0.1, size=x_test.shape)
x_test_noise = x_test + noise2

y_pred = []

# Fit: estimate a per-pixel mean and variance for every class from the
# noise-augmented training samples of that class.
for k in range(number_classes):
    class_k_samples = x_train_noise[y_train == k]
    means[k, :] = np.mean(class_k_samples, axis=0)
    variances[k, :] = np.var(class_k_samples, axis=0)

# Predict on the clean test set: pick the class with the highest
# log-likelihood. No class-prior term is added to the score.
for i in range(x_test.shape[0]):
    log_likelihoods = np.array([compute_log_likelihood(x_test[i], k) for k in range(number_classes)])
    predicted_class = np.argmax(log_likelihoods) 
    y_pred.append(predicted_class)
accuracy = class_acc(y_pred, y_test)
print(f'Classification accuracy for fashion MNIST data with no noise in test data (Naive Bayes): {accuracy:.2f}%') # noise drawn with np.random.seed(0)

# Same decision rule, now applied to the noise-augmented test set.
y_test_noise_pred = []

for i in range(x_test.shape[0]):
    log_likelihoods = np.array([compute_log_likelihood(x_test_noise[i], k) for k in range(number_classes)])
    predicted_class = np.argmax(log_likelihoods) 
    y_test_noise_pred.append(predicted_class)
 
accuracy_test_noise= class_acc(y_test_noise_pred, y_test)
print(f'Classification accuracy for fashion MNIST data with addesd noise in test data (Naive Bayes): {accuracy_test_noise:.2f}%') # noise drawn with np.random.seed(0)

Classification accuracy for fashion MNIST data with no noise in test data (Naive Bayes): 67.34%
Classification accuracy for fashion MNIST data with addesd noise in test data (Naive Bayes): 71.77%


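**So, after adding noise to the test data, the accuracy is 4.43 percentage points higher in this case (71.77% vs. 67.34%), likely because the class statistics were estimated from noise-augmented training data.**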

## MNIST original (Full Bayes)

In [10]:
import numpy as np
import tensorflow as tf
from scipy.stats import multivariate_normal

# Load the original MNIST digits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a 784-element float32 row vector
x_train = x_train.reshape(x_train.shape[0], 28*28).astype(np.float32)
x_test = x_test.reshape(x_test.shape[0], 28*28).astype(np.float32)

# Scale pixel values in place by 1/255
# (assumes 8-bit pixel intensities, which would map to [0, 1] -- TODO confirm)
x_train /= 255.0
x_test /= 255.0

# Function to compute classification accuracy
def class_acc(pred, gt):
    """Return classification accuracy as a percentage (0-100).

    Args:
        pred: Sequence of predicted labels.
        gt: Sequence of ground-truth labels, compared element-wise with ``pred``.

    Returns:
        Number of matching positions divided by ``len(gt)``, times 100.
    """
    predicted_labels = np.array(pred)
    true_labels = np.array(gt)

    # Boolean mask of hits; summing it counts the correct predictions.
    hit_mask = predicted_labels == true_labels
    n_hits = np.sum(hit_mask)

    return (n_hits / len(true_labels)) * 100

# Function to calculate log likelihoods and predictions
def calculate_predictions(x_train_noise, x_test, y_train, number_classes=10):
    """Classify test samples with a full-covariance Gaussian model per class.

    For every class ``k`` in ``range(number_classes)`` the mean vector and the
    full covariance matrix are estimated from the rows of ``x_train_noise``
    whose label equals ``k``. Each test row is then assigned to the class whose
    multivariate-normal log-density is highest. No class-prior term is added.

    Args:
        x_train_noise: 2-D array ``(n_train, n_features)`` of training samples.
            The estimated covariances must be non-singular, otherwise SciPy's
            ``multivariate_normal.logpdf`` raises; the caller is expected to
            have added noise (hence the name) or otherwise ensured this.
        x_test: 2-D array ``(n_test, n_features)`` of samples to classify.
        y_train: Integer labels for ``x_train_noise``; any array-like.
        number_classes: Number of classes, labelled ``0 .. number_classes - 1``.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        1-D integer array of length ``n_test`` with the predicted class index
        for every test row.

    Raises:
        ValueError: If a class has fewer than two training samples, since a
            covariance matrix cannot be estimated from them.
    """
    # Accept plain lists of labels; `list == k` would not give a boolean mask.
    y_train = np.asarray(y_train)
    number_features = x_train_noise.shape[1]

    means = np.zeros((number_classes, number_features))
    covariances = np.zeros((number_classes, number_features, number_features))

    # Estimate mean and covariance for each class.
    for k in range(number_classes):
        class_k_samples = x_train_noise[y_train == k]
        if class_k_samples.shape[0] < 2:
            # Fail with a clear message instead of handing NaN statistics to SciPy.
            raise ValueError(
                f"class {k} has {class_k_samples.shape[0]} training sample(s); "
                "at least 2 are required to estimate a covariance matrix"
            )
        means[k, :] = np.mean(class_k_samples, axis=0)
        # rowvar=False: rows are observations, columns are features.
        covariances[k, :, :] = np.cov(class_k_samples, rowvar=False)

    log_likelihoods = np.zeros((x_test.shape[0], number_classes))

    # One vectorised logpdf call per class covers all test rows at once.
    for k in range(number_classes):
        log_likelihoods[:, k] = multivariate_normal.logpdf(x_test, mean=means[k], cov=covariances[k])

    predicted_classes = np.argmax(log_likelihoods, axis=1)
    return predicted_classes
# Seed NumPy's global RNG so both noise arrays drawn below are reproducible.
# noise1 is drawn before noise2; changing that order changes both arrays.
np.random.seed(0)
# Zero-mean Gaussian noise: std 0.27 for the training set, 0.06 for the test set.
# NOTE(review): presumably the training noise makes the per-class covariance
# matrices non-singular so multivariate_normal.logpdf accepts them; the
# rationale for these particular scales is not shown here.
noise1 = np.random.normal(loc=0.0, scale=0.27, size=x_train.shape)
x_train_noise = x_train + noise1

noise2 = np.random.normal(loc=0.0, scale=0.06, size=x_test.shape)
x_test_noise=x_test + noise2

# Each call below re-estimates the class means and covariances from
# x_train_noise, so the same model is fitted twice.
# For original MNIST, no noise in test data
predicted_classes = calculate_predictions(x_train_noise, x_test, y_train)
# For original MNIST, added noise in test data
predicted_classes_test_noise = calculate_predictions(x_train_noise, x_test_noise, y_train)

# Calculate accuracy (percentage of test labels predicted correctly)
accuracy = class_acc(predicted_classes, y_test)
accuracy_test_noise= class_acc(predicted_classes_test_noise, y_test)

print(f'Classification accuracy for original MNIST data with no noise in test data (Full Bayes): {accuracy:.2f}%')  # noise drawn with np.random.seed(0)
print(f'Classification accuracy for original MNIST data with addesd noise in test data (Full Bayes): {accuracy_test_noise:.2f}%')  # noise drawn with np.random.seed(0)

Classification accuracy for original MNIST data with no noise in test data (Full Bayes): 95.87%
Classification accuracy for original MNIST data with addesd noise in test data (Full Bayes): 95.94%


**So, after adding noise to the test data, the accuracy is 0.07 percentage points higher in this case (95.94% vs. 95.87%), which is a very small difference.**

## MNIST Fashion (Full Bayes)

In [11]:
import numpy as np
import tensorflow as tf
from scipy.stats import multivariate_normal

# Load Fashion-MNIST through the Keras dataset helper. The variable keeps the
# name `mnist` even though it now refers to the fashion dataset module.
mnist = tf.keras.datasets.fashion_mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a 784-element float32 row vector
x_train = x_train.reshape(x_train.shape[0], 28*28).astype(np.float32)
x_test = x_test.reshape(x_test.shape[0], 28*28).astype(np.float32)

# Scale pixel values in place by 1/255
# (assumes 8-bit pixel intensities, which would map to [0, 1] -- TODO confirm)
x_train /= 255.0
x_test /= 255.0

# Function to compute classification accuracy
def class_acc(pred, gt):
    """Return classification accuracy as a percentage (0-100).

    Args:
        pred: Sequence of predicted labels.
        gt: Sequence of ground-truth labels, compared element-wise with ``pred``.

    Returns:
        Number of matching positions divided by ``len(gt)``, times 100.
    """
    predicted_labels = np.array(pred)
    true_labels = np.array(gt)

    # Boolean mask of hits; summing it counts the correct predictions.
    hit_mask = predicted_labels == true_labels
    n_hits = np.sum(hit_mask)

    return (n_hits / len(true_labels)) * 100

# Function to calculate log likelihoods and predictions
def calculate_predictions(x_train_noise, x_test, y_train, number_classes=10):
    """Classify test samples with a full-covariance Gaussian model per class.

    For every class ``k`` in ``range(number_classes)`` the mean vector and the
    full covariance matrix are estimated from the rows of ``x_train_noise``
    whose label equals ``k``. Each test row is then assigned to the class whose
    multivariate-normal log-density is highest. No class-prior term is added.

    Args:
        x_train_noise: 2-D array ``(n_train, n_features)`` of training samples.
            The estimated covariances must be non-singular, otherwise SciPy's
            ``multivariate_normal.logpdf`` raises; the caller is expected to
            have added noise (hence the name) or otherwise ensured this.
        x_test: 2-D array ``(n_test, n_features)`` of samples to classify.
        y_train: Integer labels for ``x_train_noise``; any array-like.
        number_classes: Number of classes, labelled ``0 .. number_classes - 1``.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        1-D integer array of length ``n_test`` with the predicted class index
        for every test row.

    Raises:
        ValueError: If a class has fewer than two training samples, since a
            covariance matrix cannot be estimated from them.
    """
    # Accept plain lists of labels; `list == k` would not give a boolean mask.
    y_train = np.asarray(y_train)
    number_features = x_train_noise.shape[1]

    means = np.zeros((number_classes, number_features))
    covariances = np.zeros((number_classes, number_features, number_features))

    # Estimate mean and covariance for each class.
    for k in range(number_classes):
        class_k_samples = x_train_noise[y_train == k]
        if class_k_samples.shape[0] < 2:
            # Fail with a clear message instead of handing NaN statistics to SciPy.
            raise ValueError(
                f"class {k} has {class_k_samples.shape[0]} training sample(s); "
                "at least 2 are required to estimate a covariance matrix"
            )
        means[k, :] = np.mean(class_k_samples, axis=0)
        # rowvar=False: rows are observations, columns are features.
        covariances[k, :, :] = np.cov(class_k_samples, rowvar=False)

    log_likelihoods = np.zeros((x_test.shape[0], number_classes))

    # One vectorised logpdf call per class covers all test rows at once.
    for k in range(number_classes):
        log_likelihoods[:, k] = multivariate_normal.logpdf(x_test, mean=means[k], cov=covariances[k])

    predicted_classes = np.argmax(log_likelihoods, axis=1)
    return predicted_classes
# Seed NumPy's global RNG so both noise arrays drawn below are reproducible.
# noise3 is drawn before noise4; changing that order changes both arrays.
np.random.seed(0)
# Zero-mean Gaussian noise: std 0.15 for the training set, 0.14 for the test set.
# NOTE(review): presumably the training noise makes the per-class covariance
# matrices non-singular so multivariate_normal.logpdf accepts them; the
# rationale for these particular scales is not shown here.
noise3 = np.random.normal(loc=0.0, scale=0.15, size=x_train.shape)
x_train_noise = x_train + noise3

noise4 = np.random.normal(loc=0.0, scale=0.14, size=x_test.shape)
x_test_noise=x_test + noise4

# Each call below re-estimates the class means and covariances from
# x_train_noise, so the same model is fitted twice.
# For fashion MNIST, no noise in test data
predicted_classes = calculate_predictions(x_train_noise, x_test, y_train)
# For fashion MNIST, added noise in test data
predicted_classes_test_noise = calculate_predictions(x_train_noise, x_test_noise, y_train)

# Calculate accuracy (percentage of test labels predicted correctly)
accuracy = class_acc(predicted_classes, y_test)
accuracy_test_noise= class_acc(predicted_classes_test_noise, y_test)

print(f'Classification accuracy for fashion MNIST data with no noise in test data (Full Bayes): {accuracy:.2f}%')  # noise drawn with np.random.seed(0)
print(f'Classification accuracy for fashion MNIST data with addesd noise in test data (Full Bayes): {accuracy_test_noise:.2f}%')  # noise drawn with np.random.seed(0)

Classification accuracy for fashion MNIST data with no noise in test data (Full Bayes): 77.47%
Classification accuracy for fashion MNIST data with addesd noise in test data (Full Bayes): 82.64%


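**So, after adding noise to the test data, the accuracy is 5.17 percentage points higher in this case (82.64% vs. 77.47%), likely because the class statistics were estimated from noise-augmented training data.**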