In [None]:
import numpy as np

# Bayesian Classifier

In [None]:
def pdf(mean, sd, x):
    """Evaluate the normal density with the given mean and standard deviation at x.

    All arguments may be scalars or array-likes; the usual NumPy broadcasting
    rules apply and the result has the broadcast shape.

    Args:
        mean: Mean(s) of the distribution.
        sd: Standard deviation(s) of the distribution.
        x: Point(s) at which the density is evaluated.

    Returns:
        exp(-0.5 * ((x - mean) / sd) ** 2) / sqrt(2 * pi * sd ** 2).
    """
    # Standardised distance from the mean. x is coerced to a NumPy value first
    # so that division follows NumPy semantics even for plain Python scalars.
    z = (np.asarray(x) - mean) / sd

    # Normalising constant of the Gaussian: sqrt(2 * pi * sd^2)
    norm_const = np.sqrt(np.square(sd) * np.pi * 2.0)

    return np.exp(np.square(z) * -0.5) / norm_const

def binary_bc_training(dataset, dim):
    """Estimate per-feature Gaussian parameters for the two classes.

    Each row of `dataset` holds `dim` feature values followed by a class label
    at index `dim`. Rows whose label is greater than 0 are treated as the
    positive class; every other row is treated as the negative class.

    Args:
        dataset: Sequence of rows (lists or 1-D arrays) of equal length, each
            with at least `dim + 1` numeric entries.
        dim: Number of leading entries in each row that are features.

    Returns:
        Tuple `(mean_p, sd_p, mean_n, sd_n)` of 1-D arrays of length `dim`:
        the per-feature mean and population standard deviation (ddof=0) of
        the positive and the negative samples.

    Raises:
        ValueError: If `dataset` is empty or its rows are too short, or if one
            of the two classes has no samples (its statistics would be 0/0).
    """
    data = np.asarray(dataset, dtype=float)
    if data.ndim != 2 or data.shape[1] <= dim:
        raise ValueError(
            "dataset must be a non-empty sequence of rows with at least dim + 1 values"
        )

    features = data[:, :dim]
    is_positive = data[:, dim] > 0

    if is_positive.all() or not is_positive.any():
        raise ValueError("dataset must contain at least one sample of each class")

    positives = features[is_positive]
    negatives = features[~is_positive]

    # ndarray.std uses the two-pass formula mean((x - mean)^2), which cannot go
    # negative. The one-pass form E[x^2] - mean^2 can round to a tiny negative
    # value for (near-)constant features and then turns into NaN under sqrt.
    mean_p = positives.mean(axis=0)
    sd_p = positives.std(axis=0)

    mean_n = negatives.mean(axis=0)
    sd_n = negatives.std(axis=0)

    # Return the mean and standard deviation for both classes
    return mean_p, sd_p, mean_n, sd_n


def _gaussian_log_likelihood(mean, sd, x):
    """Return the sum over features of the log normal density of x."""
    z = (np.asarray(x, dtype=float) - mean) / sd
    return np.sum(-0.5 * np.square(z) - np.log(sd) - 0.5 * np.log(2.0 * np.pi))


def binary_bc_inferencing(mean_p, sd_p, mean_n, sd_n, x, dim, p_pos, p_neg):
    """Classify x with a two-class Gaussian naive Bayes rule.

    The decision compares log-posterior scores (sum of per-feature log
    densities plus the log prior) rather than the product of densities.
    The product underflows to 0.0 for both classes when there are many
    features or x is far from both means, which made the comparison
    meaningless and always returned -1.

    Args:
        mean_p, sd_p: Per-feature mean and standard deviation of the positive class.
        mean_n, sd_n: Per-feature mean and standard deviation of the negative class.
        x: Feature vector to classify.
        dim: Number of features. Unused; kept for backward compatibility.
        p_pos: Prior probability of the positive class.
        p_neg: Prior probability of the negative class.

    Returns:
        1 if the positive class has the strictly larger posterior score,
        otherwise -1 (ties go to the negative class).
    """
    # log(0) = -inf is the intended score for a zero prior, so silence that warning only
    with np.errstate(divide="ignore"):
        log_prior_pos = np.log(p_pos)
        log_prior_neg = np.log(p_neg)

    score_pos = _gaussian_log_likelihood(mean_p, sd_p, x) + log_prior_pos
    score_neg = _gaussian_log_likelihood(mean_n, sd_n, x) + log_prior_neg

    if score_pos > score_neg:
        return 1  # Positive class
    return -1  # Negative class


In [None]:
# Define dimension of the data
dim = 2

# Number of samples per class in each split: every trial draws twice this many
# positive and negative samples, half used for training and half for testing
target_num_samples = 40

# Number of independent trials, each on a freshly generated dataset
num_trials = 30

# Seeded generator so that Restart & Run All reproduces the printed metrics
rng = np.random.default_rng(0)

# Repeat the experiment num_trials times
for r in range(num_trials):

    # Initialize the dataset
    Sn = []  # List to store negative samples
    Sp = []  # List to store positive samples
    num_n = 0  # Counter for negative samples
    num_p = 0  # Counter for positive samples

    # Generate samples until the target number is reached for both positive and negative samples
    while num_p < target_num_samples * 2 or num_n < target_num_samples * 2:
        # Draw dim features uniformly from [-5, 5); the extra last slot holds the class label
        random_sample = np.zeros(dim + 1)
        random_sample[:dim] = rng.uniform(-5, 5, dim)
        # The class is decided by the sign of the sum of the features
        sum_vector = np.sum(random_sample[:dim])
        # If the sum is positive, append the sample to Sp (unless Sp is already full)
        if sum_vector > 0:
            if num_p < target_num_samples * 2:
                random_sample[dim] = 1
                Sp.append(random_sample)
                num_p += 1
        elif num_n < target_num_samples * 2:
            random_sample[dim] = -1
            Sn.append(random_sample)
            num_n += 1

    # Split the dataset into training (Sp, Sn) and testing (Tp, Tn)
    # The last target_num_samples samples from Sn and Sp are used for testing.
    Tp = Sp[target_num_samples:]
    Tn = Sn[target_num_samples:]

    # The first target_num_samples samples from Sn and Sp are used for training.
    Sp = Sp[:target_num_samples]
    Sn = Sn[:target_num_samples]

    # Combine positive and negative samples to create the dataset
    dataset = Sn + Sp

    # Calculate the Gaussian distribution based on training data
    mean_p, sd_p, mean_n, sd_n = binary_bc_training(dataset, dim)

    # Combine positive and negative samples to create the testing dataset
    testing_dataset = Tp + Tn

    # Initialize counters for evaluation metrics
    true_positive = 0
    true_negative = 0
    false_positive = 0
    false_negative = 0

    # Loop through the testing_dataset
    for sample in testing_dataset:
        # Predict from the features only; prior probability = 0.5 for both classes
        prediction = binary_bc_inferencing(mean_p, sd_p, mean_n, sd_n, sample[0:dim], dim, 0.5, 0.5)

        # Update the confusion-matrix counter matching (true label, prediction)
        if sample[dim] == 1:
            if prediction == 1:
                true_positive += 1
            else:
                false_negative += 1
        else:
            if prediction == -1:
                true_negative += 1
            else:
                false_positive += 1

    # Calculate precision (defined as 0 when nothing was predicted positive)
    precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) != 0 else 0

    # Calculate recall (defined as 0 when there are no positive test samples)
    recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) != 0 else 0

    # Calculate F1 score (defined as 0 when precision and recall are both 0)
    f1score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    # Print the result
    print("Case",r+1,"Precision:", precision, "Recall:", recall, "F1 Score:", f1score)


Case 1 Precision: 0.975609756097561 Recall: 1.0 F1 Score: 0.9876543209876543
Case 2 Precision: 1.0 Recall: 0.95 F1 Score: 0.9743589743589743
Case 3 Precision: 0.9743589743589743 Recall: 0.95 F1 Score: 0.9620253164556962
Case 4 Precision: 0.85 Recall: 0.85 F1 Score: 0.85
Case 5 Precision: 1.0 Recall: 0.925 F1 Score: 0.961038961038961
Case 6 Precision: 0.975 Recall: 0.975 F1 Score: 0.975
Case 7 Precision: 0.8974358974358975 Recall: 0.875 F1 Score: 0.8860759493670887
Case 8 Precision: 0.975609756097561 Recall: 1.0 F1 Score: 0.9876543209876543
Case 9 Precision: 1.0 Recall: 0.95 F1 Score: 0.9743589743589743
Case 10 Precision: 0.8372093023255814 Recall: 0.9 F1 Score: 0.8674698795180723
Case 11 Precision: 0.9210526315789473 Recall: 0.875 F1 Score: 0.8974358974358975
Case 12 Precision: 0.9090909090909091 Recall: 1.0 F1 Score: 0.9523809523809523
Case 13 Precision: 1.0 Recall: 0.775 F1 Score: 0.8732394366197184
Case 14 Precision: 1.0 Recall: 0.95 F1 Score: 0.9743589743589743
Case 15 Precision: 0

In [None]:
# Define dimension of the data
dim = 10

# Number of samples per class in each split: every trial draws twice this many
# positive and negative samples, half used for training and half for testing
target_num_samples = 200

# Number of independent trials, each on a freshly generated dataset
num_trials = 30

# Seeded generator so that Restart & Run All reproduces the printed metrics
rng = np.random.default_rng(0)

# Repeat the experiment num_trials times
for r in range(num_trials):

    # Initialize the dataset
    Sn = []  # List to store negative samples
    Sp = []  # List to store positive samples
    num_n = 0  # Counter for negative samples
    num_p = 0  # Counter for positive samples

    # Generate samples until the target number is reached for both positive and negative samples
    while num_p < target_num_samples * 2 or num_n < target_num_samples * 2:
        # Draw dim features uniformly from [-5, 5); the extra last slot holds the class label
        random_sample = np.zeros(dim + 1)
        random_sample[:dim] = rng.uniform(-5, 5, dim)
        # The class is decided by the sign of the sum of the features
        sum_vector = np.sum(random_sample[:dim])
        # If the sum is positive, append the sample to Sp (unless Sp is already full)
        if sum_vector > 0:
            if num_p < target_num_samples * 2:
                random_sample[dim] = 1
                Sp.append(random_sample)
                num_p += 1
        elif num_n < target_num_samples * 2:
            random_sample[dim] = -1
            Sn.append(random_sample)
            num_n += 1

    # Split the dataset into training (Sp, Sn) and testing (Tp, Tn)
    # The last target_num_samples samples from Sn and Sp are used for testing.
    Tp = Sp[target_num_samples:]
    Tn = Sn[target_num_samples:]

    # The first target_num_samples samples from Sn and Sp are used for training.
    Sp = Sp[:target_num_samples]
    Sn = Sn[:target_num_samples]

    # Combine positive and negative samples to create the dataset
    dataset = Sn + Sp

    # Calculate the Gaussian distribution based on training data
    mean_p, sd_p, mean_n, sd_n = binary_bc_training(dataset, dim)

    # Combine positive and negative samples to create the testing dataset
    testing_dataset = Tp + Tn

    # Initialize counters for evaluation metrics
    true_positive = 0
    true_negative = 0
    false_positive = 0
    false_negative = 0

    # Loop through the testing_dataset
    for sample in testing_dataset:
        # Predict from the features only; prior probability = 0.5 for both classes
        prediction = binary_bc_inferencing(mean_p, sd_p, mean_n, sd_n, sample[0:dim], dim, 0.5, 0.5)

        # Update the confusion-matrix counter matching (true label, prediction)
        if sample[dim] == 1:
            if prediction == 1:
                true_positive += 1
            else:
                false_negative += 1
        else:
            if prediction == -1:
                true_negative += 1
            else:
                false_positive += 1

    # Calculate precision (defined as 0 when nothing was predicted positive)
    precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) != 0 else 0

    # Calculate recall (defined as 0 when there are no positive test samples)
    recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) != 0 else 0

    # Calculate F1 score (defined as 0 when precision and recall are both 0)
    f1score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    # Print the result
    print("Case",r+1,"Precision:", precision, "Recall:", recall, "F1 Score:", f1score)


Case 1 Precision: 0.9731182795698925 Recall: 0.905 F1 Score: 0.9378238341968912
Case 2 Precision: 0.9038461538461539 Recall: 0.94 F1 Score: 0.9215686274509804
Case 3 Precision: 0.8947368421052632 Recall: 0.935 F1 Score: 0.9144254278728606
Case 4 Precision: 0.953125 Recall: 0.915 F1 Score: 0.9336734693877552
Case 5 Precision: 0.9459459459459459 Recall: 0.875 F1 Score: 0.9090909090909091
Case 6 Precision: 0.9211822660098522 Recall: 0.935 F1 Score: 0.9280397022332506
Case 7 Precision: 0.905 Recall: 0.905 F1 Score: 0.905
Case 8 Precision: 0.9642857142857143 Recall: 0.945 F1 Score: 0.9545454545454546
Case 9 Precision: 0.9073170731707317 Recall: 0.93 F1 Score: 0.9185185185185186
Case 10 Precision: 0.9090909090909091 Recall: 0.95 F1 Score: 0.9290953545232273
Case 11 Precision: 0.915 Recall: 0.915 F1 Score: 0.915
Case 12 Precision: 0.9411764705882353 Recall: 0.88 F1 Score: 0.9095607235142118
Case 13 Precision: 0.9252336448598131 Recall: 0.99 F1 Score: 0.9565217391304348
Case 14 Precision: 0.93