# Generate training and testing datasets using the given mean vectors and covariance matrices

In [1]:
import numpy as np

# Experiment setup: N samples per dataset, drawn from three equiprobable
# Gaussian classes.
N = 1000
prior_probabilities = [1/3, 1/3, 1/3]

# Class means
m1 = np.array([0, 0, 0])
m2 = np.array([1, 2, 2])
m3 = np.array([3, 3, 4])

# Class covariance matrices
S1 = np.array([[0.8, 0.2, 0.1],
               [0.2, 0.8, 0.2],
               [0.1, 0.2, 0.8]])

S2 = np.array([[0.6, 0.01, 0.01],
               [0.01, 0.8, 0.01],
               [0.01, 0.01, 0.6]])

S3 = np.array([[0.6, 0.1, 0.1],
               [0.1, 0.6, 0.1],
               [0.1, 0.1, 0.6]])

# Lookup tables indexed by the sampled class index (0, 1 or 2)
class_means = (m1, m2, m3)
class_covs = (S1, S2, S3)
class_names = ("w1", "w2", "w3")

# Training set X: for every sample, first draw the class from the priors,
# then draw the point from that class's Gaussian. No labels are recorded.
X = []
for _ in range(N):
    class_idx = np.random.choice([0, 1, 2], p=prior_probabilities)
    x = np.random.multivariate_normal(class_means[class_idx], class_covs[class_idx])
    X.append(x)

# Test set X1: generated exactly like X, but the generating class of every
# point is remembered in true_labels so the classifiers can be scored later.
X1 = []
true_labels = []
for _ in range(N):
    class_idx = np.random.choice([0, 1, 2], p=prior_probabilities)
    x = np.random.multivariate_normal(class_means[class_idx], class_covs[class_idx])
    true_labels.append(class_names[class_idx])
    X1.append(x)

# Stack the per-sample vectors into 2-D arrays (one row per sample)
X = np.array(X)
X1 = np.array(X1)

# Persist both datasets; the classifier cells reload X1 from disk
np.save('X.npy', X)
np.save('X1.npy', X1)

# Classify Using Euclidean distance classifier 

In [2]:
# Reload the test set written by the data-generation cell
X1 = np.load('X1.npy')

# Class means used as the reference points of the classifier
m1 = np.array([0, 0, 0])
m2 = np.array([1, 2, 2])
m3 = np.array([3, 3, 4])

class_means = (m1, m2, m3)
class_names = ("w1", "w2", "w3")

# Minimum-Euclidean-distance rule: each test point receives the label of the
# class whose mean is nearest. np.argmin returns the first minimum, so a tie
# is resolved in favour of the lower-numbered class.
predicted_labels = []
for x in X1:
    distances = [np.linalg.norm(x - mean) for mean in class_means]
    nearest = int(np.argmin(distances))
    predicted_labels.append(class_names[nearest])

# Optional: inspect the individual predictions
# for i, label in enumerate(predicted_labels):
#     print(f"Point {i+1}: Predicted class = {label}")

# Classify Using Mahalanobis distance classifier 

In [3]:
# Reload the test set written by the data-generation cell
X1 = np.load('X1.npy')

# Class means
m1 = np.array([0, 0, 0])
m2 = np.array([1, 2, 2])
m3 = np.array([3, 3, 4])

# Class covariance matrices
S1 = np.array([[0.8, 0.2, 0.1],
               [0.2, 0.8, 0.2],
               [0.1, 0.2, 0.8]])

S2 = np.array([[0.6, 0.01, 0.01],
               [0.01, 0.8, 0.01],
               [0.01, 0.01, 0.6]])

S3 = np.array([[0.6, 0.1, 0.1],
               [0.1, 0.6, 0.1],
               [0.1, 0.1, 0.6]])

# Invert each covariance once, outside the classification loop
S1_inv = np.linalg.inv(S1)
S2_inv = np.linalg.inv(S2)
S3_inv = np.linalg.inv(S3)

class_models = ((m1, S1_inv), (m2, S2_inv), (m3, S3_inv))
class_names = ("w1", "w2", "w3")

# Minimum-Mahalanobis-distance rule: d(x, m) = sqrt((x - m)^T S^-1 (x - m)),
# evaluated with each class's own inverse covariance. np.argmin returns the
# first minimum, so a tie is resolved in favour of the lower-numbered class.
predicted_labels_mnb = []
for x in X1:
    mahalanobis = []
    for mean, cov_inv in class_models:
        offset = x - mean
        mahalanobis.append(np.sqrt(offset.dot(cov_inv).dot(offset)))
    predicted_labels_mnb.append(class_names[int(np.argmin(mahalanobis))])

# Optional: inspect the individual predictions
# for i, label in enumerate(predicted_labels_mnb):
#     print(f"Point {i+1}: Predicted class = {label}")

# Classify using Bayesian Classifier

In [4]:
from scipy.stats import multivariate_normal
# Reload the test set written by the data-generation cell
X1 = np.load('X1.npy')

# Class means
m1 = np.array([0, 0, 0])
m2 = np.array([1, 2, 2])
m3 = np.array([3, 3, 4])

# Class covariance matrices
S1 = np.array([[0.8, 0.2, 0.1],
               [0.2, 0.8, 0.2],
               [0.1, 0.2, 0.8]])

S2 = np.array([[0.6, 0.01, 0.01],
               [0.01, 0.8, 0.01],
               [0.01, 0.01, 0.6]])

S3 = np.array([[0.6, 0.1, 0.1],
               [0.1, 0.6, 0.1],
               [0.1, 0.1, 0.6]])

# A-priori class probabilities
prior_probabilities = [1/3, 1/3, 1/3]

class_models = ((m1, S1), (m2, S2), (m3, S3))
class_names = ("w1", "w2", "w3")

# Bayes decision rule: score every class by likelihood * prior and pick the
# largest. The scores are unnormalised posteriors: the evidence p(x) is the
# same for all classes, so leaving it out does not change the winner.
# np.argmax returns the first maximum, so a tie is resolved in favour of the
# lower-numbered class.
predicted_labels_bayesian = []
for x in X1:
    scores = [
        multivariate_normal.pdf(x, mean=mean, cov=cov) * prior
        for (mean, cov), prior in zip(class_models, prior_probabilities)
    ]
    predicted_labels_bayesian.append(class_names[int(np.argmax(scores))])

# Optional: inspect the individual predictions
# for i, label in enumerate(predicted_labels_bayesian):
#     print(f"Point {i+1}: Predicted class = {label}")


# For each classifier, compute the error probability and compare the results

In [8]:
# Reload the saved test set. X1 itself is not referenced again in this cell:
# the error rates are computed from the true/predicted label lists only.
X1 = np.load('X1.npy') # Test set

# Function to calculate error probability
def calculate_error_prob(true_labels, predicted_labels):
    """Return the fraction of samples whose prediction differs from the truth.

    Args:
        true_labels: Sized sequence of ground-truth class labels.
        predicted_labels: Iterable of predicted labels, aligned element by
            element with ``true_labels``.

    Returns:
        The misclassification rate, a float between 0 and 1.

    Raises:
        ValueError: If ``true_labels`` is empty, or if the two inputs do not
            contain the same number of labels.
    """
    # Materialise once so the length can be checked even for a generator.
    predicted = list(predicted_labels)
    total = len(true_labels)

    if total == 0:
        raise ValueError("true_labels must not be empty")
    # zip() would silently drop the unmatched tail, and dividing by
    # len(true_labels) would then count every missing prediction as correct.
    if len(predicted) != total:
        raise ValueError(
            f"length mismatch: {total} true labels vs {len(predicted)} predicted labels"
        )

    incorrect_count = sum(
        1 for true_label, predicted_label in zip(true_labels, predicted)
        if true_label != predicted_label
    )
    return incorrect_count / total

# Score each classifier's predictions against the true labels recorded when
# the test set was generated.
error_prob_euclidean = calculate_error_prob(true_labels, predicted_labels)
error_prob_mnb = calculate_error_prob(true_labels, predicted_labels_mnb)
error_prob_bayesian = calculate_error_prob(true_labels, predicted_labels_bayesian)

# Report the rates as percentages. Every line carries the "%" unit so the
# three results can be compared at a glance.
print(f"Error Probability (Euclidean Distance Classifier): {error_prob_euclidean*100:.2f}%")
print(f"Error Probability (Mahalanobis Distance Classifier): {error_prob_mnb*100:.2f}%")
print(f"Error Probability (Bayesian Classifier): {error_prob_bayesian*100:.2f}%")


Error Probability (Euclidean Distance Classifier): 7.00
Error Probability (Mahalanobis Distance Classifier): 6.30
Error Probability (Bayesian Classifier): 6.60%
