In [1]:
import pandas as pd

data = {
    'y1': [0.24, 0.16, 0.32, 0.54, 0.66, 0.76, 0.41, 0.38, 0.42],
    'y2': [0.36, 0.48, 0.72, 0.11, 0.39, 0.28, 0.53, 0.52, 0.59],
    'y3': [1, 1, 0, 0, 0, 1, 0, 0, 0],
    'y4': [1, 0, 1, 0, 0, 0, 1, 1, 1],
    'y5': [0, 1, 2, 1, 0, 2, 1, 0, 1],
    'y6': ['A', 'A', 'A', 'B', 'B', 'B', 'B', 'A', 'B']
}

df = pd.DataFrame(data)

# Split the data into training and testing observations
train_data = df.iloc[:7]
test_data = df.iloc[7:]

train_data

Unnamed: 0,y1,y2,y3,y4,y5,y6
0,0.24,0.36,1,1,0,A
1,0.16,0.48,1,0,1,A
2,0.32,0.72,0,1,2,A
3,0.54,0.11,0,0,1,B
4,0.66,0.39,0,0,0,B
5,0.76,0.28,1,0,2,B
6,0.41,0.53,0,1,1,B


In [2]:
import numpy as np

class_A_data = train_data[train_data['y6'] == 'A']
class_B_data = train_data[train_data['y6'] == 'B']

# Calculate the priors for Class A and Class B
total_samples = len(train_data)
total_class_A_samples = len(class_A_data)
total_class_B_samples = len(class_B_data)

# Prior Probability of Class A
prior_A = total_class_A_samples / total_samples

# Prior Probability of Class B
prior_B = total_class_B_samples / total_samples

# Print the priors
print("Prior Probability of Class A (P(y6 = A)): {:.2f}".format(prior_A))
print("Prior Probability of Class B (P(y6 = B)): {:.2f}".format(prior_B))
print("\n")

# Define the sets of independent variables for each set
set1_features = ['y1', 'y2']
set2_features = ['y3', 'y4']
set3_features = ['y5']

# Calculate mean and covariance matrices for Class A and Class B for each set

# Set 1 (𝑦1 and 𝑦2)
Mean_A_set1 = class_A_data[set1_features].mean().values
Covariance_A_set1 = class_A_data[set1_features].cov().values

Mean_B_set1 = class_B_data[set1_features].mean().values
Covariance_B_set1 = class_B_data[set1_features].cov().values

# Set 2 (𝑦3 and 𝑑4)
Mean_A_set2 = class_A_data[set2_features].mean().values
Covariance_A_set2 = class_A_data[set2_features].cov().values

Mean_B_set2 = class_B_data[set2_features].mean().values
Covariance_B_set2 = class_B_data[set2_features].cov().values

# Set 3 (𝑦5)
Mean_A_set3 = class_A_data[set3_features].mean().values
StdDev_A_set3 = class_A_data[set3_features].std().values

Mean_B_set3 = class_B_data[set3_features].mean().values
StdDev_B_set3 = class_B_data[set3_features].std().values


# Print the estimated mean and covariance matrices
print("Set 1 (𝑦1 and 𝑦2):")
print("Mean_A_set1:")
print(Mean_A_set1)
print("Covariance_A_set1:")
print(Covariance_A_set1)
print("Mean_B_set1:")
print(Mean_B_set1)
print("Covariance_B_set1:")
print(Covariance_B_set1)

print("\nSet 2 (𝑦3 and 𝑑4):")
print("Mean_A_set2:")
print(Mean_A_set2)
print("Covariance_A_set2:")
print(Covariance_A_set2)
print("Mean_B_set2:")
print(Mean_B_set2)
print("Covariance_B_set2:")
print(Covariance_B_set2)

print("\nSet 3 (𝑦5):")
print("Mean_A_set3:")
print(Mean_A_set3)
print("StdDev_A_set3:")
print(StdDev_A_set3)
print("Mean_B_set3:")
print(Mean_B_set3)
print("StdDev_B_set3:")
print(StdDev_B_set3)


Prior Probability of Class A (P(y6 = A)): 0.43
Prior Probability of Class B (P(y6 = B)): 0.57


Set 1 (𝑦1 and 𝑦2):
Mean_A_set1:
[0.24 0.52]
Covariance_A_set1:
[[0.0064 0.0096]
 [0.0096 0.0336]]
Mean_B_set1:
[0.5925 0.3275]
Covariance_B_set1:
[[ 0.02289167 -0.00975833]
 [-0.00975833  0.03149167]]

Set 2 (𝑦3 and 𝑑4):
Mean_A_set2:
[0.66666667 0.66666667]
Covariance_A_set2:
[[ 0.33333333 -0.16666667]
 [-0.16666667  0.33333333]]
Mean_B_set2:
[0.25 0.25]
Covariance_B_set2:
[[ 0.25       -0.08333333]
 [-0.08333333  0.25      ]]

Set 3 (𝑦5):
Mean_A_set3:
[1.]
StdDev_A_set3:
[1.]
Mean_B_set3:
[1.]
StdDev_B_set3:
[0.81649658]


In [3]:
from scipy.stats import multivariate_normal, norm

# Extract 𝐱8 and 𝐱9 values from the test_data DataFrame
x8_row = test_data.iloc[0]
x9_row = test_data.iloc[1]

# Define the observed values for 𝐱8 and 𝐱9
x8_set1 = np.array([x8_row['y1'], x8_row['y2']])
x8_set2 = np.array([x8_row['y3'], x8_row['y4']])
x8_set3 = x8_row['y5']

x9_set1 = np.array([x9_row['y1'], x9_row['y2']])
x9_set2 = np.array([x9_row['y3'], x9_row['y4']])
x9_set3 = x9_row['y5']

# Calculate the likelihoods for 𝐱8 and 𝐱9 for each class (A and B)
# Likelihood for y1 and y2
likelihood_x8_A_set1 = multivariate_normal.pdf(x8_set1, mean=Mean_A_set1, cov=Covariance_A_set1)
likelihood_x8_B_set1 = multivariate_normal.pdf(x8_set1, mean=Mean_B_set1, cov=Covariance_B_set1)

likelihood_x9_A_set1 = multivariate_normal.pdf(x9_set1, mean=Mean_A_set1, cov=Covariance_A_set1)
likelihood_x9_B_set1 = multivariate_normal.pdf(x9_set1, mean=Mean_B_set1, cov=Covariance_B_set1)

# Likelihood for y3 and y4 (assuming independence within the set)
likelihood_x8_A_set2 = multivariate_normal.pdf(x8_set2, mean=Mean_A_set2, cov=Covariance_A_set2)
likelihood_x8_B_set2 = multivariate_normal.pdf(x8_set2, mean=Mean_B_set2, cov=Covariance_B_set2)

likelihood_x9_A_set2 = multivariate_normal.pdf(x9_set2, mean=Mean_A_set2, cov=Covariance_A_set2)
likelihood_x9_B_set2 = multivariate_normal.pdf(x9_set2, mean=Mean_B_set2, cov=Covariance_B_set2)

# Likelihood for y5
likelihood_x8_A_set3 = norm.pdf(x8_set3, loc=Mean_A_set3, scale=StdDev_A_set3)
likelihood_x8_B_set3 = norm.pdf(x8_set3, loc=Mean_B_set3, scale=StdDev_B_set3)

likelihood_x9_A_set3 = norm.pdf(x9_set3, loc=Mean_A_set3, scale=StdDev_A_set3)
likelihood_x9_B_set3 = norm.pdf(x9_set3, loc=Mean_B_set3, scale=StdDev_B_set3)

# Print out the likelihoods for 𝐱8 and 𝐱9
print("Likelihoods for 𝐱8:")
print("P(A | 𝐱8) for set 1 (y1, y2):", likelihood_x8_A_set1)
print("P(B | 𝐱8) for set 1 (y1, y2):", likelihood_x8_B_set1)
print("P(A | 𝐱8) for set 2 (y3, y4):", likelihood_x8_A_set2)
print("P(B | 𝐱8) for set 2 (y3, y4):", likelihood_x8_B_set2)
print("P(A | 𝐱8) for set 3 (y5):", likelihood_x8_A_set3)
print("P(B | 𝐱8) for set 3 (y5):", likelihood_x8_B_set3)

print("\nLikelihoods for 𝐱9:")
print("P(A | 𝐱9) for set 1 (y1, y2):", likelihood_x9_A_set1)
print("P(B | 𝐱9) for set 1 (y1, y2):", likelihood_x9_B_set1)
print("P(A | 𝐱9) for set 2 (y3, y4):", likelihood_x9_A_set2)
print("P(B | 𝐱9) for set 2 (y3, y4):", likelihood_x9_B_set2)
print("P(A | 𝐱9) for set 3 (y5):", likelihood_x9_A_set3)
print("P(B | 𝐱9) for set 3 (y5):", likelihood_x9_B_set3)



Likelihoods for 𝐱8:
P(A | 𝐱8) for set 1 (y1, y2): 0.9847047567047768
P(B | 𝐱8) for set 1 (y1, y2): 1.9623691378628592
P(A | 𝐱8) for set 2 (y3, y4): 0.2830616931268778
P(B | 𝐱8) for set 2 (y3, y4): 0.21921743508253846
P(A | 𝐱8) for set 3 (y5): [0.24197072]
P(B | 𝐱8) for set 3 (y5): [0.23079948]

Likelihoods for 𝐱9:
P(A | 𝐱9) for set 1 (y1, y2): 0.4030709859326153
P(B | 𝐱9) for set 1 (y1, y2): 1.7285705872234585
P(A | 𝐱9) for set 2 (y3, y4): 0.2830616931268778
P(B | 𝐱9) for set 2 (y3, y4): 0.21921743508253846
P(A | 𝐱9) for set 3 (y5): [0.39894228]
P(B | 𝐱9) for set 3 (y5): [0.48860251]


In [32]:
# Calculate the posterior probabilities for 𝐱8 and 𝐱9 for each class (A and B)
posterior_A_x8 = likelihood_x8_A_set1 * likelihood_x8_A_set2 * likelihood_x8_A_set3 * prior_A
posterior_B_x8 = likelihood_x8_B_set1 * likelihood_x8_B_set2 * likelihood_x8_B_set3 * prior_B

posterior_A_x9 = likelihood_x9_A_set1 * likelihood_x9_A_set2 * likelihood_x9_A_set3 * prior_A
posterior_B_x9 = likelihood_x9_B_set1 * likelihood_x9_B_set2 * likelihood_x9_B_set3 * prior_B

# Classify 𝐱8 and 𝐱9 based on the class with the highest posterior probability
predicted_class_x8 = 'A' if posterior_A_x8 > posterior_B_x8 else 'B'
predicted_class_x9 = 'A' if posterior_A_x9 > posterior_B_x9 else 'B'

# Print the posterior probabilities for 𝐱8 and 𝐱9
print("Posterior probabilities for 𝐱8:")
print("P(A | 𝐱8):", posterior_A_x8)
print("P(B | 𝐱8):", posterior_B_x8)

print("\nPosterior probabilities for 𝐱9:")
print("P(A | 𝐱9):", posterior_A_x9)
print("P(B | 𝐱9):", posterior_B_x9)

# Print the predicted class for 𝐱8 and 𝐱9
print("Predicted class for 𝐱8:", predicted_class_x8)
print("Predicted class for 𝐱9:", predicted_class_x9)


Posterior probabilities for 𝐱8:
P(A | 𝐱8): [0.02890501]
P(B | 𝐱8): [0.0567352]

Posterior probabilities for 𝐱9:
P(A | 𝐱9): [0.01950724]
P(B | 𝐱9): [0.10579858]
Predicted class for 𝐱8: B
Predicted class for 𝐱9: B


In [31]:
import numpy as np

# Create a range of threshold values
threshold_values = np.linspace(0, 1, 101)  # Adjust the number of values as needed

# Initialize variables to store the best threshold and accuracy
best_threshold = 0
best_accuracy = 0

for threshold in threshold_values:
    # Apply the threshold to classify x8 and x9 for all three sets of features
    predicted_x8_class_set1 = 'A' if (likelihood_x8_A_set1 * prior_A > threshold) else 'B'
    predicted_x8_class_set2 = 'A' if (likelihood_x8_A_set2 * prior_A > threshold) else 'B'
    predicted_x8_class_set3 = 'A' if (likelihood_x8_A_set3 * prior_A > threshold) else 'B'
    
    predicted_x9_class_set1 = 'A' if (likelihood_x9_A_set1 * prior_A > threshold) else 'B'
    predicted_x9_class_set2 = 'A' if (likelihood_x9_A_set2 * prior_A > threshold) else 'B'
    predicted_x9_class_set3 = 'A' if (likelihood_x9_A_set3 * prior_A > threshold) else 'B'
    
    # Calculate accuracy on the test set for all sets of features
    correct_predictions_set1 = 0
    if predicted_x8_class_set1 == test_data.iloc[0]['y6']:
        correct_predictions_set1 += 1
    if predicted_x9_class_set1 == test_data.iloc[1]['y6']:
        correct_predictions_set1 += 1
    
    correct_predictions_set2 = 0
    if predicted_x8_class_set2 == test_data.iloc[0]['y6']:
        correct_predictions_set2 += 1
    if predicted_x9_class_set2 == test_data.iloc[1]['y6']:
        correct_predictions_set2 += 1
    
    correct_predictions_set3 = 0
    if predicted_x8_class_set3 == test_data.iloc[0]['y6']:
        correct_predictions_set3 += 1
    if predicted_x9_class_set3 == test_data.iloc[1]['y6']:
        correct_predictions_set3 += 1
    
    accuracy = (correct_predictions_set1 + correct_predictions_set2 + correct_predictions_set3) / 6  # Since there are 2 test samples for each set of features
    
    # Update the best threshold and accuracy if the current accuracy is higher
    if accuracy >= best_accuracy:
        best_threshold = threshold
        best_accuracy = accuracy

    #0.66(6) is the best accuracy so anything that has better accuracy than 0.666 is a threshold that has the maximum accuracy
    if accuracy > 0.666:
        print(threshold)

# Print the best threshold and accuracy
print("Best Threshold:", best_threshold)
print("Best Accuracy on Test Set:", best_accuracy)

0.18
0.19
0.2
0.21
0.22
0.23
0.24
0.25
0.26
0.27
0.28
0.29
0.3
0.31
0.32
0.33
0.34
0.35000000000000003
0.36
0.37
0.38
0.39
0.4
0.41000000000000003
0.42
Best Threshold: 0.42
Best Accuracy on Test Set: 0.6666666666666666
