In [35]:
import pandas as pd
import numpy as np

# Read the training set and the test cases that will be classified
train_data = pd.read_csv('data7.csv')
test_case_data = pd.read_csv('test7.csv')

# The last column is the class label; every column before it is a feature
y_train = train_data.iloc[:, -1]
X_train = train_data.iloc[:, :-1]

# Class priors: fraction of training rows carrying each label
prior_0 = len(y_train[y_train == 0]) / len(y_train)
prior_1 = len(y_train[y_train == 1]) / len(y_train)

# Conditional likelihood table, indexed as likelihoods[feature][label][value].
# A zero estimate is replaced by 0.0001 so that a single unseen value cannot
# zero out the whole product later on.
likelihoods = {}
for col in X_train.columns:
    per_class = {}
    for val in (0, 1):
        # Training rows that belong to class `val`
        subset = X_train[y_train == val]
        class_size = len(subset)
        per_class[val] = {
            feature_value: (
                len(subset[subset[col] == feature_value]) / class_size
            ) or 0.0001
            for feature_value in (0, 1)
        }
    likelihoods[col] = per_class

# Function to calculate posterior probabilities
def predict(row):
    """Classify one row with the naive Bayes tables built above.

    Starts from the class priors and multiplies in, for every feature column
    of ``X_train``, the likelihood of the value found in ``row`` under each
    class. The two resulting scores are unnormalised (they are not divided by
    the evidence term). Every intermediate likelihood and both final scores
    are printed.

    Args:
        row: A row indexable by the feature column names of ``X_train``.

    Returns:
        1 if the class-1 score is strictly greater than the class-0 score,
        otherwise 0.
    """
    post_0 = prior_0
    post_1 = prior_1
    print(f"\nCalculating for: {row.tolist()}")
    for col in X_train.columns:
        observed = row[col]
        # Look each likelihood up once and reuse it for the product and the trace
        given_0 = likelihoods[col][0][observed]
        given_1 = likelihoods[col][1][observed]
        post_0 *= given_0
        post_1 *= given_1
        print(f"P({col}= {observed} | y=0): {given_0}")
        print(f"P({col}= {observed} | y=1): {given_1}")
    print(f"Posterior for y=0: {post_0}")
    print(f"Posterior for y=1: {post_1}")
    if post_1 > post_0:
        return 1
    return 0

# Classify every test case and store the label next to its inputs
test_case_data['Predicted'] = test_case_data.apply(predict, axis=1)

# Dump the conditional probability table, one feature per line
print("\n--- Probability Table ---")
for feature in likelihoods:
    print(f"{feature}: {likelihoods[feature]}")

# Show the predicted label of each test case
print("\n--- Predicted class labels for test cases ---")
print(test_case_data['Predicted'])

# Calculate performance metrics
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def calculate_metrics(y_true, y_pred):
    """Print and return accuracy, precision, recall and F-score for 0/1 labels.

    Class 1 is treated as the positive class. A metric whose denominator is
    zero (e.g. precision when nothing was predicted positive) is reported as
    0.0 instead of raising ZeroDivisionError.

    Args:
        y_true: Element-wise comparable collection of true labels
            (e.g. a pandas Series or NumPy array).
        y_pred: Predicted labels, same kind of collection as ``y_true``.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f_score".
    """
    # Confusion-matrix cells; int() keeps the arithmetic in plain Python ints
    tp = int(sum((y_true == 1) & (y_pred == 1)))
    tn = int(sum((y_true == 0) & (y_pred == 0)))
    fp = int(sum((y_true == 0) & (y_pred == 1)))
    fn = int(sum((y_true == 1) & (y_pred == 0)))

    accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
    precision = _safe_ratio(tp, tp + fp)
    recall = _safe_ratio(tp, tp + fn)
    f_score = _safe_ratio(2 * (precision * recall), precision + recall)

    print("\n--- Evaluation Metrics ---")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F-Score: {f_score}")

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f_score": f_score,
    }
# Evaluate on the training set (resubstitution): the metrics need true labels,
# and the recorded output shows the test cases' label column as '?'.
# Comparing y_train against the test-set predictions would pair unrelated rows.
# NOTE: predict() also prints its per-row trace for every training row.
calculate_metrics(y_train, X_train.apply(predict, axis=1))
# Write the equation used in classification
# (the evidence term is omitted; it is the same for both classes, so it does
# not change which class scores higher)
print("\n--- Equation Used in Classification ---")
print("P(y|x1,x2,...,x8) = P(y) * P(x1|y) * P(x2|y) * ... * P(x8|y)")

# Calculate Joint Probability
def calculate_joint_probability(row):
    """Print and return the joint probability of one row's feature values.

    Under the naive Bayes model the joint probability of the features is the
    sum over both classes of P(y) * P(x1|y) * ... * P(xn|y). Using the class-0
    term alone would give P(x1, ..., xn, y=0) rather than the P(x1, ..., xn)
    that the printed label announces.

    Args:
        row: A row indexable by the feature column names of ``X_train``.

    Returns:
        The joint probability of the row's feature values.
    """
    joint_prob = 0.0
    for label, prior in ((0, prior_0), (1, prior_1)):
        # P(y=label) times the product of the per-feature likelihoods
        class_term = prior
        for col in X_train.columns:
            class_term *= likelihoods[col][label][row[col]]
        joint_prob += class_term
    print(f"\nJoint Probability P(x1,x2,...,x8): {joint_prob}")
    return joint_prob

print("\n--- Joint Probability for Test Cases ---")
test_case_data.apply(calculate_joint_probability, axis=1)


Calculating for: [0, 1, 1, 1, 1, 1, 1, 1, '?']
P(x1= 0 | y=0): 0.0001
P(x1= 0 | y=1): 0.35135135135135137
P(x2= 1 | y=0): 0.42857142857142855
P(x2= 1 | y=1): 1.0
P(x3= 1 | y=0): 0.42857142857142855
P(x3= 1 | y=1): 1.0
P(x4= 1 | y=0): 1.0
P(x4= 1 | y=1): 0.5945945945945946
P(x5= 1 | y=0): 0.7142857142857143
P(x5= 1 | y=1): 1.0
P(x6= 1 | y=0): 0.6666666666666666
P(x6= 1 | y=1): 0.5675675675675675
P(x7= 1 | y=0): 0.42857142857142855
P(x7= 1 | y=1): 0.0001
P(x8= 1 | y=0): 1.0
P(x8= 1 | y=1): 0.5405405405405406
Posterior for y=0: 1.3571931235548405e-06
Posterior for y=1: 4.088670923259473e-06

Calculating for: [1, 0, 0, 0, 0, 0, 0, 0, '?']
P(x1= 1 | y=0): 1.0
P(x1= 1 | y=1): 0.6486486486486487
P(x2= 0 | y=0): 0.5714285714285714
P(x2= 0 | y=1): 0.0001
P(x3= 0 | y=0): 0.5714285714285714
P(x3= 0 | y=1): 0.0001
P(x4= 0 | y=0): 0.0001
P(x4= 0 | y=1): 0.40540540540540543
P(x5= 0 | y=0): 0.2857142857142857
P(x5= 0 | y=1): 0.0001
P(x6= 0 | y=0): 0.3333333333333333
P(x6= 0 | y=1): 0.432432432432432

Unnamed: 0,0
0,
1,
2,
3,
