## Naive Bayes using Tensorflow

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
# import matplotlib.pyplot as plt

In [2]:
cols = ["fLength", "fWidth", "fSize", "fConc", "fConc1", "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist", "class"]
df = pd.read_csv("magic04.data", names=cols)
df["class"] = (df["class"] == "g").astype(int)

In [3]:
df.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,1
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,1
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,1
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,1
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,1


In [4]:
tf.random.set_seed(42)
np.random.seed(42)

In [5]:
# Shuffle the data
df = df.sample(frac=1).reset_index(drop=True)

In [6]:
# Normalize the feature values
df.iloc[:, :-1] = (df.iloc[:, :-1] - df.iloc[:, :-1].mean()) / df.iloc[:, :-1].std()

In [7]:
# Split the data into features and target
features = df.drop('class', axis=1).values
target = df["class"].values

Split the data into training and validation sets

In [8]:
# Split the data into training and validation sets
train_size = int(0.8 * len(features))
X_train, X_val = features[:train_size], features[train_size:]
y_train, y_val = target[:train_size], target[train_size:]

In [9]:
print(y_train)

[0 1 1 ... 1 0 0]


Model

In [10]:
# Calculate class priors
class_counts = np.bincount(y_train.astype(int))
class_priors = class_counts / len(y_train)

In [11]:
# Calculate feature-wise conditional probabilities
num_features = X_train.shape[1]
feature_probs = np.zeros((num_features, len(class_priors)))

In [12]:
for feature_idx in range(num_features):
    for class_idx in range(len(class_priors)):
        feature_probs[feature_idx][class_idx] = np.mean(X_train[y_train == class_idx][:, feature_idx])

In [13]:
# Add a small epsilon to avoid division by zero
epsilon = 1e-10

In [14]:
# Predict the labels for the validation set
predictions = []

for sample in X_val:
    class_scores = []

    for class_idx in range(len(class_priors)):
        class_score = np.log(class_priors[class_idx])

        for feature_idx in range(num_features):
            if sample[feature_idx] == 1:
                class_score += np.log(feature_probs[feature_idx][class_idx] + epsilon)
            else:
                class_score += np.log(1 - feature_probs[feature_idx][class_idx] + epsilon)

        class_scores.append(class_score)

    predicted_class = np.argmax(class_scores)
    predictions.append(predicted_class)

In [15]:
# Calculate accuracy on the validation set
accuracy = np.mean(predictions == y_val.astype(int))
print("Accuracy:", accuracy)

Accuracy: 0.650105152471083
