In [1]:
import csv
import numpy as np

In [2]:
class NaiveBayes:
    """Naive Bayes classifier for categorical features with add-one smoothing.

    State populated by ``fit``:
      * ``classes`` - the distinct labels found in ``y``.
      * ``class_priors`` - maps each label to its relative frequency in ``y``.
      * ``feature_likelihoods`` - ``feature_likelihoods[cls][i][val]`` is the
        smoothed estimate of P(feature ``i`` == ``val`` | ``cls``).
    """

    # Probability substituted for a feature value that was never seen in training.
    _UNSEEN_PROBABILITY = 1e-6

    def __init__(self):
        self.class_priors = {}
        self.feature_likelihoods = {}
        self.classes = []

    def fit(self, X, y):
        """Estimate class priors and per-feature likelihoods from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features) holding
                categorical feature values.
            y: 1-D array-like with one class label per row of ``X``.
        """
        # Coerce to arrays so plain lists work too: without this, a list ``y``
        # makes ``y == cls`` a single bool and every prior collapses to 0.
        X = np.asarray(X)
        y = np.asarray(y)

        n_samples, n_features = X.shape
        self.classes = np.unique(y)

        # Prior probability P(C) of each class.
        self.class_priors = {cls: np.sum(y == cls) / n_samples for cls in self.classes}

        # The set of known values per feature is taken over the whole training
        # set, so it does not depend on the class; compute it once.
        values_per_feature = [np.unique(X[:, i]) for i in range(n_features)]

        # Likelihood P(feature i == val | C) with add-one (Laplace) smoothing.
        self.feature_likelihoods = {cls: {} for cls in self.classes}
        for cls in self.classes:
            X_class = X[y == cls]
            total_count = X_class.shape[0]
            for i, feature_vals in enumerate(values_per_feature):
                column = X_class[:, i]
                denominator = total_count + len(feature_vals)
                self.feature_likelihoods[cls][i] = {
                    val: (np.sum(column == val) + 1) / denominator
                    for val in feature_vals
                }

    def predict(self, X):
        """Return a list with the predicted class label for every sample in ``X``.

        Args:
            X: iterable of samples, each an iterable of feature values in the
                same column order that was used for ``fit``.

        Raises:
            ValueError: if the model has not been fitted.
        """
        return [self._predict_single(x) for x in X]

    def _predict_single(self, x):
        """Return the class with the highest log-posterior for one sample."""
        if len(self.classes) == 0:
            # Previously surfaced as an opaque "max() arg is an empty sequence".
            raise ValueError("NaiveBayes model is not fitted; call fit() before predict()")

        posteriors = {}

        for cls in self.classes:
            prior_log = np.log(self.class_priors[cls])  # log P(C)
            likelihood_log = 0

            for i, feature_value in enumerate(x):
                known = self.feature_likelihoods[cls][i]
                if feature_value in known:
                    likelihood_log += np.log(known[feature_value])
                else:
                    # Every class shares the same known values per feature (see
                    # fit), so this penalty shifts all class scores equally.
                    likelihood_log += np.log(self._UNSEEN_PROBABILITY)

            # log posterior (up to a constant) = log prior + log likelihood
            posteriors[cls] = prior_log + likelihood_log

        return max(posteriors, key=posteriors.get)


In [3]:
def encode_categorical(X):
    """Integer-encode every column of a 2-D array of categorical values.

    Args:
        X: 2-D NumPy array; each column is encoded independently.

    Returns:
        A tuple ``(X_encoded, feature_maps)`` where ``X_encoded`` is an int
        array shaped like ``X`` and ``feature_maps[i]`` maps each distinct
        value of column ``i`` to its code (its position in ``np.unique``'s
        output for that column).
    """
    n_columns = X.shape[1]
    X_encoded = np.zeros_like(X, dtype=int)
    feature_maps = []

    for col in range(n_columns):
        column = X[:, col]

        # Assign consecutive codes in the order np.unique reports the values.
        codes = {}
        for code, value in enumerate(np.unique(column)):
            codes[value] = code

        feature_maps.append(codes)
        X_encoded[:, col] = [codes[value] for value in column]

    return X_encoded, feature_maps

# Load dataset
def load_dataset(file_path):
    """Read a CSV file and integer-encode its features and labels.

    The first row is skipped as a header. In every remaining row the last
    column is the label and all preceding columns are features.

    Args:
        file_path: path of the CSV file to read.

    Returns:
        A tuple ``(X_encoded, y_encoded, feature_maps, label_map)``:
        the encoded feature matrix, the encoded label vector, the per-column
        value-to-code maps from ``encode_categorical``, and the label-to-code map.

    Raises:
        ValueError: if the file contains no data rows.
    """
    X = []
    y = []

    # newline='' is what the csv module documentation asks for, so that
    # newlines embedded in quoted fields are interpreted correctly.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header; tolerate a completely empty file

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline);
                # indexing row[-1] on those would raise IndexError.
                continue
            X.append(row[:-1])
            y.append(row[-1])

    if not X:
        raise ValueError(f"No data rows found in {file_path!r}")

    X = np.array(X)
    y = np.array(y)
    X_encoded, feature_maps = encode_categorical(X)
    label_map = {val: idx for idx, val in enumerate(np.unique(y))}
    y_encoded = np.array([label_map[val] for val in y])

    return X_encoded, y_encoded, feature_maps, label_map


In [4]:
def decode_prediction(predictions, label_map):
    """Translate encoded predictions back into their original labels.

    Args:
        predictions: iterable of encoded class values.
        label_map: dict mapping each original label to its encoded value.

    Returns:
        A list holding the original label for every entry in ``predictions``.
    """
    # Invert label -> code into code -> label.
    code_to_label = {}
    for label, code in label_map.items():
        code_to_label[code] = label

    decoded = []
    for code in predictions:
        decoded.append(code_to_label[code])
    return decoded


In [5]:
def get_user_input(feature_maps):
    """Interactively collect one value per feature and return the encoded sample.

    For each feature the accepted values are printed, then the user is
    prompted repeatedly until the entry is one of the keys of that feature's map.

    Args:
        feature_maps: sequence of dicts, one per feature, mapping each
            accepted input value to its integer code.

    Returns:
        A 1-D NumPy array with the code chosen for every feature, in order.
    """
    test_sample = []
    for i, feature_map in enumerate(feature_maps):
        # Unwrap NumPy scalar keys for display only: on NumPy >= 2 their repr
        # is e.g. np.str_('a'), which is not what the user has to type.
        valid_inputs = [
            key.item() if isinstance(key, np.generic) else key
            for key in feature_map
        ]
        print(f"Feature {i + 1} options: {valid_inputs}")
        while True:
            user_input = input(f"Enter a feature {i + 1}: ")
            if user_input in feature_map:
                test_sample.append(feature_map[user_input])
                break
            print("Wrong input")
    return np.array(test_sample)


In [None]:
# Train the classifier on the dataset chosen by the user.
file_path = input("Path to the dataset or Dataset(csv file)")
X, y, feature_maps, label_map = load_dataset(file_path)
model = NaiveBayes()
model.fit(X, y)

# Classify user-entered samples; the loop body always runs once and repeats
# only while the answer to the final prompt is "yes" (case-insensitive).
repeat = "yes"
while repeat.lower() == "yes":
    print("\nEnter the feature:")
    test_sample = get_user_input(feature_maps)

    # predict() expects a batch, so wrap the single sample in an outer array.
    prediction = model.predict(np.array([test_sample]))
    decoded_prediction = decode_prediction(prediction, label_map)
    print(f"Predicted class: {decoded_prediction[0]}\n")

    repeat = input("Would you like to test another sample? (yes/no): ")
