# In [1]:
import math

class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier built on the standard library only.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and sample variance are estimated by
    :meth:`train`. :meth:`predict` returns the label with the highest
    posterior score.

    Attributes set by :meth:`train`:
        class_probabilities: dict mapping label -> prior (class frequency).
        class_means: dict mapping label -> list of per-feature means.
        class_variances: dict mapping label -> list of per-feature sample
            variances (``0.0`` when a class has a single sample).
        classes: set of the labels seen during training.
    """

    # Stand-in used at prediction time when a stored variance is exactly
    # zero, so the Gaussian density stays finite.
    _MIN_VARIANCE = 1e-5

    def __init__(self):
        self.class_probabilities = {}
        self.class_means = {}
        self.class_variances = {}
        self.classes = set()

    def calculate_probability(self, x, mean, variance):
        """Return the normal density of ``x`` for the given mean and variance.

        ``variance`` must be strictly positive; a zero variance raises
        ``ZeroDivisionError``.
        """
        exponent = math.exp(-(math.pow(x - mean, 2) / (2 * variance)))
        return (1 / (math.sqrt(2 * math.pi * variance))) * exponent

    def _log_probability(self, x, mean, variance):
        """Return the natural log of the normal density, computed directly.

        Working on the log avoids the underflow to ``0.0`` that
        ``calculate_probability`` hits for values far from the mean.
        """
        return (-0.5 * math.log(2 * math.pi * variance)
                - (x - mean) ** 2 / (2 * variance))

    def train(self, X, y):
        """Estimate priors, means and variances from labelled samples.

        Any state from a previous call is discarded, so the model can be
        re-trained on new data.

        Args:
            X: sequence of samples; each sample is a sequence of numeric
                feature values. The feature count is taken from ``X[0]``.
            y: sequence of hashable labels, one per sample in ``X``.

        Raises:
            ValueError: if the training data is empty or ``X`` and ``y``
                differ in length.
        """
        total_samples = len(y)
        if total_samples == 0:
            raise ValueError("training data must not be empty")
        if len(X) != total_samples:
            raise ValueError("X and y must contain the same number of samples")
        total_features = len(X[0])

        # Group the rows by label in one pass instead of rescanning y
        # once per class.
        rows_by_label = {}
        for row, label in zip(X, y):
            rows_by_label.setdefault(label, []).append(row)

        priors = {}
        means = {}
        variances = {}
        for label, rows in rows_by_label.items():
            count = len(rows)
            priors[label] = count / total_samples

            label_means = []
            label_variances = []
            for feature in range(total_features):
                values = [row[feature] for row in rows]
                mean = sum(values) / count
                squared_error = sum((value - mean) ** 2 for value in values)
                label_means.append(mean)
                # The sample variance (n - 1 divisor) is undefined for a
                # single observation; store 0.0 and let predict() apply
                # its variance floor.
                label_variances.append(
                    squared_error / (count - 1) if count > 1 else 0.0
                )
            means[label] = label_means
            variances[label] = label_variances

        # Replace the fitted state wholesale so nothing from an earlier
        # fit (stale priors, labels no longer present) survives.
        self.class_probabilities = priors
        self.class_means = means
        self.class_variances = variances
        self.classes = set(rows_by_label)

    def predict(self, sample):
        """Return the most probable label for ``sample``.

        Scores are accumulated as log-probabilities, so a product of many
        small densities cannot underflow to zero and make every class tie.

        Args:
            sample: sequence of numeric feature values, in the same order
                as the training features.

        Returns:
            The best-scoring label, or ``None`` if the model has not been
            trained.
        """
        best_class = None
        best_score = None

        for label in self.classes:
            score = math.log(self.class_probabilities[label])
            label_means = self.class_means[label]
            label_variances = self.class_variances[label]
            for feature, value in enumerate(sample):
                variance = label_variances[feature]
                if variance == 0:
                    variance = self._MIN_VARIANCE
                score += self._log_probability(
                    value, label_means[feature], variance
                )

            if best_score is None or score > best_score:
                best_score = score
                best_class = label

        return best_class


# Small demonstration dataset: each entry pairs a four-value feature row
# with its class label.
labelled_rows = [
    ([5.1, 3.5, 1.4, 0.2], 'setosa'),
    ([4.9, 3.0, 1.4, 0.2], 'setosa'),
    ([5.8, 2.6, 4.0, 1.2], 'versicolor'),
    ([6.0, 3.4, 4.5, 1.6], 'versicolor'),
    ([6.7, 3.1, 4.7, 1.5], 'versicolor'),
    ([6.3, 2.3, 4.4, 1.3], 'versicolor'),
]

# Split the pairs into the parallel feature/label lists that train() expects.
X = [features for features, _ in labelled_rows]
y = [label for _, label in labelled_rows]

# Fit a Gaussian Naive Bayes model on the demonstration data.
classifier = GaussianNaiveBayes()
classifier.train(X, y)

# Classify one previously unseen feature row and report the result.
new_sample = [5.7, 2.8, 4.1, 1.3]
predicted_class = classifier.predict(new_sample)

print(f"Predicted class for the new sample: {predicted_class}")


# Output: Predicted class for the new sample: versicolor
