# 🧠 Day 21: Gaussian Naive Bayes From Scratch (No Sklearn Classifier! sklearn is used only for preprocessing and splitting)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from math import sqrt, pi, exp, log

# Load and preprocess data
df = pd.read_csv('Iris.csv')

# Replace the species names with integer class labels.
le = LabelEncoder()
df['Species'] = le.fit_transform(df['Species'])

# Features are every column except the target and the row identifier.
X = df.drop(['Species', 'Id'], axis=1).values
y = df['Species'].values

# Split BEFORE scaling: fitting the scaler on the full dataset would let
# test-set statistics leak into the training features.
# NOTE: X itself stays in its original units; only the two splits are scaled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling parameters from the training split only, then apply the
# same transformation to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
class NaiveBayesScratch:
    """Gaussian Naive Bayes classifier implemented with NumPy only.

    Every feature is modelled as an independent normal distribution per
    class. Prediction picks the class with the highest log-posterior
    (log prior + sum of per-feature log-likelihoods).

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance during ``fit``. It keeps the variances
        strictly positive so a feature that is constant within a class does
        not cause a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : sorted array of the distinct labels seen in ``y``.
    mean, var : dicts mapping each label to its per-feature mean / variance.
    priors : dict mapping each label to its relative frequency in ``y``.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}

        # Variance floor relative to the data scale; fall back to an absolute
        # floor when every feature is constant (largest variance is 0).
        largest_var = np.var(X, axis=0).max() if X.size else 0.0
        epsilon = self.var_smoothing * (largest_var if largest_var > 0 else 1.0)

        n_samples = X.shape[0]
        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = X_c.mean(axis=0)
            self.var[c] = X_c.var(axis=0) + epsilon
            self.priors[c] = X_c.shape[0] / float(n_samples)

    def _log_gaussian_pdf(self, class_idx, x):
        """Return the per-feature log normal density of ``x`` under a class.

        Computed directly in log space so that points far from the class mean
        yield a large negative number instead of ``log(0) = -inf``.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return -0.5 * np.log(2.0 * np.pi * var) - (x - mean) ** 2 / (2.0 * var)

    def gaussian_pdf(self, class_idx, x):
        """Return the per-feature normal density of ``x`` under a class.

        Parameters
        ----------
        class_idx : label of the class whose fitted mean/variance to use.
        x : array broadcastable against the per-feature mean vector.
        """
        return np.exp(self._log_gaussian_pdf(class_idx, x))

    def predict(self, X):
        """Predict the most probable class label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Labels taken from ``self.classes`` (the original label values,
            not their positions).

        Raises
        ------
        ValueError
            If ``X`` is non-empty and not two-dimensional.
        """
        X = np.asarray(X, dtype=float)
        if X.shape[0] == 0:
            return self.classes[:0]
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")

        # One column of log-posteriors per class, one row per sample.
        log_posteriors = np.column_stack([
            np.log(self.priors[c]) + self._log_gaussian_pdf(c, X).sum(axis=1)
            for c in self.classes
        ])

        # argmax yields a position in self.classes; map it back to the label.
        return self.classes[np.argmax(log_posteriors, axis=1)]

In [None]:
# Fit the from-scratch classifier on the training split, then predict the
# labels of the held-out split.
model = NaiveBayesScratch()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Accuracy is the fraction of test samples whose prediction equals the truth.
accuracy = (y_pred == y_test).mean()

# Report the score, then both label vectors so they can be compared by eye.
print("Accuracy:", accuracy)
print("Predicted:", y_pred)
print("Actual:   ", y_test)