In [3]:
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [4]:
# Synthetic two-class dataset: 1000 rows x 20 features
# (15 informative + 5 redundant), seeded so the run is reproducible.
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=42
)

# Hold out 20% of the rows for evaluation (same seed -> same split every run).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# fit() hands back the estimator itself, so construction and training chain.
nb = GaussianNB().fit(X_train, y_train)

# Accuracy is the fraction of held-out labels the model reproduces.
predictions = nb.predict(X_test)
accuracy = np.mean(predictions == y_test)
print(f"Accuracy: {accuracy:.2f}")

# Per-class probabilities for the first five held-out rows
# (one row per sample, one column per class).
probabilities = nb.predict_proba(X_test[:5])
print("\nProbability estimates for first 5 samples:")
print(probabilities)

Accuracy: 0.80

Probability estimates for first 5 samples:
[[0.95352414 0.04647586]
 [0.0165542  0.9834458 ]
 [0.86480177 0.13519823]
 [0.87486561 0.12513439]
 [0.03083089 0.96916911]]


In [5]:
class SimpleNaiveBayes:
    """Minimal Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per class.
    Prediction picks the class maximising ``log P(y) + sum_j log P(x_j | y)``.
    Scores are computed in log space: multiplying raw densities underflows to
    0.0 once there are many features, which would make every class tie at zero.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data that
        is added to every class variance. Keeps the variance strictly positive
        when a feature is constant within a class.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)
        Sorted unique labels seen during ``fit``.
    priors : dict
        Maps each class label to its relative frequency in ``y``.
    mean : ndarray of shape (n_classes, n_features)
        Per-class feature means.
    var : ndarray of shape (n_classes, n_features)
        Per-class feature variances, including the smoothing term.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate class priors and per-class Gaussian parameters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self : SimpleNaiveBayes
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2D, is empty, or its row count differs from ``y``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2D (n_samples, n_features), got {X.ndim}D input"
            )
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y disagree on sample count: {X.shape[0]} vs {y.shape[0]}"
            )

        n_features = X.shape[1]
        self.classes = np.unique(y)
        n_classes = len(self.classes)

        # Prior P(y): relative frequency of each label.
        self.priors = {cls: np.mean(y == cls) for cls in self.classes}

        # Smoothing term scaled to the data; fall back to the raw constant when
        # every feature is constant (max variance of 0 would give no smoothing).
        epsilon = self.var_smoothing * X.var(axis=0).max()
        if epsilon == 0:
            epsilon = self.var_smoothing

        # Gaussian parameters of P(x | y), one row per class.
        self.mean = np.zeros((n_classes, n_features))
        self.var = np.zeros((n_classes, n_features))
        for idx, cls in enumerate(self.classes):
            X_cls = X[y == cls]
            self.mean[idx, :] = X_cls.mean(axis=0)
            self.var[idx, :] = X_cls.var(axis=0) + epsilon

        return self

    def _calculate_likelihood(self, x, mean, var):
        """Return the product of per-feature Gaussian densities for one sample.

        Kept for backward compatibility. ``predict`` no longer uses it because
        this raw product underflows to 0.0 for high-dimensional inputs.
        """
        exponent = np.exp(-((x - mean) ** 2) / (2 * var))
        return np.prod(1 / np.sqrt(2 * np.pi * var) * exponent)

    def _check_X(self, X):
        """Coerce ``X`` to a 2D float array matching the fitted feature count."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2D (n_samples, n_features), got {X.ndim}D input"
            )
        if X.shape[1] != self.mean.shape[1]:
            raise ValueError(
                f"X has {X.shape[1]} features, but the model was fitted "
                f"with {self.mean.shape[1]}"
            )
        return X

    def _joint_log_likelihood(self, X):
        """Return ``log P(y) + log P(x | y)`` as an (n_samples, n_classes) array."""
        columns = []
        for idx, cls in enumerate(self.classes):
            var = self.var[idx]
            # Log of the normal density, evaluated per sample and per feature.
            log_pdf = -0.5 * (
                np.log(2 * np.pi * var) + (X - self.mean[idx]) ** 2 / var
            )
            columns.append(np.log(self.priors[cls]) + log_pdf.sum(axis=1))
        return np.column_stack(columns)

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, drawn from ``self.classes``. Ties resolve to the
            first (lowest-sorted) class.
        """
        scores = self._joint_log_likelihood(self._check_X(X))
        return self.classes[np.argmax(scores, axis=1)]

    def predict_proba(self, X):
        """Return posterior class probabilities for every row of ``X``.

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            Columns follow the order of ``self.classes``; each row sums to 1.
        """
        scores = self._joint_log_likelihood(self._check_X(X))
        # Subtract the row maximum before exponentiating so the largest term
        # is exp(0) = 1 and the normalisation cannot underflow to 0/0.
        scores = scores - scores.max(axis=1, keepdims=True)
        unnormalised = np.exp(scores)
        return unnormalised / unnormalised.sum(axis=1, keepdims=True)

In [6]:
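# Naive Bayes is built on Bayes' theorem:
# P(y|X) = P(X|y) * P(y) / P(X)
# The "naive" part is the assumption that features are conditionally
# independent given the class, so P(X|y) factors into a product of P(x_i|y).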

In [None]:
# Quick reference: scikit-learn's Naive Bayes variants, chosen by feature type.
# from sklearn.naive_bayes import GaussianNB  # For continuous data
# from sklearn.naive_bayes import MultinomialNB  # For discrete counts (text)
# from sklearn.naive_bayes import BernoulliNB  # For binary features

In [7]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Tiny hand-labelled sentiment corpus; labels line up with texts by position.
texts = [
    "The movie was great",
    "Terrible film, waste of time",
    "I loved the movie",
    "Poor acting, bad plot",
]
labels = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Bag-of-words: learn the vocabulary and build the count matrix in one step.
# NOTE: X and nb rebind the names used by the GaussianNB cell earlier on.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)

# fit() returns the estimator, so construction and training chain.
nb = MultinomialNB().fit(X, labels)

# Unseen text must go through transform() (not fit_transform()) so it is
# encoded with the vocabulary learned from the training texts.
new_text = ["This movie was awesome"]
new_X = vectorizer.transform(new_text)
prediction = nb.predict(new_X)
print(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")

Prediction: Positive
