In [1]:
# Libraries

import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Implementation of Gaussian Naive Bayes

class NaiveBayes:
    """
    Gaussian Naive Bayes classifier.

    Every feature is modelled, independently per class, as a normal
    distribution whose mean and variance are estimated from the training data.
    Prediction picks the class with the largest log-posterior
    ``log P(class) + sum_j log N(x_j | mean_j, var_j)``.

    Parameters:
    - var_smoothing: float, constant added to every estimated variance so that
      a feature that is constant within a class does not cause a division by
      zero (default 1e-6).

    Attributes set by ``fit``:
    - classes: 1D numpy array of the distinct labels, sorted
    - features_count: int, number of features seen during ``fit``
    - class_priors: dict mapping class -> fraction of training samples
    - likelihoods: dict mapping class -> {'mean': array, 'var': array}
    """

    def __init__(self, var_smoothing=1e-6):
        self.var_smoothing = var_smoothing
        self.class_priors = {}
        self.likelihoods = {}
        self.classes = None
        self.features_count = None

    def fit(self, X, y):
        """
        Fit the Naive Bayes model to the data.
        X: 2D array-like of shape (n_samples, n_features)
        y: 1D array-like of shape (n_samples,)

        Raises ValueError if the shapes are inconsistent or the data is empty.
        """
        # Convert up front so lists / DataFrames behave like arrays below.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2D (n_samples, n_features), got shape {X.shape}")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError(
                f"y must be 1D with one label per row of X, got X {X.shape} and y {y.shape}"
            )
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset")

        self.classes = np.unique(y)
        self.features_count = X.shape[1]
        self.class_priors = {}
        self.likelihoods = {}

        # Per-class prior and per-feature Gaussian parameters.
        for cls in self.classes:
            mask = y == cls
            X_cls = X[mask]
            self.class_priors[cls] = np.mean(mask)
            self.likelihoods[cls] = {
                'mean': np.mean(X_cls, axis=0),
                # Smoothing keeps the variance strictly positive.
                'var': np.var(X_cls, axis=0) + self.var_smoothing,
            }

    def _calculate_likelihood(self, X, cls):
        """
        Calculate the Gaussian likelihood (density) of the data X for class cls.

        Kept for inspection/compatibility. Prediction uses
        ``_calculate_log_likelihood`` instead, because these densities
        underflow to 0 for samples far away from the class mean.
        """
        mean = self.likelihoods[cls]['mean']
        var = self.likelihoods[cls]['var']
        numerator = np.exp(- (X - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _calculate_log_likelihood(self, X, cls):
        """
        Calculate the per-feature Gaussian log-density of X for class cls.

        Computed directly in log space (no exp followed by log), so it stays
        finite where the plain density would underflow.
        """
        mean = self.likelihoods[cls]['mean']
        var = self.likelihoods[cls]['var']
        return -0.5 * (np.log(2 * np.pi * var) + (X - mean) ** 2 / var)

    def _joint_log_likelihood(self, X):
        """
        Unnormalised log-posterior of every class for every row of X.

        X: 2D numpy array of shape (n_samples, n_features)
        Returns a 2D array of shape (n_samples, n_classes); column order
        follows ``self.classes``.
        """
        columns = [
            np.log(self.class_priors[cls])
            + np.sum(self._calculate_log_likelihood(X, cls), axis=1)
            for cls in self.classes
        ]
        return np.column_stack(columns)

    def _calculate_posterior(self, X):
        """
        Return the most probable class for a single sample X.

        X: 1D array-like of shape (n_features,)
        Ties are resolved in favour of the class that comes first in
        ``self.classes``.
        """
        sample = np.asarray(X, dtype=float).reshape(1, -1)
        scores = self._joint_log_likelihood(sample)[0]
        return self.classes[np.argmax(scores)]

    def predict(self, X):
        """
        Predict the class for each sample in X.
        X: 2D array-like of shape (n_samples, n_features)

        Returns a 1D numpy array of predicted labels (same dtype as the
        training labels).
        Raises RuntimeError if called before ``fit`` and ValueError if X does
        not have the number of features seen during ``fit``.
        """
        if self.classes is None:
            raise RuntimeError("NaiveBayes.predict called before fit")

        X = np.asarray(X, dtype=float)
        if X.size == 0:
            # Nothing to classify: return an empty label array.
            return self.classes[:0].copy()
        if X.ndim != 2 or X.shape[1] != self.features_count:
            raise ValueError(
                f"X must have shape (n_samples, {self.features_count}), got {X.shape}"
            )

        # One vectorised pass over all samples; argmax picks the first
        # maximum, matching the class order used during fit.
        best = np.argmax(self._joint_log_likelihood(X), axis=1)
        return self.classes[best]

In [3]:
# Function for generating synthetic data

def generate_synthetic_data(n_samples_per_class=100, n_features=2, mean_0=0, mean_1=2, var_0=1, var_1=1,
                            random_state=None):
    """
    Generate synthetic data for a binary classification problem.

    Each class is drawn from a normal distribution with the given mean and
    variance, independently for every feature. Class 0 rows come first,
    followed by class 1 rows (the output is not shuffled).

    Parameters:
    - n_samples_per_class: int, number of samples per class
    - n_features: int, number of features
    - mean_0: float, mean of features for class 0
    - mean_1: float, mean of features for class 1
    - var_0: float, variance of features for class 0
    - var_1: float, variance of features for class 1
    - random_state: None, int or numpy.random.Generator. With None (default)
      samples are drawn from NumPy's global random stream, so np.random.seed
      still controls the output; otherwise a dedicated generator seeded with
      this value is used, which makes the call reproducible on its own.

    Returns:
    - X: 2D numpy array of shape (2 * n_samples_per_class, n_features)
    - y: 1D numpy array of shape (2 * n_samples_per_class,), float labels 0.0 / 1.0

    Raises:
    - ValueError: if var_0 or var_1 is negative
    """
    # A negative variance would silently turn into NaN samples via np.sqrt.
    if np.any(np.asarray(var_0) < 0) or np.any(np.asarray(var_1) < 0):
        raise ValueError("var_0 and var_1 must be non-negative")

    # Both np.random and a Generator expose normal(loc, scale, size).
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    shape = (n_samples_per_class, n_features)

    # Generate data for class 0
    X0 = rng.normal(loc=mean_0, scale=np.sqrt(var_0), size=shape)
    y0 = np.zeros(n_samples_per_class)

    # Generate data for class 1
    X1 = rng.normal(loc=mean_1, scale=np.sqrt(var_1), size=shape)
    y1 = np.ones(n_samples_per_class)

    # Combine the data
    X = np.vstack((X0, X1))
    y = np.concatenate((y0, y1))

    return X, y

In [4]:
# Evaluate precision, recall and F1 score

def evaluate_f1_score(y_true, y_pred):
    """
    Calculate and print precision, recall and the F1 score of predictions.

    Class 1 is treated as the positive class and class 0 as the negative
    class. A metric whose denominator is zero is reported as 0.

    Parameters:
    - y_true: 1D array-like, the true labels (0 or 1)
    - y_pred: 1D array-like, the predicted labels (0 or 1)

    Returns:
    - f1_score: float, the F1 score of the predictions

    Raises:
    - ValueError: if y_true and y_pred do not have the same shape
    """
    # Convert so that plain lists compare element-wise instead of as a whole.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )

    # Calculate True Positives, False Positives, False Negatives
    tp = np.sum((y_pred == 1) & (y_true == 1))
    fp = np.sum((y_pred == 1) & (y_true == 0))
    fn = np.sum((y_pred == 0) & (y_true == 1))

    # Calculate precision and recall (0 when there is nothing to divide by)
    precision = tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = tp / (tp + fn) if tp + fn > 0 else 0.0

    # Calculate F1 score: harmonic mean of precision and recall
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0.0
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1_score:.2f}")

    # Return the score so callers can store or compare it, as documented.
    return float(f1_score)

In [5]:
# Generating synthetic data

# Seed NumPy's global random stream first so that Restart Kernel -> Run All
# reproduces the same dataset (and therefore the same metrics) every time.
np.random.seed(42)
X, y = generate_synthetic_data(n_samples_per_class=50, n_features=2, mean_0=0, mean_1=2, var_0=1, var_1=1)

In [6]:
# Perform train-test split: hold out 20% of the samples for testing.
# random_state=42 fixes the shuffle so the split is the same on every run.

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Create a Naive Bayes classifier (an unfitted instance of the NaiveBayes class defined above)

nb = NaiveBayes()

In [8]:
# Fit the model on the training split: this estimates the class priors and
# the per-class mean and variance of every feature.

nb.fit(X_train, y_train)

In [9]:
# Predict a class label for every sample in the held-out test split

y_pred = nb.predict(X_test)

In [10]:
# Evaluate on the test split; the function prints precision, recall and F1 score

evaluate_f1_score(y_test, y_pred)

Precision: 0.88
Recall: 0.88
F1 Score: 0.88
