In [None]:
from sklearn.datasets import load_iris
import numpy as np

# Load the iris measurements (X) and their class labels (y)
iris = load_iris()
X, y = iris.data, iris.target

# Shuffle the sample indices with a fixed seed so the split is reproducible,
# then hold out the last 30 shuffled samples as the test set
np.random.seed(42)
indices = np.random.permutation(len(X))
train_idx, test_idx = indices[:-30], indices[-30:]
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]

# Normalize the data
def normalize_data(data, mean=None, std=None):
    """Standardize each column of ``data`` to zero mean and unit variance.

    Parameters
    ----------
    data : array-like
        Values to scale; statistics are taken along axis 0 (per column).
    mean, std : array-like, optional
        Per-column statistics to apply. When omitted they are computed from
        ``data`` itself (population standard deviation, ddof=0). Supplying
        them lets held-out data be scaled with statistics from another split.

    Returns
    -------
    numpy.ndarray
        ``(data - mean) / std``; a column whose std is 0 comes back as all
        zeros instead of NaN.
    """
    data = np.asarray(data, dtype=float)
    mean = np.mean(data, axis=0) if mean is None else np.asarray(mean, dtype=float)
    std = np.std(data, axis=0) if std is None else np.asarray(std, dtype=float)
    # A constant column has std == 0 and would produce 0/0 = NaN.
    # Dividing by 1 instead leaves the centered column at exactly 0.
    safe_std = np.where(std == 0, 1.0, std)
    return (data - mean) / safe_std

# Scale both splits with statistics estimated on the training split only.
# Standardizing the test split with its own mean/std would put the two splits
# on different scales and let test-set statistics influence the evaluation.
X_train_normalized = normalize_data(X_train)
X_test_normalized = (X_test - np.mean(X_train, axis=0)) / np.std(X_train, axis=0)

# Define a function to calculate mean
def calculate_mean(data):
    """Return the arithmetic mean of all elements of ``data``.

    Parameters
    ----------
    data : array-like
        Numeric values of any dimensionality.

    Returns
    -------
    numpy.floating
        Sum of every element divided by the total number of elements.
    """
    # np.sum(data) / len(data) divides by the length of the first axis only,
    # which is wrong for multi-dimensional input; np.mean divides by the
    # total element count and gives the same result for 1-D input.
    return np.mean(data)

# Define a function to calculate standard deviation
def calculate_std(data):
    """Return the population standard deviation of all elements of ``data``.

    Parameters
    ----------
    data : array-like
        Numeric values of any dimensionality; plain Python sequences are
        accepted as well as NumPy arrays.

    Returns
    -------
    numpy.floating
        Square root of the mean squared deviation from the mean (divisor N,
        not N - 1).
    """
    # Convert first so that subtraction works for plain lists too.
    values = np.asarray(data, dtype=float)
    deviations = values - np.mean(values)
    # Averaging over every element (rather than dividing by len(data)) keeps
    # the divisor correct for multi-dimensional input.
    return np.sqrt(np.mean(deviations ** 2))

# Define a function to calculate range
def calculate_range(data):
    """Return the spread of ``data``: its largest value minus its smallest."""
    smallest = np.min(data)
    largest = np.max(data)
    return largest - smallest

# Define a function to extract features
def extract_features(data):
    """Summarize ``data`` as the three-element list ``[mean, std, range]``.

    Each entry is produced by the matching ``calculate_*`` helper, applied in
    that order.
    """
    statistics = (calculate_mean, calculate_std, calculate_range)
    return [statistic(data) for statistic in statistics]

# Summarize every sample (row) of the normalized data as [mean, std, range]
X_train_extracted = np.array(list(map(extract_features, X_train_normalized)))
X_test_extracted = np.array(list(map(extract_features, X_test_normalized)))

# Naive Bayes Classifier
def fit_naive_bayes(X_train, y_train, min_std=1e-9):
    """Estimate Gaussian Naive Bayes parameters for every class.

    Note: a later cell in this notebook defines ``fit_naive_bayes`` again with
    a different return value; this version is in effect only until that cell
    runs.

    Parameters
    ----------
    X_train : array-like
        Training samples in rows, features in columns.
    y_train : array-like
        Class label of each training sample.
    min_std : float, optional
        Lower bound applied to every per-class standard deviation.

    Returns
    -------
    prior : list of float
        Fraction of training samples belonging to each class.
    mean : list of numpy.ndarray
        Per-feature mean of each class.
    std : list of numpy.ndarray
        Per-feature population standard deviation of each class, floored at
        ``min_std``.

    All three lists follow the order of ``np.unique(y_train)``.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    classes = np.unique(y_train)
    prior = []
    mean = []
    std = []

    for c in classes:
        X_c = X_train[y_train == c]
        prior.append(len(X_c) / len(X_train))
        mean.append(np.mean(X_c, axis=0))
        # A feature that is constant within a class has std == 0, which makes
        # the Gaussian density divide by zero and turn into NaN at prediction
        # time; the floor keeps the density finite.
        std.append(np.maximum(np.std(X_c, axis=0), min_std))

    return prior, mean, std

def predict_naive_bayes(X_test, prior, mean, std):
    """Predict the most probable class index for every row of ``X_test``.

    Parameters
    ----------
    X_test : iterable of 1-D array-like
        Samples to classify, one per row.
    prior : sequence of float
        Prior probability of each class.
    mean, std : sequence of 1-D array-like
        Per-class, per-feature Gaussian mean and standard deviation.

    Returns
    -------
    numpy.ndarray
        For each sample, the position (within ``prior``) of the class with
        the highest posterior score.
    """
    class_means = np.asarray(mean, dtype=float)
    class_stds = np.asarray(std, dtype=float)
    # A prior of exactly 0 maps to -inf, which simply rules that class out.
    with np.errstate(divide="ignore"):
        log_prior = np.log(np.asarray(prior, dtype=float))

    y_pred = []
    for x in X_test:
        # Sum log-densities instead of multiplying densities: the raw product
        # underflows to 0 for every class when a sample lies far from all
        # class means or there are many features, and argmax would then
        # silently return class 0.
        log_density = (
            -0.5 * np.log(2 * np.pi)
            - np.log(class_stds)
            - (np.asarray(x, dtype=float) - class_means) ** 2 / (2 * class_stds ** 2)
        )
        y_pred.append(np.argmax(log_prior + log_density.sum(axis=1)))
    return np.array(y_pred)

# Train the classifier on the per-sample summary features
prior, mean, std = fit_naive_bayes(X_train_extracted, y_train)

# Classify the held-out samples from their summary features
y_pred_extracted = predict_naive_bayes(X_test_extracted, prior, mean, std)

# Accuracy is the fraction of test samples whose prediction matches the label
is_correct_extracted = y_pred_extracted == y_test
accuracy_extracted = np.mean(is_correct_extracted)
print("Accuracy using extracted features:", accuracy_extracted)


Accuracy using extracted features: 0.7333333333333333


In [None]:
# Calculate mean and standard deviation for each feature in each class
def fit_naive_bayes(X_train, y_train, min_std=1e-9):
    """Estimate the per-class, per-feature Gaussian parameters.

    Parameters
    ----------
    X_train : array-like, 2-D
        Training samples in rows, features in columns.
    y_train : array-like
        Class label of each training sample; labels may be any values that
        ``np.unique`` can sort.
    min_std : float, optional
        Lower bound applied to every standard deviation.

    Returns
    -------
    means, stds : numpy.ndarray
        Arrays of shape ``(n_classes, n_features)``. Row ``k`` describes the
        ``k``-th entry of ``np.unique(y_train)``. ``stds`` holds population
        standard deviations floored at ``min_std``.
    """
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train)
    classes = np.unique(y_train)
    n_classes = len(classes)
    n_features = X_train.shape[1]
    means = np.zeros((n_classes, n_features))
    stds = np.zeros((n_classes, n_features))

    # Index rows by position, not by label value: using the label itself as
    # the row index only works when labels are exactly 0..n_classes-1.
    for row, c in enumerate(classes):
        X_c = X_train[y_train == c]
        means[row] = np.mean(X_c, axis=0)
        # Floor the std so a feature that is constant within a class cannot
        # cause a division by zero in the Gaussian density.
        stds[row] = np.maximum(np.std(X_c, axis=0), min_std)

    return means, stds

# Calculate class priors
def calculate_priors(y_train):
    """Return the relative frequency of every class in ``y_train``.

    Parameters
    ----------
    y_train : array-like
        Class label of each training sample.

    Returns
    -------
    numpy.ndarray
        One prior per distinct label, ordered like ``np.unique(y_train)``.
    """
    y_train = np.asarray(y_train)
    # Counting through np.unique keeps the result positional; writing to
    # priors[label] fails or mis-assigns unless labels are exactly 0..k-1.
    _, counts = np.unique(y_train, return_counts=True)
    return counts / len(y_train)

# Gaussian probability density function
def gaussian_pdf(x, mean, std):
    """Evaluate the normal density with the given ``mean`` and ``std`` at ``x``."""
    squared_offset = (x - mean)**2
    scale = 2 * std**2
    normaliser = np.sqrt(2 * np.pi) * std
    return np.exp(-(squared_offset / scale)) / normaliser

# Predict class labels for test set
def predict_naive_bayes(X_test, means, stds, priors):
    """Predict the most probable class index for every row of ``X_test``.

    Parameters
    ----------
    X_test : array-like, shape (n_samples, n_features)
        Samples to classify.
    means, stds : array-like, shape (n_classes, n_features)
        Per-class, per-feature Gaussian mean and standard deviation.
    priors : array-like, shape (n_classes,)
        Prior probability of each class.

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        Row index (into ``means``) of the highest-scoring class per sample.
    """
    X_test = np.asarray(X_test, dtype=float)
    means = np.asarray(means, dtype=float)
    stds = np.asarray(stds, dtype=float)
    # A prior of exactly 0 maps to -inf, which simply rules that class out.
    with np.errstate(divide="ignore"):
        log_priors = np.log(np.asarray(priors, dtype=float))

    # Broadcast (n_samples, 1, n_features) against (n_classes, n_features).
    offsets = X_test[:, np.newaxis, :] - means
    # Work with log-densities: multiplying raw densities underflows to 0 for
    # every class when a sample is far from all class means or there are many
    # features, and argmax would then silently return class 0.
    log_densities = (
        -0.5 * np.log(2 * np.pi) - np.log(stds) - offsets ** 2 / (2 * stds ** 2)
    )
    log_posteriors = log_densities.sum(axis=2) + log_priors

    return np.argmax(log_posteriors, axis=1)

# Estimate per-class Gaussian parameters and class priors from the
# normalized training data
means, stds = fit_naive_bayes(X_train_normalized, y_train)
priors = calculate_priors(y_train)

# Classify the normalized test samples
y_pred = predict_naive_bayes(X_test_normalized, means, stds, priors)

# Accuracy is the fraction of test samples whose prediction matches the label
is_correct = y_pred == y_test
accuracy = np.mean(is_correct)
print("Accuracy:", accuracy)


Accuracy: 0.9333333333333333
