In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler


In [2]:
def calculate_class_means(X, y, classes):
    """Compute the per-class mean of every feature.

    Parameters
    ----------
    X : 2-D array; ``X.shape[1]`` is taken as the number of features.
    y : array of labels, compared element-wise against each entry of ``classes``.
    classes : sequence of class labels; row ``k`` of the result belongs to ``classes[k]``.

    Returns
    -------
    numpy.ndarray of shape ``(len(classes), X.shape[1])`` holding, for each
    class, the column-wise mean of the rows of ``X`` whose label equals it.
    """
    feature_count = X.shape[1]
    class_means = np.zeros((len(classes), feature_count))

    # Each row of the result is a view, so filling it in place updates class_means.
    for row, label in zip(class_means, classes):
        members = X[y == label]
        row[:] = np.mean(members, axis=0)
    return class_means

def calculate_class_variances(X, y, classes):
    """Compute the per-class variance of every feature.

    Parameters
    ----------
    X : 2-D array; ``X.shape[1]`` is taken as the number of features.
    y : array of labels, compared element-wise against each entry of ``classes``.
    classes : sequence of class labels; row ``k`` of the result belongs to ``classes[k]``.

    Returns
    -------
    numpy.ndarray of shape ``(len(classes), X.shape[1])`` holding, for each
    class, ``np.var`` (its default settings) over the rows of ``X`` whose
    label equals it.
    """
    feature_count = X.shape[1]
    class_variances = np.zeros((len(classes), feature_count))

    # Each row of the result is a view, so filling it in place updates class_variances.
    for row, label in zip(class_variances, classes):
        members = X[y == label]
        row[:] = np.var(members, axis=0)
    return class_variances

def calculate_class_priors(y, classes):
    """Estimate each class's prior as its relative frequency in ``y``.

    Parameters
    ----------
    y : array of labels, compared element-wise against each entry of ``classes``.
    classes : sequence of class labels; entry ``k`` of the result belongs to ``classes[k]``.

    Returns
    -------
    1-D float numpy.ndarray of length ``len(classes)`` where each entry is the
    number of labels equal to that class divided by ``len(y)``.
    """
    total = float(len(y))
    fractions = [np.sum(y == label) / total for label in classes]
    # An explicit float dtype keeps the result float64 even when `classes` is empty.
    return np.array(fractions, dtype=float)

def calculate_likelihood(x, mean, variance, min_variance=1e-9):
    """Return the Gaussian log-likelihood of ``x``, summed over features.

    The log-density is evaluated directly in closed form,

        log N(x | mean, var) = -0.5 * (log(2*pi*var) + (x - mean)**2 / var)

    rather than as ``log(exp(...) / sqrt(...))``. The exponential underflows
    to 0 for points far from the mean, which would make the result ``-inf``
    and leave classes indistinguishable.

    Parameters
    ----------
    x : array of feature values for one sample.
    mean : array of per-feature means for one class.
    variance : array of per-feature variances for one class.
    min_variance : lower bound applied to ``variance`` before use. A feature
        that is constant within a class has variance 0, which would otherwise
        produce ``nan`` (0/0). Variances above the bound are left unchanged.

    Returns
    -------
    Scalar: the sum over features of the per-feature Gaussian log-density.
    """
    # Floor the variance so a zero-variance feature cannot yield a division by zero.
    safe_variance = np.maximum(variance, min_variance)
    squared_deviation = (x - mean) ** 2
    log_density = -0.5 * (np.log(2.0 * np.pi * safe_variance) + squared_deviation / safe_variance)
    return np.sum(log_density)

def predict_single_sample(x, classes, means, variances, priors):
    """Return the class with the highest log-posterior score for one sample.

    For every position ``k`` in ``classes`` the score is
    ``log(priors[k]) + calculate_likelihood(x, means[k], variances[k])``.
    The entry of ``classes`` at the position of the largest score is returned.
    """
    log_posteriors = [
        np.log(priors[k]) + calculate_likelihood(x, means[k], variances[k])
        for k in range(len(classes))
    ]
    best = np.argmax(log_posteriors)
    return classes[best]

def predict_samples(X, classes, means, variances, priors):
    """Classify every row of ``X`` and return the predictions as an array.

    Each element obtained by iterating over ``X`` is passed to
    ``predict_single_sample`` together with the shared model parameters.
    The results are collected, in order, into a numpy array.
    """
    predicted_labels = []
    for row in X:
        predicted_labels.append(
            predict_single_sample(row, classes, means, variances, priors)
        )
    return np.array(predicted_labels)



In [3]:
# Fetch the Iris dataset and separate the feature matrix from the labels
iris = load_iris()
X = iris.data
y = iris.target

# Sorted distinct label values found in the full label vector
classes = np.unique(y)



In [4]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: fit the scaler on the training split only,
# then apply that same fitted transform to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [5]:
# "Training" is just estimating the per-class parameters from the scaled training split
priors = calculate_class_priors(y_train, classes)
means = calculate_class_means(X_train_scaled, y_train, classes)
variances = calculate_class_variances(X_train_scaled, y_train, classes)

# Classify every row of the scaled test split with the estimated parameters
y_pred = predict_samples(
    X_test_scaled,
    classes,
    means,
    variances,
    priors,
)



In [6]:
# Overall fraction of test samples classified correctly
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1, labelled with the Iris species names
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=iris.target_names)
print(report)



Accuracy: 1.00

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [7]:
# Classify the first test row; a leading axis is added so that
# predict_samples receives a one-row 2-D array
sample = X_test_scaled[0][np.newaxis, :]
prediction = predict_samples(sample, classes, means, variances, priors)
predicted_name = iris.target_names[prediction[0]]
print(f"\nSample prediction: {predicted_name}")

# Show the parameters estimated for each class, one block per class
print("\nLearned Parameters:")
for i, class_name in enumerate(iris.target_names):
    class_summary = (
        f"\nClass: {class_name}",
        f"Mean values: {means[i]}",
        f"Variance values: {variances[i]}",
        f"Prior probability: {priors[i]:.3f}",
    )
    print(*class_summary, sep="\n")


Sample prediction: versicolor

Learned Parameters:

Class: setosa
Mean values: [-0.99853884  0.87386315 -1.30465921 -1.25253455]
Variance values: [0.18410123 0.76235228 0.010837   0.01955553]
Prior probability: 0.333

Class: versicolor
Mean values: [ 0.13450779 -0.65050066  0.29500775  0.18503413]
Variance values: [0.4263546  0.50051842 0.07416509 0.07345657]
Prior probability: 0.342

Class: virginica
Mean values: [ 0.88273678 -0.21241023  1.02797565  1.09012776]
Variance values: [0.61899202 0.49565077 0.09383304 0.14620106]
Prior probability: 0.325
