In [2]:
!pip install numpy pandas scikit-learn




In [3]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB # The specific Naive Bayes model
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.datasets import load_iris


In [5]:
# Load the Iris dataset and split it into training and test sets.
iris = load_iris()
x, y = iris.data, iris.target  # feature matrix and class labels

# test_size=0.3 holds out 30% of the samples; the fixed random_state keeps
# the split identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Build the Gaussian Naive Bayes classifier and fit it on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
gnb = GaussianNB().fit(x_train, y_train)
print("Model training complete. Priors and Likelihoods are stored Internally")

Model training complete. Priors and Likelihoods are stored Internally


In [13]:

# Pull the fitted parameters off the model: the class priors P(Class) and the
# per-class, per-feature Gaussian likelihood parameters (mean and variance).
prior_probs = gnb.class_prior_
likelihood_means = gnb.theta_
likelihood_variances = gnb.var_

# The report below states that the priors sum to 1.0; verify that claim here
# rather than leaving it to the reader.
assert np.isclose(prior_probs.sum(), 1.0), "class priors do not sum to 1.0"

print("--- Bayesian Inference Parameters (Priors and Likelihoods) ---")
print("\n1. Prior Probabilities P(Class):")
print(f"P(Setosa):     {prior_probs[0]:.4f}")
print(f"P(Versicolor): {prior_probs[1]:.4f}")
print(f"P(Virginica):  {prior_probs[2]:.4f}")
print("(Sum should be 1.0)")

# Display labels for the four feature columns, in the dataset's column order.
feature_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']

print("\n2. Gaussian Likelihood Parameters (Mean and Variance):")
# Walk the classes together with their row of means and row of variances so
# each table is built from one class's parameters without repeated indexing.
for i, (class_name, class_means, class_variances) in enumerate(
    zip(iris.target_names, likelihood_means, likelihood_variances)
):
    print(f"\n--- Class: {class_name} (Index {i}) ---")
    params_df = pd.DataFrame({
        'Feature': feature_names,
        'Mean (theta_)': class_means.round(3),
        'Variance (var_)': class_variances.round(5)
    })
    # to_string(index=False) drops the 0..3 row index from the printed table.
    print(params_df.to_string(index=False))

--- Bayesian Inference Parameters (Priors and Likelihoods) ---

1. Prior Probabilities P(Class):
P(Setosa):     0.2952
P(Versicolor): 0.3524
P(Virginica):  0.3524
(Sum should be 1.0)

2. Gaussian Likelihood Parameters (Mean and Variance):

--- Class: setosa (Index 0) ---
     Feature  Mean (theta_)  Variance (var_)
Sepal Length          4.965          0.11197
 Sepal Width          3.377          0.13659
Petal Length          1.465          0.03326
 Petal Width          0.248          0.01153

--- Class: versicolor (Index 1) ---
     Feature  Mean (theta_)  Variance (var_)
Sepal Length          5.862          0.27533
 Sepal Width          2.724          0.08725
Petal Length          4.211          0.23934
 Petal Width          1.303          0.04134

--- Class: virginica (Index 2) ---
     Feature  Mean (theta_)  Variance (var_)
Sepal Length          6.559          0.42241
 Sepal Width          2.986          0.09630
Petal Length          5.546          0.28843
 Petal Width          2.0

In [14]:

# Posterior class probabilities for every test sample (one row per sample,
# one column per class); these are reused by the custom-risk cell further down.
y_proba = gnb.predict_proba(x_test)
# Hard class labels chosen by the model for the same samples.
y_pred = gnb.predict(x_test)

print("\nFirst 5 Predicted Class Probabilities (Posteriors):")
first_five = y_proba[:5]
print(first_five.round(3))


First 5 Predicted Class Probabilities (Posteriors):
[[0.    0.996 0.004]
 [1.    0.    0.   ]
 [0.    0.    1.   ]
 [0.    0.975 0.025]
 [0.    0.832 0.168]]


In [15]:
# Score the model's own predictions against the held-out labels.
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy (Minimum-Error-Rate Decision): {accuracy:.4f}")

# Each header and its value are emitted by a single print call; sep="\n"
# puts the value on the line below the header.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Accuracy (Minimum-Error-Rate Decision): 0.9778

Confusion Matrix:
[[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45



In [16]:
# Loss_Matrix[true_class, action] is the cost of deciding `action` when the
# sample really belongs to `true_class`. Correct decisions cost 0 and every
# mistake costs 1, except deciding class 2 for a true class 0, which costs 10.
Loss_Matrix = np.array([
    [0, 1, 10],  # True is 0 (Setosa)
    [1, 0, 1],   # True is 1 (Versicolor)
    [1, 1, 0]    # True is 2 (Virginica)
])

# Conditional risk of every action for every test sample in one matrix product:
#   conditional_risks[n, a] = sum_c P(c | x_n) * Loss_Matrix[c, a]
# Working on y_proba as a whole avoids a Python-level loop and avoids indexing
# y_proba by the length of a different array.
conditional_risks = y_proba @ Loss_Matrix

# Bayes decision rule: for each sample, take the action with the smallest
# conditional risk.
y_risk_pred = np.argmin(conditional_risks, axis=1)

new_accuracy = accuracy_score(y_test, y_risk_pred)

print("\n--- Bayesian Decision Theory (Custom Risk) Results ---")
print(f"Accuracy with Custom Loss (Decision Rule): {new_accuracy:.4f}")
print("Confusion Matrix with Custom Loss:")
print(confusion_matrix(y_test, y_risk_pred))


--- Bayesian Decision Theory (Custom Risk) Results ---
Accuracy with Custom Loss (Decision Rule): 0.9778
Confusion Matrix with Custom Loss:
[[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]


In [17]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.datasets import load_iris


# Load Iris, split it 70/30 with a fixed seed, and fit Gaussian Naive Bayes.
iris = load_iris()
X, y = iris.data, iris.target  # features, target labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# fit() returns the fitted estimator, so it can be bound directly.
gnb = GaussianNB().fit(X_train, y_train)

print("--- Bayesian Inference Parameters (Priors and Likelihoods) ---")

# 1. Priors P(Class): one line per class, labelled with the dataset's class name.
print("\n1. Prior Probabilities P(Class):")
for name, prior in zip(iris.target_names, gnb.class_prior_):
    print(f"P({name}): {prior:.4f}")

# 2. Gaussian means: rows are classes, columns are features.
print("\n2. Gaussian Likelihood Means (theta_):")
likelihood_df = pd.DataFrame(
    data=gnb.theta_,
    index=iris.target_names,
    columns=iris.feature_names,
)
rounded_means = likelihood_df.round(3)
print(rounded_means)

# Posterior probabilities (kept for the custom-risk step below) and the
# model's own hard predictions for the test split.
y_proba = gnb.predict_proba(X_test)
y_pred_min_error = gnb.predict(X_test)
accuracy_min_error = accuracy_score(y_test, y_pred_min_error)

print("\n--- Minimum-Error-Rate Decision (Standard Accuracy) ---")
print(f"Accuracy: {accuracy_min_error:.4f}")
# Header and value share one print call; sep="\n" places the value on the
# line after its header.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred_min_error), sep="\n")
print(
    "Classification Report:",
    classification_report(y_test, y_pred_min_error, target_names=iris.target_names),
    sep="\n",
)

# Loss_Matrix[true_class, action] is the cost of deciding `action` when the
# sample really belongs to `true_class`. Correct decisions cost 0 and every
# mistake costs 1, except deciding class 2 for a true class 0, which costs 10.
Loss_Matrix = np.array([
    [0,       1,       10],  # True Class 0 (Setosa)
    [1,       0,       1],   # True Class 1 (Versicolor)
    [1,       1,       0]    # True Class 2 (Virginica)
])

# Conditional risk of every action for every test sample in one matrix product:
#   conditional_risks[n, a] = sum_c P(c | x_n) * Loss_Matrix[c, a]
# This replaces a per-sample Python loop with a single vectorised operation.
conditional_risks = y_proba @ Loss_Matrix

# Bayes decision rule: for each sample, take the action with the smallest
# conditional risk.
y_pred_custom_risk = np.argmin(conditional_risks, axis=1)

accuracy_custom_risk = accuracy_score(y_test, y_pred_custom_risk)
print("\n--- Bayesian Decision Theory (Custom Risk) Results ---")
print(f"Accuracy with Custom Loss: {accuracy_custom_risk:.4f}")
print("Confusion Matrix with Custom Loss:")
print(confusion_matrix(y_test, y_pred_custom_risk))
print("Classification Report with Custom Loss:")
print(classification_report(y_test, y_pred_custom_risk, target_names=iris.target_names))

--- Bayesian Inference Parameters (Priors and Likelihoods) ---

1. Prior Probabilities P(Class):
P(setosa): 0.2952
P(versicolor): 0.3524
P(virginica): 0.3524

2. Gaussian Likelihood Means (theta_):
            sepal length (cm)  sepal width (cm)  petal length (cm)  \
setosa                  4.965             3.377              1.465   
versicolor              5.862             2.724              4.211   
virginica               6.559             2.986              5.546   

            petal width (cm)  
setosa                 0.248  
versicolor             1.303  
virginica              2.005  

--- Minimum-Error-Rate Decision (Standard Accuracy) ---
Accuracy: 0.9778
Confusion Matrix:
[[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]
Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      0.92      0.96        13
   virginica       0.93      1.00      0.96        13

    accuracy               