In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
df=pd.read_csv("PlayTennis.csv")
df_encoded=df.copy()
value_maps={}
for col in df_encoded.columns:
    values=df_encoded[col].unique()
    mapping={v: i for i,v in enumerate(values)}
    df_encoded[col]=df_encoded[col].map(mapping)
    value_maps[col]=mapping

print("encoding dataframe head:")
print(df_encoded.head())

encoding dataframe head:
   Outlook  Temperature  Humidity  Wind  Play Tennis
0        0            0         0     0            0
1        0            0         0     1            0
2        1            0         0     0            1
3        2            1         0     0            1
4        2            2         1     0            1


In [3]:
 target_col=df.columns[-1]
X=df_encoded.drop(target_col,axis=1).values
y=df_encoded[target_col].values

X_train,X_test,y_train,y_test=train_test_split(
    X,y,test_size=0.4,random_state=42,stratify=y
)
print("train size;",len(X_train))
print("test size;",len(X_test))

train size; 8
test size; 6


In [4]:
class NaiveBayesCategorical:
    def __init__(self):
        pass
    def fit(self,x,y):
        X=np.array(x,dtype=int)
        y=np.array(y,dtype=int)
        self.classes_,self.class_counts_=np.unique(y,return_counts=True)
        self.n_classes_=len(self.classes_)
        self.n_features_=X.shape[1]
        self.class_priors_=self.class_counts_/self.class_counts_.sum()
        self.feature_probs_=[]
        for c_idx,c in enumerate(self.classes_):
            X_c=x[y==c]
            feature_probs_c=[]
            for j in range(self.n_features_):
                cat_counts=np.bincount(X_c[:,j])
                max_cat=X[:,j].max()
                full_counts=np.zeros(max_cat+1,dtype=float)
                full_counts[:len(cat_counts)]=cat_counts
                prods=full_counts/full_counts.sum()
                feature_probs_c.append(prods)
            self.feature_probs_.append(feature_probs_c)
        return self
    
    def predict_log_proba_single(self,x):
        log_posteriors=np.zeros(self.n_classes_)
        for c_idx, c in enumerate(self.classes_):
            log_posteriors[c_idx]=np.log(self.class_priors_[c_idx])
            for j in range(self.n_features_):
                val=int(x[j])
                probs=self.feature_probs_[c_idx][j]
                if val<len(probs) and probs[val]>0:
                    log_posteriors[c_idx]=-np.inf
                    break
        return log_posteriors
    
    def predict_log_proba_(self,X):
        return np.array([self.predict_log_proba_single(x) for x in X])
    
    def predict(self,X):
        log_proba=self.predict_log_proba_(X)
        return self.classes_[np.argmax(log_proba,axis=1)]
        
        
            

In [5]:
nb_custom=NaiveBayesCategorical()
nb_custom.fit(X_train,y_train)
print("classes:",nb_custom.classes_)
print("class priors:",nb_custom.class_priors_)

classes: [0 1]
class priors: [0.375 0.625]


In [6]:
y_pred=nb_custom.predict(X_test)
print("true labels:   ",y_test)
print("predicted labels:",y_pred)
acc=accuracy_score(y_test,y_pred)
print(f"\n Accuracy: {acc:.3f}")
cn=confusion_matrix(y_test,y_pred)
print("\n Confusion matrix:\n",cn)
inv_class_map={v:k for k,v in value_maps[target_col].items()}
target_names={inv_class_map[c] for c in nb_custom.classes_}
print("\n classfication report:")
print(classification_report(y_test,y_pred,target_names=target_names,zero_division=0))

true labels:    [1 1 0 1 0 1]
predicted labels: [0 0 0 0 0 0]

 Accuracy: 0.333

 Confusion matrix:
 [[2 0]
 [4 0]]

 classfication report:
              precision    recall  f1-score   support

          No       0.33      1.00      0.50         2
         Yes       0.00      0.00      0.00         4

    accuracy                           0.33         6
   macro avg       0.17      0.50      0.25         6
weighted avg       0.11      0.33      0.17         6



In [7]:
print("conditional probalilities (P(Feature=val | Class)):")
feature_cols=df.columns.drop(target_col)
for c in nb_custom.classes_:
    print(f"\nClass {inv_class_map[c]}:")
    for j, feat in enumerate(feature_cols):
        probs=nb_custom.feature_probs_[c][j]
        val_names=[k for k,v in value_maps[feat].items() if v< len(probs)]
        for i,val_names in enumerate(val_names):
            print(f" P({feat}={val_names} | {inv_class_map[c]})={probs[i]:.3f}")

conditional probalilities (P(Feature=val | Class)):

Class No:
 P(Outlook=Sunny | No)=0.333
 P(Outlook=Overcast | No)=0.000
 P(Outlook=Rain | No)=0.667
 P(Temperature=Hot | No)=0.333
 P(Temperature=Mild | No)=0.333
 P(Temperature=Cool | No)=0.333
 P(Humidity=High | No)=0.667
 P(Humidity=Normal | No)=0.333
 P(Wind=Weak | No)=0.000
 P(Wind=Strong | No)=1.000

Class Yes:
 P(Outlook=Sunny | Yes)=0.200
 P(Outlook=Overcast | Yes)=0.400
 P(Outlook=Rain | Yes)=0.400
 P(Temperature=Hot | Yes)=0.200
 P(Temperature=Mild | Yes)=0.400
 P(Temperature=Cool | Yes)=0.400
 P(Humidity=High | Yes)=0.200
 P(Humidity=Normal | Yes)=0.800
 P(Wind=Weak | Yes)=0.600
 P(Wind=Strong | Yes)=0.400


In [8]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score
# Load dataset
X, y = load_iris(return_X_y=True)
# Split into train/test sets
X_train, X_test, y_train, y_test = train_test_split(
   X, y, test_size=0.3, random_state=42
)
# Initialize and train model
gnb = GaussianNB()
gnb.fit(X_train, y_train)
# Predictions
y_pred = gnb.predict(X_test)
# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average="weighted"))

Accuracy: 0.9777777777777777
F1 Score: 0.9777448559670782
