In [46]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB,CategoricalNB
from sklearn.metrics import accuracy_score,classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler

##### Example 1: Gaussian Naive Bayes on the Iris dataset

In [2]:
# Load the iris data as a (features, labels) pair instead of a Bunch object.
X,y=load_iris(return_X_y=True)

In [3]:
# Peek at the first five feature rows.
X[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [4]:
# Peek at the first five class labels.
y[:5]

array([0, 0, 0, 0, 0])

In [5]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [7]:
# Fit a Gaussian Naive Bayes classifier on the training split.
model = GaussianNB()
model.fit(X_train,y_train)

In [10]:
# Evaluate on the held-out split.
# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is symmetric,
# but with the arguments swapped classification_report transposes precision and
# recall and reports support for the predicted labels instead of the true ones.
y_pred = model.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        11

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



##### Example 2: Combining Gaussian and Categorical Naive Bayes on the tips dataset

In [33]:
# Load the 'tips' example dataset through seaborn.
df = sns.load_dataset('tips')

In [34]:
# Preview the first rows of the raw frame.
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [35]:
# List the available columns.
df.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [36]:
# Count rows per value of `time`, the column used as the prediction target below.
df["time"].value_counts()

time
Dinner    176
Lunch      68
Name: count, dtype: int64

In [54]:
# Integer-encode the string-valued columns, keeping one fitted encoder per column
# so the original labels can be recovered later via `inverse_transform`.
# NOTE(review): this overwrites the columns of `df` in place, so the `df.head()`
# output shown earlier no longer reflects the frame's contents after this cell runs.
encoded_columns = ('sex', 'smoker', 'day', 'time')
label_encoders = {name: LabelEncoder() for name in encoded_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

In [55]:
# Column roles for the two-part Naive Bayes model.
target = 'time'                          # label column
cont_features = ['total_bill', 'tip']    # real-valued inputs
cat_features = ['sex', 'smoker', 'day']  # label-encoded categorical inputs

In [56]:
# Slice the frame into continuous inputs, categorical inputs and the label column.
X_cont, X_cat, y = df[cont_features], df[cat_features], df[target]

In [57]:
# Inspect the continuous feature frame.
X_cont

Unnamed: 0,total_bill,tip
0,16.99,1.01
1,10.34,1.66
2,21.01,3.50
3,23.68,3.31
4,24.59,3.61
...,...,...
239,29.03,5.92
240,27.18,2.00
241,22.67,2.00
242,17.82,1.75


In [58]:
# Inspect the label-encoded categorical feature frame.
X_cat

Unnamed: 0,sex,smoker,day
0,0,0,2
1,1,0,2
2,1,0,2
3,1,0,2
4,0,0,2
...,...,...,...
239,1,0,1
240,0,1,1
241,1,1,1
242,1,0,1


In [59]:
# Split all three objects in a single call so the continuous features, the
# categorical features and the labels share the same row partition (20% held out).
(
    X_cont_train, X_cont_test,
    X_cat_train, X_cat_test,
    y_train, y_test,
) = train_test_split(X_cont, X_cat, y, test_size=0.2, random_state=42)

In [60]:
# Continuous features: learn the scaling statistics from the training split only,
# apply them to both splits, then fit Gaussian Naive Bayes on the scaled values.
scaler = StandardScaler().fit(X_cont_train)
X_cont_train_scaled = scaler.transform(X_cont_train)
X_cont_test_scaled = scaler.transform(X_cont_test)

gnb = GaussianNB()
gnb.fit(X_cont_train_scaled, y_train)

In [61]:
# Train Categorical Naive Bayes on the label-encoded categorical features.
# CategoricalNB sizes its per-feature count tables from the largest category code
# seen during fit, so a category that occurs only in the test split would raise an
# IndexError at predict time. Declaring each feature's full cardinality up front
# (taken from the fitted label encoders) keeps prediction safe for any split and
# leaves the fitted model unchanged when every category is present in training.
n_categories = [len(label_encoders[name].classes_) for name in cat_features]
cat_nb = CategoricalNB(min_categories=n_categories)
cat_nb.fit(X_cat_train, y_train)

In [62]:
# Per-model class posteriors on the test split: P(Y | X_cont) from the Gaussian
# model and P(Y | X_cat) from the categorical model.
# NOTE(review): each posterior already contains the class prior P(Y), so under the
# naive Bayes independence assumption the combined posterior is
#   P(Y | X) ∝ P(Y | X_cont) * P(Y | X_cat) / P(Y)
# and not the plain product of the two posteriors.
prob_cont = gnb.predict_proba(X_cont_test_scaled)
prob_cat = cat_nb.predict_proba(X_cat_test)

In [63]:
# Combine the two posteriors into one. Each of them already includes the class
# prior P(Y), so a plain product would count the prior twice and bias predictions
# toward the majority class. Under the naive Bayes independence assumption
#   P(Y | X_cont, X_cat) ∝ P(Y | X_cont) * P(Y | X_cat) / P(Y),
# so one copy of the prior is divided back out. Both models were fit on the same
# y_train, so gnb.class_prior_ is the prior contained in both posteriors.
final_prob = prob_cont * prob_cat / gnb.class_prior_
# Renormalise each row so the combined scores sum to 1 again.
final_prob /= final_prob.sum(axis=1, keepdims=True)

In [64]:
# Predicted class = column with the highest combined score in each row.
# The column index is used directly as the label; this presumably relies on the
# target having been label-encoded to 0..k-1 earlier in the notebook.
final_predictions = final_prob.argmax(axis=1)

In [66]:
# Score the combined predictions against the true test labels (y_true first, y_pred second).
accuracy = accuracy_score(y_test, final_predictions)
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 0.96
