In [41]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.naive_bayes import GaussianNB, CategoricalNB
from sklearn.metrics import accuracy_score

In [42]:
# Toy customer table, written one record per row so each customer's values
# can be read together; it is pivoted into the column-oriented `data` dict below.
columns = ['Age', 'Income', 'Marital_Status', 'Buy_Product']
rows = [
    (25, 50000, 'Single', 'No'),
    (30, 60000, 'Married', 'Yes'),
    (45, 80000, 'Married', 'Yes'),
    (35, 70000, 'Single', 'No'),
    (22, 40000, 'Single', 'No'),
    (50, 90000, 'Married', 'Yes'),
    (23, 42000, 'Single', 'No'),
    (40, 75000, 'Married', 'Yes'),
]

# zip(*rows) transposes the records into one tuple per column.
data = {name: list(values) for name, values in zip(columns, zip(*rows))}

df = pd.DataFrame(data)
print(df.shape)

df

(8, 4)


Unnamed: 0,Age,Income,Marital_Status,Buy_Product
0,25,50000,Single,No
1,30,60000,Married,Yes
2,45,80000,Married,Yes
3,35,70000,Single,No
4,22,40000,Single,No
5,50,90000,Married,Yes
6,23,42000,Single,No
7,40,75000,Married,Yes


In [43]:
# One encoder per string column: `le_cat` for the categorical feature,
# `le_target` for the label. Both are kept so the integer codes can be
# mapped back to the original strings later via `inverse_transform`.
le_cat, le_target = LabelEncoder(), LabelEncoder()

# NOTE: the columns of `df` are overwritten in place. Re-running this cell
# would re-fit the encoders on the already-encoded integers, so run it only
# once per freshly built `df`.
for column, encoder in (('Marital_Status', le_cat), ('Buy_Product', le_target)):
    df[column] = encoder.fit_transform(df[column])

df

Unnamed: 0,Age,Income,Marital_Status,Buy_Product
0,25,50000,1,0
1,30,60000,0,1
2,45,80000,0,1
3,35,70000,1,0
4,22,40000,1,0
5,50,90000,0,1
6,23,42000,1,0
7,40,75000,0,1


In [44]:
# Feature matrix (two numeric columns plus the encoded categorical one)
# and the encoded target.
feature_cols = ['Age', 'Income', 'Marital_Status']
X, y = df[feature_cols], df['Buy_Product']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Gaussian Naive Bayes for numerical features

In [45]:
# Restrict both splits to the continuous features that GaussianNB models.
numeric_cols = ['Age', 'Income']
X_train_num = X_train[numeric_cols]
X_test_num = X_test[numeric_cols]

# `fit` returns the estimator itself, so construction and fitting can be chained.
gnb = GaussianNB().fit(X_train_num, y_train)
y_pred_num = gnb.predict(X_test_num)

acc_num = accuracy_score(y_test, y_pred_num)
print("Accuracy (Numerical features):", np.round(acc_num, 2))

Accuracy (Numerical features): 0.33


# Categorical Naive Bayes for categorical features

In [46]:
# Use only categorical columns
X_train_cat = X_train[['Marital_Status']]
X_test_cat = X_test[['Marital_Status']]

# By default CategoricalNB infers the number of categories from the training
# rows alone. On a split this small a category could end up only in the test
# set, and predicting on it would then index past the fitted tables.
# `min_categories` pins the table size to every category the encoder knows.
cnb = CategoricalNB(min_categories=len(le_cat.classes_))
cnb.fit(X_train_cat, y_train)

y_pred_cat = cnb.predict(X_test_cat)
print("Accuracy (Categorical features):", np.round(accuracy_score(y_test, y_pred_cat), 2))

Accuracy (Categorical features): 1.0


# Mixed features approach

In [47]:
# Fit Gaussian NB on numerical
gnb.fit(X_train_num, y_train)
log_prob_num = gnb.predict_log_proba(X_test_num)   # log P(y | Age, Income)

# Fit Categorical NB on categorical
cnb.fit(X_train_cat, y_train)
log_prob_cat = cnb.predict_log_proba(X_test_cat)   # log P(y | Marital_Status)

# The columns of both arrays follow each model's `classes_`; they must agree
# before the arrays can be added element-wise.
assert np.array_equal(gnb.classes_, cnb.classes_), "models disagree on class order"

# Combine log probabilities.
# Each posterior already contains the class prior:
#   log P(y | x) = log P(x | y) + log P(y) - log P(x)
# so a plain sum would count log P(y) twice. Subtract it once to obtain the
# naive-Bayes joint score log P(x_num | y) + log P(x_cat | y) + log P(y).
# The result is unnormalised (each row is off by a constant), which does not
# affect the argmax below.
log_prior = np.log(gnb.class_prior_)
log_prob_combined = log_prob_num + log_prob_cat - log_prior

# argmax yields a column index; translate it to the actual class label so the
# comparison with y_test does not rely on labels happening to be 0..n-1.
y_pred_combined = gnb.classes_[np.argmax(log_prob_combined, axis=1)]

print("Accuracy (Combined features):", np.round(accuracy_score(y_test, y_pred_combined), 2))

Accuracy (Combined features): 0.33
