In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.stats import chi2_contingency

In [3]:
# data
data = {
    'age': ['<=30', '<=30', '31…40', '>40', '>40', '>40', '31…40', '<=30', '<=30', '>40', '<=30', '31…40', '31…40', '>40'],
    'income': ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'low', 'medium', 'medium', 'medium', 'high', 'medium'],
    'student': ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no'],
    'credit_rating': ['fair', 'fair', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'no'],
    'buys_computer': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
}

df = pd.DataFrame(data)

In [4]:
# A1
# Calculating the prior probabilities by counting the occurrences of each class and normalizing.
prior_probabilities = df['buys_computer'].value_counts(normalize=True)
print(f"Prior Probability for Each Class:\n{prior_probabilities}\n")

Prior Probability for Each Class:
yes    0.642857
no     0.357143
Name: buys_computer, dtype: float64



In [5]:
# A2
# Calculating class conditional densities for each feature and class by computing the mean of occurrences.
class_conditional_densities = {}
for col in df.columns[:-1]:
    for class_val in df['buys_computer'].unique():
        subset = df[df['buys_computer'] == class_val][col]
        mean = subset.value_counts(normalize=True)
        class_conditional_densities[(col, class_val)] = mean

print("Class Conditional Densities:")
for key, value in class_conditional_densities.items():
    print(f"{key}: \n{value}\n")



Class Conditional Densities:
('age', 'no'): 
<=30    0.6
>40     0.4
Name: age, dtype: float64

('age', 'yes'): 
31…40    0.444444
>40      0.333333
<=30     0.222222
Name: age, dtype: float64

('income', 'no'): 
high      0.4
medium    0.4
low       0.2
Name: income, dtype: float64

('income', 'yes'): 
medium    0.444444
low       0.333333
high      0.222222
Name: income, dtype: float64

('student', 'no'): 
no     0.8
yes    0.2
Name: student, dtype: float64

('student', 'yes'): 
yes    0.666667
no     0.333333
Name: student, dtype: float64

('credit_rating', 'no'): 
fair         0.6
excellent    0.2
no           0.2
Name: credit_rating, dtype: float64

('credit_rating', 'yes'): 
fair         0.666667
excellent    0.333333
Name: credit_rating, dtype: float64



In [6]:
# A3
# Performing a chi-square independence test for the first two features and determining the independence test result.
contingency_table = pd.crosstab(df['age'], df['income'])
chi2, p, _, _ = chi2_contingency(contingency_table)
independence_test_result = "Independent" if p > 0.05 else "Dependent"
print(f"Independence Test Result: {independence_test_result}\n")


Independence Test Result: Independent



In [7]:
# A4
# Split the data into training and testing sets
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df['age'] = label_encoder.fit_transform(df['age'])
df['income'] = label_encoder.fit_transform(df['income'])
df['student'] = label_encoder.fit_transform(df['student'])
df['credit_rating'] = label_encoder.fit_transform(df['credit_rating'])
df['buys_computer'] = label_encoder.fit_transform(df['buys_computer'])

x = df.drop("buys_computer", axis=1)
y = df["buys_computer"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

model = GaussianNB()
model.fit(x_train, y_train)
predictions = model.predict(x_train)
accuracy_nb = accuracy_score(y_train, predictions)
print(f"A4. Naïve-Bayes Classifier Accuracy: {accuracy_nb:.2f}\n")


A4. Naïve-Bayes Classifier Accuracy: 0.73



In [13]:
# A5
from sklearn.naive_bayes import GaussianNB

X = data.drop(columns=['status','name'])
y = data['status']
# Split the data into training and test sets
Tr_X, Te_X, Tr_y, Te_y = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the Gaussian Naive Bayes model
model = GaussianNB()
model.fit(Tr_X, Tr_y)

# Evaluate the Navie Bayes model
predicted_labels = model.predict(Te_X)
accuracy = sum(predicted_labels == Te_y) / len(Te_y)
print("Accuracy:", accuracy)


Accuracy: 0.6923076923076923
