In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [14]:
class NaiveBayes:
    """Categorical Naive Bayes classifier for a 'yes'/'no' target column.

    All statistics are derived on demand from the training frame ``self.df``;
    nothing is cached, so replacing ``self.df`` is reflected in later calls.

    Args:
        df: Training DataFrame holding the feature columns and the target column.
        features: Names of the categorical feature columns. Defaults to
            ``('age', 'income', 'student', 'credit_rating')``.
        target: Name of the label column whose values are 'yes' / 'no'.
            Defaults to ``'buys_computer'``.
        alpha: Additive (Laplace) smoothing constant applied to the
            per-category conditional probabilities. The default ``0`` applies
            no smoothing, so a category never seen with a class gets
            probability 0 for that class.

    Raises:
        ValueError: If ``alpha`` is negative.
    """

    DEFAULT_FEATURES = ('age', 'income', 'student', 'credit_rating')
    DEFAULT_TARGET = 'buys_computer'

    def __init__(self, df, features=None, target=None, alpha=0):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.df = df
        self.features = list(self.DEFAULT_FEATURES if features is None else features)
        self.target = self.DEFAULT_TARGET if target is None else target
        self.alpha = alpha

    def total_elements(self):
        """Return the number of rows in the training frame."""
        return self.df.shape[0]

    def calculate_yes_no(self):
        """Return ``(total_yes, total_no)``: row counts per target label."""
        counts = self.df[self.target].value_counts()
        # A label that never occurs is absent from value_counts, hence the 0 default.
        return int(counts.get("yes", 0)), int(counts.get("no", 0))

    def calculate_feature_probabilities(self):
        """Build the conditional probability tables P(category | class).

        Returns:
            Nested dict ``{feature: {category: {'yes': p, 'no': p}}}`` covering
            every distinct value each feature takes in the training frame.
            A class with no usable denominator yields probability 0.
        """
        total_yes, total_no = self.calculate_yes_no()
        labels = self.df[self.target]
        # The class masks do not depend on the feature, so build them once.
        is_yes = labels == 'yes'
        is_no = labels == 'no'

        feature_probabilities = {}
        for feature in self.features:
            column = self.df[feature]
            categories = column.unique()
            # With alpha == 0 these reduce to the plain class totals.
            denom_yes = total_yes + self.alpha * len(categories)
            denom_no = total_no + self.alpha * len(categories)

            table = {}
            for category in categories:
                matches = column == category
                count_yes = int((matches & is_yes).sum())
                count_no = int((matches & is_no).sum())
                table[category] = {
                    'yes': (count_yes + self.alpha) / denom_yes if denom_yes > 0 else 0,
                    'no': (count_no + self.alpha) / denom_no if denom_no > 0 else 0,
                }
            feature_probabilities[feature] = table

        return feature_probabilities

    def probability_total_yes_no(self):
        """Return the class priors ``(P(yes), P(no))``.

        An empty training frame yields ``(0.0, 0.0)`` instead of dividing by zero.
        """
        total_yes, total_no = self.calculate_yes_no()
        total_elements = self.total_elements()
        if total_elements == 0:
            return 0.0, 0.0
        return total_yes / total_elements, total_no / total_elements

    @staticmethod
    def _classify(sample, feature_prob, priors):
        """Score one sample against precomputed tables and return 'yes' or 'no'."""
        score_yes, score_no = priors
        for feature, category in sample.items():
            table = feature_prob[feature]
            # A category absent from the training data contributes no evidence.
            if category in table:
                score_yes *= table[category]['yes']
                score_no *= table[category]['no']
        # Ties (including both scores being 0) resolve to 'no'.
        return 'yes' if score_yes > score_no else 'no'

    def predict(self, sample):
        """Predict the label for one sample.

        Args:
            sample: Mapping of feature name to category value.

        Returns:
            ``'yes'`` if the 'yes' score is strictly greater, otherwise ``'no'``.

        Raises:
            KeyError: If ``sample`` contains a key that is not a model feature.
        """
        return self._classify(
            sample,
            self.calculate_feature_probabilities(),
            self.probability_total_yes_no(),
        )

    def accuracy(self, test_df):
        """Return the fraction of rows in ``test_df`` that are predicted correctly.

        The probability tables are computed once and reused for every row
        rather than being rebuilt per prediction. An empty ``test_df`` yields
        ``0.0`` instead of dividing by zero.
        """
        n_rows = len(test_df)
        if n_rows == 0:
            return 0.0

        feature_prob = self.calculate_feature_probabilities()
        priors = self.probability_total_yes_no()

        samples = test_df[self.features].to_dict(orient='records')
        actuals = test_df[self.target].tolist()
        correct_predictions = sum(
            self._classify(sample, feature_prob, priors) == actual
            for sample, actual in zip(samples, actuals)
        )
        return correct_predictions / n_rows

In [15]:
def main():
    """Fit the classifier on a 90/10 split of the CSV and print its results.

    Reads ``studentdata2.csv``, holds out 10% of the rows (fixed
    ``random_state=0``), prints the accuracy on the held-out rows, then
    prints the prediction for each hand-written sample.
    """
    frame = pd.read_csv("studentdata2.csv")
    train_part, held_out = train_test_split(frame, test_size=0.1, random_state=0)

    classifier = NaiveBayes(train_part)

    score = classifier.accuracy(held_out)
    print(f"Accuracy on test set: {score:.2f}")

    # Hand-written samples to classify after the held-out evaluation.
    queries = [
        {'age': 'youth', 'income': 'medium', 'student': 'yes', 'credit_rating': 'fair'},
    ]
    for number, query in enumerate(queries, start=1):
        outcome = classifier.predict(query)
        print(f"Prediction for test sample {number}: {outcome}")

In [16]:
# Run the demo only when this code is the entry point (__name__ is "__main__"),
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Accuracy on test set: 1.00
Prediction for test sample 1: yes
