**Naive Bayes** adalah sebuah algoritma klasifikasi dalam machine learning yang berdasarkan pada Teorema Bayes, dengan asumsi "naif" bahwa setiap fitur saling independen jika kelasnya diketahui. Pada notebook ini, Naive Bayes digunakan untuk data bertipe kategorikal dan tugas klasifikasi, di mana tujuannya adalah untuk memprediksi kategori atau kelas dari suatu instance berdasarkan distribusi probabilitas fitur-fitur yang diberikan.

# Import Library

In [1]:
import pandas as pd

# Data Loading

In [2]:
# Load the exam dataset from the Excel workbook (path is relative to the working directory).
df = pd.read_excel('exams(NAIVE BAYES).xlsx')
# Preview the first rows to inspect the available columns.
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,test preparation course,math score,math score.1,reading score,reading score.1,writing score,writing score.1,Class_lunch
0,male,group A,high school,completed,C,67,C,67,C,63,standard
1,female,group D,some high school,none,E,40,D,59,D,55,free/reduced
2,male,group E,some college,none,D,59,D,60,D,50,free/reduced
3,male,group B,high school,none,B,77,B,78,C,68,standard
4,male,group E,associate's degree,completed,B,78,B,73,C,68,standard


# Data Preprocessing

In [3]:
# Remove the columns that are not needed: gender and the numeric ".1" score columns,
# keeping only the letter-grade (categorical) versions of the scores.
unused_columns = ['gender', 'math score.1', 'reading score.1', 'writing score.1']
df.drop(columns=unused_columns, inplace=True)

Kolom di atas dihapus karena data yang digunakan adalah data dengan tipe kategorikal.

In [4]:
# Preview the first rows again to confirm which columns remain after the drop.
df.head()

Unnamed: 0,race/ethnicity,parental level of education,test preparation course,math score,reading score,writing score,Class_lunch
0,group A,high school,completed,C,C,C,standard
1,group D,some high school,none,E,D,D,free/reduced
2,group E,some college,none,D,D,D,free/reduced
3,group B,high school,none,B,B,C,standard
4,group E,associate's degree,completed,B,B,C,standard


In [5]:
# Training data: work on a copy of the preprocessed frame so the classifier
# input is a separate object from df.
x = df.copy()
# Display the preprocessed frame.
df

Unnamed: 0,race/ethnicity,parental level of education,test preparation course,math score,reading score,writing score,Class_lunch
0,group A,high school,completed,C,C,C,standard
1,group D,some high school,none,E,D,D,free/reduced
2,group E,some college,none,D,D,D,free/reduced
3,group B,high school,none,B,B,C,standard
4,group E,associate's degree,completed,B,B,C,standard
...,...,...,...,...,...,...,...
995,group C,high school,none,B,C,C,standard
996,group D,associate's degree,completed,A,A,A,free/reduced
997,group C,some high school,none,E,E,E,free/reduced
998,group C,some college,none,B,B,A,standard


# Algoritma Naive Bayes

In [6]:
class NaiveBayesClassifier:
    """Categorical Naive Bayes classifier with Laplace (add-one) smoothing.

    The classifier works on a pandas DataFrame in which one column holds the
    class label and every other column is a categorical feature.

    Attributes:
        data: The training DataFrame passed to the constructor.
        target: Name of the class-label column.
        classes: Distinct class labels, in order of first appearance.
        features: Names of all columns other than ``target``.
        priors: ``{cls: P(cls)}``, filled by ``calculate_prior``/``train``.
        likelihoods: ``{feature: {(value, cls): P(value | cls)}}``, filled
            by ``train``.
    """

    # Likelihood substituted in predict() for a feature value that never
    # occurred in the training data. The same constant is applied to every
    # class, so an unseen value does not favour any of them.
    UNSEEN_LIKELIHOOD = 1e-10

    def __init__(self, data, target='Class_lunch'):
        """Store the training data and discover classes and features.

        Args:
            data: Training DataFrame containing the features and the target.
            target: Name of the class-label column. Defaults to
                ``'Class_lunch'``.
        """
        self.data = data
        self.target = target
        # dict.fromkeys keeps first-appearance order. Iterating a set of
        # strings instead would change order between interpreter runs (hash
        # randomisation), making the printed output and the tie-break in
        # predict() non-reproducible.
        self.classes = list(dict.fromkeys(data[target]))
        # Every column except the target is a feature, wherever the target
        # column happens to sit in the frame.
        self.features = [col for col in data.columns if col != target]
        self.priors = {}
        self.likelihoods = {}

    def calculate_prior(self):
        """Compute and print the prior probability P(cls) of every class."""
        total_samples = len(self.data)
        labels = self.data[self.target]
        for cls in self.classes:
            class_count = int((labels == cls).sum())
            self.priors[cls] = class_count / total_samples
        print("Prior probabilities:")
        print(self.priors)

    def calculate_likelihood(self, feature, value, cls):
        """Return the Laplace-smoothed likelihood P(feature=value | cls).

        Computed as ``(count + 1) / (class_size + n_values)`` where ``count``
        is the number of rows of class ``cls`` whose ``feature`` equals
        ``value`` and ``n_values`` is the number of distinct values of
        ``feature`` in the whole training data. The result is also printed.

        Args:
            feature: Name of the feature column.
            value: Feature value to evaluate.
            cls: Class label to condition on.

        Returns:
            The smoothed conditional probability as a float.
        """
        class_samples = self.data[self.data[self.target] == cls]
        matches = int((class_samples[feature] == value).sum())
        # Add-one smoothing keeps the likelihood non-zero for values that
        # never co-occur with this class.
        numerator = matches + 1
        denominator = len(class_samples) + len(set(self.data[feature]))
        likelihood = numerator / denominator
        print(f"Likelihood for {feature}={value} and {self.target}={cls}: {likelihood}")
        return likelihood

    def train(self):
        """Fit the model: compute priors and all per-feature likelihoods."""
        self.calculate_prior()

        for feature in self.features:
            table = {}
            # First-appearance order keeps the training log reproducible.
            for value in dict.fromkeys(self.data[feature]):
                for cls in self.classes:
                    table[(value, cls)] = self.calculate_likelihood(feature, value, cls)
            self.likelihoods[feature] = table

    def predict(self, input_data):
        """Predict the class of a single sample.

        For every class the prior is multiplied by the likelihood of each
        supplied feature value; the class with the highest product wins.
        The per-class scores and the prediction are printed.

        Args:
            input_data: Mapping ``{feature_name: value}`` describing the
                sample. Values unseen during training contribute
                ``UNSEEN_LIKELIHOOD``.

        Returns:
            The predicted class label.

        Raises:
            KeyError: If ``train`` has not been called or ``input_data``
                names a feature that was not part of the training data.
        """
        probabilities = {}

        for cls in self.classes:
            probability = self.priors[cls]
            for feature, value in input_data.items():
                probability *= self.likelihoods[feature].get(
                    (value, cls), self.UNSEEN_LIKELIHOOD
                )
            probabilities[cls] = probability

        prediction = max(probabilities, key=probabilities.get)
        print("\nClass probabilities:")
        print(probabilities)
        print(f"\nPredicted {self.target}: {prediction}")
        # Return the label so callers can use the result, not just read it.
        return prediction

In [7]:
# Build the classifier on the training copy, then compute the class priors
# and the likelihood of every (feature value, class) pair.
classifier = NaiveBayesClassifier(x)
classifier.train()

Prior probabilities:
{'standard': 0.652, 'free/reduced': 0.348}
Likelihood for race/ethnicity=group B and Class_lunch=standard: 0.2070015220700152
Likelihood for race/ethnicity=group B and Class_lunch=free/reduced: 0.20113314447592068
Likelihood for race/ethnicity=group E and Class_lunch=standard: 0.1111111111111111
Likelihood for race/ethnicity=group E and Class_lunch=free/reduced: 0.16997167138810199
Likelihood for race/ethnicity=group C and Class_lunch=standard: 0.3181126331811263
Likelihood for race/ethnicity=group C and Class_lunch=free/reduced: 0.3286118980169972
Likelihood for race/ethnicity=group D and Class_lunch=standard: 0.2815829528158295
Likelihood for race/ethnicity=group D and Class_lunch=free/reduced: 0.2237960339943343
Likelihood for race/ethnicity=group A and Class_lunch=standard: 0.0821917808219178
Likelihood for race/ethnicity=group A and Class_lunch=free/reduced: 0.0764872521246459
Likelihood for parental level of education=some high school and Class_lunch=standard

In [8]:
# Test
input_data = {
    'race/ethnicity': 'group B',
    'parental level of education': 'high school',
    'test preparation course': 'completed',
    'math score': 'A',
    'reading score': 'DA',
    'writing score': 'A'
}

classifier.predict(input_data)


Class probabilities:
{'standard': 7.705965373237403e-14, 'free/reduced': 5.42832482718769e-15}

Predicted Class_lunch: standard
