<a href="https://colab.research.google.com/github/NWemphy/MachineLearningB_/blob/main/Naive_Bayes_225314043.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Naive Bayes**

**Norbertus Wempy Junior Keraf-225314043**


In [62]:
import pandas as pd
import numpy as np
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder

class CategoricalNaiveBayes:
    def __init__(self):
        self.class_probs = {}  # Prior probability P(Ci)
        self.cond_probs = defaultdict(lambda: defaultdict(dict))  # P(Xj | Ci)

    def fit(self, X, y):
        # Menghitung prior probability P(Ci)
        class_counts = y.value_counts()  # Jumlah data untuk setiap kelas
        total_samples = len(y)
        self.class_probs = {cls: count / total_samples for cls, count in class_counts.items()}

        print("\nPrior Probability P(Ci):")
        for cls, prob in self.class_probs.items():
            print(f"P({cls}) = {prob}")

        # Menghitung likelihood P(Xj | Ci) untuk setiap fitur Xj dan setiap kelas Ci
        for col in X.columns:
            print(f"\nLikelihood P(Xj | Ci) for feature '{col}':")
            for cls in class_counts.index:
                # Filter data berdasarkan kelas
                class_data = X[y == cls][col]
                total_class_samples = len(class_data)

                # Hitung probabilitas untuk setiap kategori dalam fitur Xj
                for value in class_data.unique():
                    count_value = (class_data == value).sum()
                    prob_value = (count_value + 1) / (total_class_samples + len(class_data.unique()))  # Laplace smoothing
                    self.cond_probs[col][cls][value] = prob_value
                    print(f"P({value} | {cls}) = {prob_value}")

    def predict(self, X):
        predictions = []
        for _, row in X.iterrows():
            class_scores = {}
            for cls, class_prob in self.class_probs.items():
                score = np.log(class_prob)  # Mulai dengan log dari prior
                for col, value in row.items():
                    if value in self.cond_probs[col][cls]:
                        score += np.log(self.cond_probs[col][cls][value])  # Log likelihood
                    else:
                        # Jika nilai tidak ada di training data, berikan probabilitas kecil
                        score += np.log(1 / (len(self.cond_probs[col][cls]) + len(self.cond_probs[col][cls])))
                class_scores[cls] = score
            predictions.append(max(class_scores, key=class_scores.get))  # Pilih kelas dengan skor tertinggi
        return predictions

In [63]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [64]:
df = pd.read_excel('/content/Dataset_Tenis.xlsx')
df

Unnamed: 0,Minggu,Ramalan_cuaca,Suhu,Kelembaban,Angin,Bermain_tenis
0,M1,Cerah,Panas,Tinggi,Lemah,N
1,M2,Cerah,Panas,Tinggi,Kuat,N
2,M3,Mendung,Panas,Tinggi,Lemah,Y
3,M4,Hujan,Sejuk,Tinggi,Lemah,Y
4,M5,Hujan,Dingin,Normal,Lemah,Y
5,M6,Hujan,Dingin,Normal,Kuat,N
6,M7,Mendung,Dingin,Normal,Kuat,Y
7,M8,Cerah,Sejuk,Tinggi,Lemah,N
8,M9,Cerah,Dingin,Normal,Lemah,Y
9,M10,Hujan,Sejuk,Normal,Lemah,Y


In [65]:
le_dict = {}
for col in df.columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    le_dict[col] = le

In [66]:
# Pisahkan fitur dan label (hapus kolom "Minggu")
X = df.drop(columns=["Minggu", "Bermain_tenis"])
y = df["Bermain_tenis"]

In [67]:
model = CategoricalNaiveBayes()
model.fit(X, y)


Prior Probability P(Ci):
P(1) = 0.6428571428571429
P(0) = 0.35714285714285715

Likelihood P(Xj | Ci) for feature 'Ramalan_cuaca':
P(2 | 1) = 0.4166666666666667
P(1 | 1) = 0.3333333333333333
P(0 | 1) = 0.25
P(0 | 0) = 0.5714285714285714
P(1 | 0) = 0.42857142857142855

Likelihood P(Xj | Ci) for feature 'Suhu':
P(2 | 1) = 0.23076923076923078
P(3 | 1) = 0.38461538461538464
P(0 | 1) = 0.23076923076923078
P(1 | 1) = 0.15384615384615385
P(2 | 0) = 0.3333333333333333
P(0 | 0) = 0.2222222222222222
P(4 | 0) = 0.2222222222222222
P(3 | 0) = 0.2222222222222222

Likelihood P(Xj | Ci) for feature 'Kelembaban':
P(2 | 1) = 0.3333333333333333
P(0 | 1) = 0.5
P(1 | 1) = 0.16666666666666666
P(2 | 0) = 0.7142857142857143
P(0 | 0) = 0.2857142857142857

Likelihood P(Xj | Ci) for feature 'Angin':
P(1 | 1) = 0.6363636363636364
P(0 | 1) = 0.36363636363636365
P(1 | 0) = 0.42857142857142855
P(0 | 0) = 0.5714285714285714


In [68]:
input_baru = {
    'Ramalan_cuaca': le_dict['Ramalan_cuaca'].transform(['Cerah'])[0],
    'Suhu': le_dict['Suhu'].transform(['Sejuk'])[0],
    'Kelembaban': le_dict['Kelembaban'].transform(['Normal'])[0],
    'Angin': le_dict['Angin'].transform(['Lemah'])[0],
}
X_baru = pd.DataFrame([input_baru])

In [69]:
prediksi = model.predict(X_baru)[0]
hasil = le_dict['Bermain_tenis'].inverse_transform([prediksi])[0]

print("Hasil prediksi untuk X = ('Cerah', 'Sejuk', 'Normal', 'Lemah') adalah:", hasil)

Hasil prediksi untuk X = ('Cerah', 'Sejuk', 'Normal', 'Lemah') adalah: Y
