<a href="https://colab.research.google.com/github/Daalleee/Natural-Language-Processing-NLP-/blob/main/Perhitungan_MNB_%26_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np

# Dataset & test sample (columns follow feature_names; label 1 = Spam, -1 = NotSpam)
X = [[1,0,0],[0,0,1],[1,1,0],[0,0,1],[1,0,0]]
y = [1,-1,1,-1,1]
feature_names = ["hadiah","gratis","tugas"]
x_test = [1,0,1]

# Naive Bayes with Laplace smoothing
classes = sorted(set(y))
V = len(X[0])  # vocabulary size = number of features
y_arr = np.array(y); X_arr = np.array(X)
# Cast to built-in float so the priors print as plain numbers (0.6) rather than
# as NumPy scalar reprs (np.float64(0.6)).
priors = {c: float((y_arr==c).sum()/len(y_arr)) for c in classes}
counts_per_class = {c: X_arr[y_arr==c].sum(axis=0) for c in classes}
alpha = 1.0  # Laplace smoothing strength
likelihood = {}
for c in classes:
    # Smoothed estimate: (count(feature, class) + alpha) / (total count in class + alpha * V)
    denom = int(counts_per_class[c].sum()) + alpha*V
    likelihood[c] = ((counts_per_class[c] + alpha)/denom).tolist()


def nb_unnormalized_posterior(x, priors, likelihood):
    """Return the unnormalised multinomial Naive Bayes posterior for one sample.

    Parameters
    ----------
    x : sequence of numbers
        Feature counts of the sample, in the same order as the likelihood lists.
    priors : dict
        Maps each class label to its prior probability.
    likelihood : dict
        Maps each class label to a list of per-feature likelihoods.

    Returns
    -------
    dict
        Maps each class label to ``prior * prod_i likelihood_i ** x_i``.
    """
    scores = {}
    for c, prior in priors.items():
        prod = 1.0
        for i, xi in enumerate(x):
            if xi > 0:
                # A feature that occurs xi times contributes its likelihood xi times;
                # for 0/1 inputs this reduces to a plain multiplication.
                prod *= likelihood[c][i] ** xi
        scores[c] = prior * prod
    return scores


# Unnormalised posterior, then normalisation so the class scores sum to 1
post_unnorm = nb_unnormalized_posterior(x_test, priors, likelihood)
Z = sum(post_unnorm.values())
post = {c: post_unnorm[c]/Z for c in classes}
nb_pred = max(post.items(), key=lambda kv: kv[1])[0]

# Print a tidy summary of the Naive Bayes computation
print("STEP 1 – Multinomial Naive Bayes")
print("Prior:", {"Spam": priors[1], "NotSpam": priors[-1]})
print("Likelihood (Laplace α=1):")
for cls in classes:
    # Label 1 is reported as Spam; every other label is reported as NotSpam.
    if cls == 1:
        cls_name = "Spam"
    else:
        cls_name = "NotSpam"
    probs = likelihood[cls]
    print(f"  {cls_name}: hadiah={probs[0]:.6f}, gratis={probs[1]:.6f}, tugas={probs[2]:.6f}")
print(f"Posterior* (unnorm): Spam={post_unnorm[1]:.8f}, NotSpam={post_unnorm[-1]:.8f}")
print(f"Posterior (norm):    Spam={post[1]:.8f}, NotSpam={post[-1]:.8f}")
if nb_pred == 1:
    print("Prediksi NB:", "Spam(1)")
else:
    print("Prediksi NB:", "NotSpam(-1)")

STEP 1 – Multinomial Naive Bayes
Prior: {'Spam': np.float64(0.6), 'NotSpam': np.float64(0.4)}
Likelihood (Laplace α=1):
  NotSpam: hadiah=0.200000, gratis=0.200000, tugas=0.600000
  Spam: hadiah=0.571429, gratis=0.285714, tugas=0.142857
Posterior* (unnorm): Spam=0.04897959, NotSpam=0.04800000
Posterior (norm):    Spam=0.50505051, NotSpam=0.49494949
Prediksi NB: Spam(1)


In [9]:
# -*- coding: utf-8 -*-
"""
STEP 2 – SVM Linear (Perceptron-style update, 5 epoch)
Aturan: jika y*(w·x+b) ≤ 0 → w ← w + η*y*x; b ← b + η*y (η=1)
"""
import numpy as np

# Dataset & test sample (label 1 = Spam, -1 = NotSpam)
X = [[1,0,0],[0,0,1],[1,1,0],[0,0,1],[1,0,0]]
y = [1,-1,1,-1,1]
x_test = [1,0,1]

# Training hyper-parameters, exposed here instead of being buried in the loop
ETA = 1.0      # learning rate η
N_EPOCHS = 5   # number of full passes over the training set


def train_perceptron(X, y, eta=1.0, epochs=5):
    """Train a linear classifier with the perceptron update rule.

    For each sample, if ``y * (w·x + b) <= 0`` the sample is treated as
    misclassified and the parameters are moved towards it:
    ``w <- w + eta * y * x`` and ``b <- b + eta * y``.

    Parameters
    ----------
    X : sequence of equal-length numeric sequences
        Training feature vectors.
    y : sequence of numbers
        Class labels (+1 / -1), one per row of ``X``.
    eta : float, optional
        Learning rate. Defaults to 1.0.
    epochs : int, optional
        Number of passes over the data. Defaults to 5.

    Returns
    -------
    tuple
        ``(w, b, logs)``: the final weight vector (NumPy array), the final bias
        (float), and one formatted log line per sample visit.

    Raises
    ------
    ValueError
        If ``X`` is empty or ``X`` and ``y`` differ in length.
    """
    if len(X) == 0:
        raise ValueError("X must contain at least one sample")
    if len(X) != len(y):
        raise ValueError("X and y must have the same number of samples")

    w = np.zeros(len(X[0]), dtype=float)
    b = 0.0
    logs = []
    step = 0
    for ep in range(1, epochs + 1):
        for i in range(len(X)):
            x_i = np.array(X[i], dtype=float)
            y_i = float(y[i])
            # f and margin are evaluated before any update, so the log shows
            # the decision value that triggered (or skipped) the update.
            f = float(np.dot(w, x_i) + b)
            margin = y_i * f
            updated = margin <= 0
            if updated:
                w = w + eta * y_i * x_i
                b = b + eta * y_i
            step += 1
            logs.append(
                f"Iter {step:02d} (Epoch {ep}, Sampel {i+1}): x={x_i.tolist()}, y={int(y_i)}, "
                f"f(x)={f:.3f}, y*f={margin:.3f}, update={'YA' if updated else 'TIDAK'}, "
                f"w={w.tolist()}, b={b:.3f}"
            )
    return w, b, logs


w, b, logs = train_perceptron(X, y, eta=ETA, epochs=N_EPOCHS)
step = len(logs)  # total number of sample visits

# Score the test sample with the trained parameters; a score of exactly 0 counts as Spam.
f_test = float(w @ np.asarray(x_test) + b)
if f_test >= 0:
    pred = 1
else:
    pred = -1
pred_label = 'Spam(1)' if pred==1 else 'NotSpam(-1)'

print("STEP 2 – SVM Linear (Perceptron, 5 epoch)")
print(f"w_final={w.tolist()}, b_final={b}")
print(f"f_test(x)={f_test:.3f} ⇒ prediksi={pred_label} (aturan f≥0⇒Spam)")
print("\nLog iterasi (semua langkah):")
# Emit the per-sample training trace, one line per visit.
for log_line in logs:
    print(log_line)

STEP 2 – SVM Linear (Perceptron, 5 epoch)
w_final=[1.0, 0.0, -1.0], b_final=0.0
f_test(x)=0.000 ⇒ prediksi=Spam(1) (aturan f≥0⇒Spam)

Log iterasi (semua langkah):
Iter 01 (Epoch 1, Sampel 1): x=[1.0, 0.0, 0.0], y=1, f(x)=0.000, y*f=0.000, update=YA, w=[1.0, 0.0, 0.0], b=1.000
Iter 02 (Epoch 1, Sampel 2): x=[0.0, 0.0, 1.0], y=-1, f(x)=1.000, y*f=-1.000, update=YA, w=[1.0, 0.0, -1.0], b=0.000
Iter 03 (Epoch 1, Sampel 3): x=[1.0, 1.0, 0.0], y=1, f(x)=1.000, y*f=1.000, update=TIDAK, w=[1.0, 0.0, -1.0], b=0.000
Iter 04 (Epoch 1, Sampel 4): x=[0.0, 0.0, 1.0], y=-1, f(x)=-1.000, y*f=1.000, update=TIDAK, w=[1.0, 0.0, -1.0], b=0.000
Iter 05 (Epoch 1, Sampel 5): x=[1.0, 0.0, 0.0], y=1, f(x)=1.000, y*f=1.000, update=TIDAK, w=[1.0, 0.0, -1.0], b=0.000
Iter 06 (Epoch 2, Sampel 1): x=[1.0, 0.0, 0.0], y=1, f(x)=1.000, y*f=1.000, update=TIDAK, w=[1.0, 0.0, -1.0], b=0.000
Iter 07 (Epoch 2, Sampel 2): x=[0.0, 0.0, 1.0], y=-1, f(x)=-1.000, y*f=1.000, update=TIDAK, w=[1.0, 0.0, -1.0], b=0.000
Iter 08 (Epo

In [10]:
# -*- coding: utf-8 -*-
"""
STEP 3 – Random Forest (Gini fitur "hadiah" + 2 stump: 'hadiah', 'tugas')
"""

# Training data and the sample to classify.
# Feature order: hadiah, gratis, tugas; label 1 = Spam, -1 = NotSpam.
X = [
    [1, 0, 0],
    [0, 0, 1],
    [1, 1, 0],
    [0, 0, 1],
    [1, 0, 0],
]
y = [1, -1, 1, -1, 1]
x_test = [1, 0, 1]

# Gini impurity
def gini_impurity(labels):
    """Return the Gini impurity ``1 - sum_k p_k**2`` of a collection of labels.

    Every distinct label is treated as its own class, so the result is correct
    for any number of classes, not only for the binary +1 / -1 case.

    Parameters
    ----------
    labels : iterable of hashable
        Class labels of the samples in a node.

    Returns
    -------
    float
        The impurity; 0.0 for an empty or single-class collection.
    """
    labels = list(labels)
    total = len(labels)
    if total == 0:
        # An empty node has no impurity by convention (and avoids dividing by zero).
        return 0.0
    class_counts = {}
    for label in labels:
        class_counts[label] = class_counts.get(label, 0) + 1
    return 1.0 - sum((n / total) ** 2 for n in class_counts.values())

# Impurity before splitting, then for the two children of a split on feature "hadiah"
root_gini = gini_impurity(y)
idx_hadiah = 0
left_labels = [label for row, label in zip(X, y) if row[idx_hadiah] == 1]
right_labels = [label for row, label in zip(X, y) if row[idx_hadiah] == 0]
left_gini = gini_impurity(left_labels)
right_gini = gini_impurity(right_labels)
# Weighted impurity of the split: each child contributes in proportion to its size.
weighted = sum(
    (len(child) / len(X)) * child_gini
    for child, child_gini in ((left_labels, left_gini), (right_labels, right_gini))
)

# Two decision stumps
def tree_A_predict(x):
    """Stump on feature 0 ("hadiah"): return 1 when it equals 1, otherwise -1."""
    if x[0] == 1:
        return 1
    return -1
def tree_B_predict(x):
    """Stump on feature 2 ("tugas"): return -1 when it equals 1, otherwise 1."""
    if x[2] == 1:
        return -1
    return 1

# Collect one vote per stump and take the majority; a tie yields None.
pred_A = tree_A_predict(x_test)
pred_B = tree_B_predict(x_test)
votes = [pred_A, pred_B]
spam_votes = votes.count(1)
notspam_votes = len(votes) - spam_votes
if spam_votes > notspam_votes:
    rf_pred = 1
elif spam_votes < notspam_votes:
    rf_pred = -1
else:
    rf_pred = None

# Assemble the report first, then print it line by line.
report_lines = [
    "STEP 3 – Random Forest (Gini + 2 stump)",
    f"Gini(root)={root_gini:.2f}",
    f"Split 'hadiah': left(hadiah=1) gini={left_gini:.2f}, right(hadiah=0) gini={right_gini:.2f}, weighted={weighted:.2f}",
    f"Tree A (hadiah) pred={pred_A}, Tree B (tugas) pred={pred_B}, votes={votes}, majority={rf_pred}",
]
for report_line in report_lines:
    print(report_line)

STEP 3 – Random Forest (Gini + 2 stump)
Gini(root)=0.48
Split 'hadiah': left(hadiah=1) gini=0.00, right(hadiah=0) gini=0.00, weighted=0.00
Tree A (hadiah) pred=1, Tree B (tugas) pred=-1, votes=[1, -1], majority=None
