In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# ── Training hyperparameters (read as globals by train_xor) ──
lr = 0.1              # step size applied to every gradient update
epochs = 10_000       # upper bound on epochs within a single attempt
max_attempts = 10     # number of random re-initialisations before giving up
print_every = 100     # progress is printed every `print_every` epochs (and at epoch 1)

In [5]:
# XOR inputs: every combination of two bits, one sample per row -> shape (4, 2).
X = np.array([[bit_a, bit_b] for bit_a in (0, 1) for bit_b in (0, 1)], dtype=float)
# XOR targets as a column vector -> shape (4, 1), row-aligned with X.
y = np.array([0, 1, 1, 0], dtype=float).reshape(-1, 1)

In [7]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise to ``z``."""
    return 1 / (1 + np.exp(-z))


def dsigmoid(z):
    """Derivative of :func:`sigmoid` at ``z``: ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    s = sigmoid(z)  # evaluate the activation once instead of twice
    return s * (1 - s)

def predict(X_, W1, b1, W2, b2):
    """Forward pass through both layers; returns the output-layer activations.

    ``X_`` is multiplied by ``W1`` and shifted by ``b1``, squashed with
    ``sigmoid``, then the same is done with ``W2`` / ``b2``.
    """
    hidden = sigmoid(np.matmul(X_, W1) + b1)
    return sigmoid(np.matmul(hidden, W2) + b2)

In [141]:
def draw_xor_network(W1, b1, W2, b2,
                     R=0.25,
                     figsize=(9, 4.5),
                     title="Final Weights of Trained XOR Perceptron"):
    """Draw the network as a node/edge diagram annotated with its parameters.

    Parameters
    ----------
    W1 : read as ``W1[i, j]`` — label for the edge from input ``i`` to hidden unit ``j``.
    b1 : read as ``b1[0, j]`` — bias label shown next to hidden unit ``j``.
    W2 : read as ``W2[j, 0]`` — label for the edge from hidden unit ``j`` to the output.
    b2 : read as ``b2[0, 0]`` — bias label shown next to the output node.
    R : radius of each node circle (data units); edges start/end ``R`` away
        from the node centres along x.
    figsize : forwarded to ``plt.subplots``.
    title : text placed above the diagram.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # ─────── node layout ───────
    positions = {
        'I1': (0, 1.0),
        'I2': (0, -1.0),
        'H1': (2, 1.0),
        'H2': (2, -1.0),
        'O1': (3.5, 0.0),
    }
    centered_small = dict(ha='center', va='center', fontsize=9)

    fig, ax = plt.subplots(figsize=figsize)
    ax.axis('off')

    # ─────── nodes (circle + name) ───────
    for label, (cx, cy) in positions.items():
        circle = plt.Circle((cx, cy), R, fc='white', ec='black', zorder=3)
        ax.add_patch(circle)
        ax.text(cx, cy, label, ha='center', va='center',
                fontsize=11, weight='bold', zorder=4)

    # ─────── input → hidden ───────
    horiz_shift = -0.2    # vertical displacement of labels on horizontal edges
    along_shift = 0.35    # displacement of diagonal-edge labels along the edge direction

    for i, src in enumerate(('I1', 'I2')):
        sx, sy = positions[src]
        for j, dst in enumerate(('H1', 'H2')):
            ex, ey = positions[dst]
            ax.plot([sx + R, ex - R], [sy, ey], color='black', zorder=1)

            mid_x, mid_y = (sx + ex) / 2, (sy + ey) / 2
            weight_label = f"{W1[i, j]:.2f}"

            if np.isclose(sy, ey):
                # Horizontal edge: the top row's label goes above the line,
                # the bottom row's label below it.
                flip = 1 if sy < 0 else -1
                ax.text(mid_x, mid_y + flip * horiz_shift,
                        weight_label, color='royalblue', **centered_small)
                continue

            # Diagonal edge: slide the label away from the midpoint (where the
            # two diagonals cross) and rotate it to follow the edge.
            dx, dy = ex - sx, ey - sy
            norm = np.hypot(dx, dy)
            sign = 1 if ey > sy else -1      # rising diagonal +, falling diagonal -
            ax.text(mid_x + sign * (dx / norm) * along_shift,
                    mid_y + sign * (dy / norm) * 2 * along_shift,
                    weight_label, color='royalblue',
                    rotation=np.degrees(np.arctan2(dy, dx)),
                    rotation_mode='anchor', **centered_small)

    # ─────── hidden → output ───────
    diag_offset = 0.28
    out_x, out_y = positions['O1']
    for j, src in enumerate(('H1', 'H2')):
        hx, hy = positions[src]
        ax.plot([hx + R, out_x - R], [hy, out_y], color='black', zorder=1)
        slope_deg = np.degrees(np.arctan2(out_y - hy, out_x - hx))
        # First hidden unit's label is shifted down, the second one's up.
        shift = diag_offset if j else -diag_offset
        ax.text((hx + out_x) / 2, (hy + out_y) / 2 + shift, f"{W2[j, 0]:.2f}",
                color='seagreen', rotation=slope_deg, rotation_mode='anchor',
                **centered_small)

    # ─────── biases ───────
    h1_x, h1_y = positions['H1']
    h2_x, h2_y = positions['H2']
    ax.text(h1_x, h1_y + R + 0.20, f"b_h1={b1[0,0]:.2f}",
            color='purple', fontsize=9, ha='center')
    ax.text(h2_x, h2_y - R - 0.24, f"b_h2={b1[0,1]:.2f}",
            color='purple', fontsize=9, ha='center')
    ax.text(out_x + R + 0.30, out_y, f"b_o={b2[0,0]:.2f}",
            color='purple', fontsize=9, ha='left')

    # ─────── axes limits and title ───────
    xs = [px for px, _ in positions.values()]
    ys = [py for _, py in positions.values()]
    margin = 1.0
    ax.set_xlim(min(xs) - margin, max(xs) + margin)
    ax.set_ylim(min(ys) - margin, max(ys) + margin)
    ax.set_aspect("equal", adjustable="box")

    ax.set_title(title, fontsize=14, pad=25)
    fig.tight_layout()
    plt.show()


In [143]:
def train_xor(seed=None):
    """Train the 2-2-1 sigmoid network on XOR, restarting with new weights on failure.

    Reads the module-level ``X``, ``y``, ``epochs``, ``max_attempts``, ``lr`` and
    ``print_every``. Each attempt draws the freshly initialised network with
    ``draw_xor_network`` and then runs full-batch gradient descent until the
    thresholded outputs (``a2 > 0.5``) equal ``y`` or ``epochs`` is reached.

    Parameters
    ----------
    seed : int or None, optional
        Seed forwarded to ``np.random.default_rng``. ``None`` (the default)
        uses fresh entropy; pass an integer for a reproducible run.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2, loss_log)`` of the first successful attempt:
        ``W1`` (2, 2), ``b1`` (1, 2), ``W2`` (2, 1), ``b2`` (1, 1) and the
        list of per-epoch MSE values of that attempt.

    Raises
    ------
    RuntimeError
        If none of the ``max_attempts`` attempts reproduces the XOR table.
    """
    # A single generator is shared by all attempts so that, with a fixed seed,
    # every restart still draws different initial weights.
    rng = np.random.default_rng(seed)

    # Loop-invariant views of the labels.
    target = y.astype(int)
    truth = y.flatten().tolist()

    for attempt in range(1, max_attempts + 1):
        # Weight initialisation
        W1 = rng.normal(scale=0.5, size=(2, 2))
        b1 = np.zeros((1, 2))
        W2 = rng.normal(scale=0.5, size=(2, 1))
        b2 = np.zeros((1, 1))
        draw_xor_network(W1, b1, W2, b2, title="First Weights of Not Trained XOR Perceptron")
        loss_log = []

        for epoch in range(1, epochs + 1):
            # Forward pass
            z1 = X @ W1 + b1
            a1 = sigmoid(z1)
            z2 = a1 @ W2 + b2
            a2 = sigmoid(z2)

            loss = np.mean((a2 - y) ** 2)
            loss_log.append(loss)

            # Threshold once per epoch and reuse for logging and early stop.
            pred = (a2 > 0.5).astype(int)
            preds = pred.flatten().tolist()
            mastered = np.array_equal(pred, target)

            # Periodic monitoring
            if epoch % print_every == 0 or epoch == 1:
                print(f"[Attempt {attempt}  Epoch {epoch:5}] "
                      f"loss={loss:.5f}  예측={preds}  정답={truth}")

            # Final report for this attempt (last epoch or early stop)
            if mastered or epoch == epochs:
                print(f"[Attempt {attempt}  Epoch {epoch:5}] "
                      f"최종 loss={loss:.5f}  예측={preds}  정답={truth}")

            # Early stop. Return *before* applying this epoch's update so the
            # returned weights are exactly the ones that produced `a2`.
            if mastered:
                print(f"✓ XOR mastered!  (attempt {attempt}, epoch {epoch})")
                return W1, b1, W2, b2, loss_log

            # Backward pass.
            # NOTE: `a2 - y` carries no dsigmoid(z2) factor, i.e. it is the
            # output delta of binary cross-entropy with a sigmoid output; the
            # MSE computed above is a monitoring metric, not the optimised loss.
            d2  = a2 - y
            dW2 = a1.T @ d2
            db2 = d2.sum(axis=0, keepdims=True)

            d1  = (d2 @ W2.T) * dsigmoid(z1)
            dW1 = X.T @ d1
            db1 = d1.sum(axis=0, keepdims=True)

            # All gradients are computed before any parameter is modified.
            W2 -= lr * dW2
            b2 -= lr * db2
            W1 -= lr * dW1
            b1 -= lr * db1

        print(f"✗ Attempt {attempt} 실패, 가중치 재초기화…\n")

    raise RuntimeError("여러 번 시도했지만 XOR 학습에 실패했습니다.")

In [None]:
# Train the network, then draw the learned parameters.
W1, b1, W2, b2, loss_log = train_xor()
draw_xor_network(W1, b1, W2, b2)

# Plot the loss on its own figure. pyplot's implicit "current axes" could still
# be the network diagram when plt.show() leaves that figure open (e.g. in
# interactive mode), in which case plt.plot would draw onto the diagram.
loss_fig, loss_ax = plt.subplots()
# loss_log[0] belongs to epoch 1, so number the x-axis from 1 to match the logs.
loss_ax.plot(range(1, len(loss_log) + 1), loss_log)
loss_ax.set_title("Training Loss Curve (MSE)")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.grid(True)
plt.show()

In [None]:
# Interactive check: read two bits, print the network's XOR prediction, repeat
# until the user enters "q" at either prompt.
while True:
    x1 = input("첫 번째 비트(0/1) (q 종료): ").strip()
    if x1.lower() == "q":
        break
    x2 = input("두 번째 비트(0/1): ").strip()
    if x2.lower() == "q":
        break

    # Only the parsing/validation is guarded, so a genuine ValueError raised
    # by the model code is not misreported as bad user input.
    try:
        x1, x2 = int(x1), int(x2)
        # int() accepts any integer; the network is only defined for bits.
        if x1 not in (0, 1) or x2 not in (0, 1):
            raise ValueError("bits must be 0 or 1")
    except ValueError:
        print("0 또는 1 (또는 q)만 입력하세요.\n")
        continue

    prob   = predict(np.array([[x1, x2]]), W1, b1, W2, b2)
    result = int((prob > 0.5).item())
    print(f"▶ XOR({x1}, {x2}) = {result}\n")