In [9]:
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import torch

In [4]:
# 1. Dataset (small illustrative example)
# Every radio message is written next to its intent label, then the pairs are
# transposed into two parallel lists: `texts` (messages) and `labels`.
texts, labels = (
    list(column)
    for column in zip(
        ("Box box box", "진입 명령"),
        ("My tyres are gone", "차량 상태"),
        ("Tell him to get out", "불만/요청"),
        ("We need to push", "전략/전술"),
        ("Engine is overheating", "차량 상태"),
        ("Good job keep pushing", "격려"),
        ("I'm losing power", "차량 상태"),
        ("Switching to plan B", "전략/전술"),
        ("Push now push now", "전략/전술"),
        ("Let me race please", "불만/요청"),
    )
)

In [5]:
# Lower-case each message and split it on whitespace.
tokenized = [text.lower().split() for text in texts]

# Assign ids in order of first appearance: a word seen for the first time gets
# the current vocabulary size as its id; words already present are untouched.
word2idx = {}
for sentence in tokenized:
    for word in sentence:
        word2idx.setdefault(word, len(word2idx))

# Reverse lookup, in the same insertion order as word2idx.
idx2word = {position: token for token, position in word2idx.items()}

# `idx` ends up as the next free id, which is also the vocabulary size.
idx = len(word2idx)
vocab_size = idx

In [6]:
# Label string -> class id; ids follow the position of the label in this list.
label2idx = {
    name: index
    for index, name in enumerate(
        ["진입 명령", "차량 상태", "불만/요청", "전략/전술", "격려"]
    )
}
# Reverse lookup: class id -> label string.
idx2label = dict(zip(label2idx.values(), label2idx.keys()))

In [7]:
# Build bag-of-words features: one row per sentence holding the relative
# frequency of every vocabulary word within that sentence.
X_data = []
y_data = []  # raw string labels, collected in the same order as X_data

for sentence, label in zip(tokenized, labels):
    context_vec = np.zeros(vocab_size)
    for word in sentence:
        context_vec[word2idx[word]] += 1
    # Normalise counts by sentence length. max(..., 1) keeps an empty sentence
    # as an all-zero row instead of turning it into NaNs via 0 / 0.
    X_data.append(context_vec / max(len(sentence), 1))
    y_data.append(label)

X = np.array(X_data)
y = [label2idx[label] for label in labels]  # integer class ids
# Size the class dimension from the full label space, not from the labels that
# happen to occur in `y`: if some label were absent from the data,
# len(set(y)) would be too small and a valid class id could fall out of range.
num_classes = len(label2idx)

In [8]:
# Model hyperparameters and parameter initialisation.
# Seed NumPy's global RNG so the random initial weights, and everything
# derived from them, are identical after every Restart & Run All.
np.random.seed(42)
embedding_dim = 16
W_embed = np.random.randn(vocab_size, embedding_dim) * 0.01  # word embeddings
W_out = np.random.randn(embedding_dim, num_classes) * 0.01   # output weights
b_out = np.zeros((1, num_classes))                           # output bias, one row
learning_rate = 0.1

In [10]:
def softmax(z):
    """Apply a softmax along axis 1, so every row sums to 1.

    The row maximum is subtracted before exponentiating; this leaves the
    result unchanged but keeps ``np.exp`` from overflowing on large scores.

    Args:
        z: Scores to normalise; reductions run over axis 1, so the input
            needs at least two dimensions.

    Returns:
        Array of the same shape as ``z`` holding the normalised values.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    numerators = np.exp(z - row_max)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators

def cross_entropy(probs, y_true):
    """Mean negative log-likelihood of the true classes.

    Args:
        probs: Predicted class probabilities, indexed as (sample, class).
            NumPy arrays (as returned by ``softmax``) use ``np.log``; any
            other input is handed to ``torch.log`` as before, so tensor
            callers keep working.
        y_true: Integer class id for each sample, one per row of ``probs``.

    Returns:
        Scalar loss averaged over the batch.
    """
    batch_size = probs.shape[0]
    # A small epsilon avoids log(0) when a probability is exactly zero.
    if isinstance(probs, np.ndarray):
        # torch.log raises TypeError on NumPy input, so use np.log here.
        log_probs = np.log(probs + 1e-9)
    else:
        log_probs = torch.log(probs + 1e-9)
    # Pick, for every row, the log-probability assigned to its true class.
    loss = -log_probs[range(batch_size), y_true].mean()
    return loss