<a href="https://colab.research.google.com/github/Daniilsol200/KURSOVAI_kovalevski_Kurs3/blob/main/%D0%9A%D1%83%D1%80%D1%81%D0%BE%D0%B2%D0%B0%D1%8F%D0%9D%D0%B5%D0%B9%D1%80%D0%BE%D0%BD%D1%8B%D0%B5%D0%A1%D0%B5%D1%82%D0%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np, random
np.random.seed(42); random.seed(42)

# =========================
# 1) Подготовка данных
# =========================
text = """
Нейронные сети — это мощный инструмент, который способен находить закономерности в данных.
Они применяются в обработке языка, изображений, в прогнозировании и многом другом.
В этой небольшой демонстрации мы обучаем простые рекуррентные сети для предсказания следующего символа.
Искусственный интеллект, нейронные сети, машинное обучение — что на самом деле означают все эти нынче популярные понятия?
Для большинства непосвященных людей, коим являюсь и я сам, они всегда казались чем-то фантастическим.
Впервые понятие искусственных нейронных сетей (ИНС) возникло при попытке смоделировать процессы головного мозга.
""".strip() * 20

text = text[:4000]
chars = sorted(list(set(text)))
vocab_size = len(chars)
char2idx = {ch: i for i, ch in enumerate(chars)}
idx2char = {i: ch for ch, i in char2idx.items()}

data = np.array([char2idx[ch] for ch in text], dtype=np.int32)
X_all, Y_all = data[:-1], data[1:]
split = int(0.9 * len(X_all))
X_train, Y_train = X_all[:split], Y_all[:split]
X_test, Y_test = X_all[split:], Y_all[split:]
print(f"len text={len(text)}, vocab={vocab_size}, train={len(X_train)}, test={len(X_test)}")

len text=4000, vocab=46, train=3599, test=400


In [3]:
# =========================
# 2) Вспомогательные функции
# =========================
def onehot(index, vocab_size):
    v = np.zeros(vocab_size, dtype=np.float32)
    v[index] = 1.0
    return v

def to_onehot_seq(indices, vocab_size):
    return [onehot(int(i), vocab_size) for i in indices]

def softmax(x):
    x = x - np.max(x)
    e = np.exp(x)
    return e / np.sum(e)

def cross_entropy_from_probs(probs, target_idx):
    return -np.log(probs[target_idx] + 1e-12)

def sample_with_temperature(p, T=1.0):
    p = np.asarray(p, dtype=np.float64)
    p = np.log(p + 1e-12) / T
    p = np.exp(p - np.max(p))
    p = p / np.sum(p)
    return np.random.choice(len(p), p=p)

In [4]:
# =========================
# 3) Elman RNN
# =========================
class ElmanRNN:
    def __init__(self, vocab_size, hidden_size=128):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        limit = np.sqrt(6.0 / (vocab_size + hidden_size))
        self.Wxh = np.random.uniform(-limit, limit, (hidden_size, vocab_size)).astype(np.float32)
        self.Whh = np.random.uniform(-limit, limit, (hidden_size, hidden_size)).astype(np.float32)
        self.Why = np.random.uniform(-limit, limit, (vocab_size, hidden_size)).astype(np.float32)
        self.bh = np.zeros(hidden_size, dtype=np.float32)
        self.by = np.zeros(vocab_size, dtype=np.float32)

    def forward(self, inputs, h0=None):
        T = len(inputs)
        if h0 is None:
            h_prev = np.zeros(self.hidden_size, dtype=np.float32)
        else:
            h_prev = h0.copy()
        hs = np.zeros((T + 1, self.hidden_size), dtype=np.float32)
        hs[0] = h_prev
        ps = np.zeros((T, self.vocab_size), dtype=np.float32)
        for t in range(T):
            x = inputs[t]
            hs[t + 1] = np.tanh(self.Wxh @ x + self.Whh @ hs[t] + self.bh)
            o = self.Why @ hs[t + 1] + self.by
            ps[t] = softmax(o)
        return hs, ps

    def bptt(self, inputs, targets, lr=0.005, clip=5.0, h0=None):
        T = len(inputs)
        hs, ps = self.forward(inputs, h0)
        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dWhy = np.zeros_like(self.Why)
        dbh = np.zeros_like(self.bh)
        dby = np.zeros_like(self.by)
        loss = 0.0
        dh_next = np.zeros(self.hidden_size, dtype=np.float32)

        for t in range(T):
            loss += cross_entropy_from_probs(ps[t], int(targets[t]))

        for t in reversed(range(T)):
            dy = ps[t].copy()
            dy[int(targets[t])] -= 1.0
            dWhy += np.outer(dy, hs[t + 1])
            dby += dy
            dh = self.Why.T @ dy + dh_next
            dh_raw = (1.0 - hs[t + 1] ** 2) * dh
            dbh += dh_raw
            dWxh += np.outer(dh_raw, inputs[t])
            dWhh += np.outer(dh_raw, hs[t])
            dh_next = self.Whh.T @ dh_raw

        for g in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(g, -clip, clip, out=g)

        self.Wxh -= lr * dWxh
        self.Whh -= lr * dWhh
        self.Why -= lr * dWhy
        self.bh -= lr * dbh
        self.by -= lr * dby

        return loss / T, hs[-1].copy()

    def predict_next(self, x_onehot, h_prev):
        h = np.tanh(self.Wxh @ x_onehot + self.Whh @ h_prev + self.bh)
        o = self.Why @ h + self.by
        p = softmax(o)
        return p, h

In [5]:
# =========================
# 4) Jordan RNN
# =========================
class JordanRNN:
    def __init__(self, vocab_size, hidden_size=128):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        limit = np.sqrt(6.0 / (vocab_size + hidden_size))
        self.Wxh = np.random.uniform(-limit, limit, (hidden_size, vocab_size)).astype(np.float32)
        self.Wch = np.random.uniform(-limit, limit, (hidden_size, vocab_size)).astype(np.float32)
        self.Why = np.random.uniform(-limit, limit, (vocab_size, hidden_size)).astype(np.float32)
        self.bh = np.zeros(hidden_size, dtype=np.float32)
        self.by = np.zeros(vocab_size, dtype=np.float32)

    def forward(self, inputs, ctx0=None):
        T = len(inputs)
        if ctx0 is None:
            ctx_prev = np.zeros(self.vocab_size, dtype=np.float32)
        else:
            ctx_prev = ctx0.copy()
        hs = np.zeros((T + 1, self.hidden_size), dtype=np.float32)
        ps = np.zeros((T, self.vocab_size), dtype=np.float32)
        for t in range(T):
            x = inputs[t]
            hs[t + 1] = np.tanh(self.Wxh @ x + self.Wch @ ctx_prev + self.bh)
            o = self.Why @ hs[t + 1] + self.by
            p = softmax(o)
            ps[t] = p
            ctx_prev = p
        return hs, ps

    def bptt(self, inputs, targets, lr=0.01, clip=5.0, ctx0=None):
        T = len(inputs)
        hs, ps = self.forward(inputs, ctx0)
        dWxh = np.zeros_like(self.Wxh)
        dWch = np.zeros_like(self.Wch)
        dWhy = np.zeros_like(self.Why)
        dbh = np.zeros_like(self.bh)
        dby = np.zeros_like(self.by)
        loss = 0.0

        for t in range(T):
            loss += cross_entropy_from_probs(ps[t], int(targets[t]))

        for t in reversed(range(T)):
            dy = ps[t].copy()
            dy[int(targets[t])] -= 1.0
            dWhy += np.outer(dy, hs[t + 1])
            dby += dy
            dh = self.Why.T @ dy
            dh_raw = (1.0 - hs[t + 1] ** 2) * dh
            dbh += dh_raw
            dWxh += np.outer(dh_raw, inputs[t])
            if t > 0:
                dWch += np.outer(dh_raw, ps[t - 1])
        for g in [dWxh, dWch, dWhy, dbh, dby]:
            np.clip(g, -clip, clip, out=g)

        self.Wxh -= lr * dWxh
        self.Wch -= lr * dWch
        self.Why -= lr * dWhy
        self.bh -= lr * dbh
        self.by -= lr * dby

        return loss / T, ps[-1].copy()

    def predict_next(self, x_onehot, ctx_prev):
        h = np.tanh(self.Wxh @ x_onehot + self.Wch @ ctx_prev + self.bh)
        o = self.Why @ h + self.by
        p = softmax(o)
        return p, h, p

In [6]:
# =========================
# 5) Обучение
# =========================
def train_model(model, X, Y, seq_len=50, epochs=80, lr=0.002, name="Model"):
    n = len(X)
    base_lr = lr

    # начальные состояния
    if isinstance(model, ElmanRNN):
        state = np.zeros(model.hidden_size, dtype=np.float32)
    else:
        state = np.zeros(model.vocab_size, dtype=np.float32)

    for epoch in range(1, epochs + 1):
        lr = base_lr * (0.97 ** (epoch // 5))
        total_loss = 0.0
        count = 0

        for i in range(0, n - seq_len, seq_len):
            X_seq = to_onehot_seq(X[i:i + seq_len], model.vocab_size)
            Y_seq = Y[i:i + seq_len]

            if isinstance(model, ElmanRNN):
                loss, state = model.bptt(X_seq, Y_seq, lr=lr, clip=1.0, h0=state)
                state = np.clip(state, -5, 5)
            else:
                loss, state = model.bptt(X_seq, Y_seq, lr=lr, clip=1.0, ctx0=state)
                state = np.clip(state, 0, 1)  # контекст — вероятности, ограничим диапазон

            total_loss += loss * len(X_seq)
            count += len(X_seq)

        avg_loss = total_loss / count
        if epoch % 5 == 0 or epoch == 1 or epoch == epochs:
            print(f"[{name}] epoch {epoch}/{epochs}, avg loss={avg_loss:.4f}, lr={lr:.5f}")

In [7]:
# =========================
# 6) Accuracy и генерация
# =========================
def compute_accuracy(model, X, Y):
    correct, total = 0, 0
    if isinstance(model, ElmanRNN):
        h = np.zeros(model.hidden_size, dtype=np.float32)
        for i in range(len(X)):
            x = onehot(int(X[i]), model.vocab_size)
            p, h = model.predict_next(x, h)
            if np.argmax(p) == int(Y[i]):
                correct += 1
            total += 1
    else:
        ctx = np.zeros(model.vocab_size, dtype=np.float32)
        for i in range(len(X)):
            x = onehot(int(X[i]), model.vocab_size)
            p, h, ctx = model.predict_next(x, ctx)
            if np.argmax(p) == int(Y[i]):
                correct += 1
            total += 1
    return correct / total

def generate(model, seed, length=200, T=0.8):
    out = seed
    if isinstance(model, ElmanRNN):
        h = np.zeros(model.hidden_size, dtype=np.float32)
        for ch in seed[:-1]:
            x = onehot(char2idx.get(ch, 0), model.vocab_size)
            _, h = model.predict_next(x, h)
        last_idx = char2idx.get(seed[-1], 0)
        for _ in range(length):
            x = onehot(last_idx, model.vocab_size)
            p, h = model.predict_next(x, h)
            next_idx = sample_with_temperature(p, T)
            out += idx2char[next_idx]
            last_idx = next_idx
    else:
        ctx = np.zeros(model.vocab_size, dtype=np.float32)
        for ch in seed[:-1]:
            x = onehot(char2idx.get(ch, 0), model.vocab_size)
            p, h, ctx = model.predict_next(x, ctx)
        last_idx = char2idx.get(seed[-1], 0)
        for _ in range(length):
            x = onehot(last_idx, model.vocab_size)
            p, h, ctx = model.predict_next(x, ctx)
            next_idx = sample_with_temperature(p, T)
            out += idx2char[next_idx]
            last_idx = next_idx
    return out

In [8]:
# =========================
# 7) Запуск обучения и генерации
# =========================
elman = ElmanRNN(vocab_size, hidden_size=128)
jordan = JordanRNN(vocab_size, hidden_size=128)

print("\nTraining ElmanRNN...")
train_model(elman, X_train, Y_train, seq_len=80, epochs=50, lr=0.01, name="Elman")

print("\nTraining JordanRNN...")
train_model(jordan, X_train, Y_train, seq_len=80, epochs=50, lr=0.01, name="Jordan")

acc_e = compute_accuracy(elman, X_test, Y_test)
acc_j = compute_accuracy(jordan, X_test, Y_test)
print(f"\nAccuracy — Elman: {acc_e*100:.2f}%, Jordan: {acc_j*100:.2f}%")

seed = "Нейронные се"
print("\nGenerated (Elman):\n", generate(elman, seed, 300, 0.8))
print("\nGenerated (Jordan):\n", generate(jordan, seed, 300, 0.5))


Training ElmanRNN...
[Elman] epoch 1/50, avg loss=3.7708, lr=0.01000
[Elman] epoch 5/50, avg loss=3.2148, lr=0.00970
[Elman] epoch 10/50, avg loss=2.4135, lr=0.00941
[Elman] epoch 15/50, avg loss=0.8549, lr=0.00913
[Elman] epoch 20/50, avg loss=0.1144, lr=0.00885
[Elman] epoch 25/50, avg loss=0.0111, lr=0.00859
[Elman] epoch 30/50, avg loss=0.0070, lr=0.00833
[Elman] epoch 35/50, avg loss=0.0052, lr=0.00808
[Elman] epoch 40/50, avg loss=0.0041, lr=0.00784
[Elman] epoch 45/50, avg loss=0.0035, lr=0.00760
[Elman] epoch 50/50, avg loss=0.0030, lr=0.00737

Training JordanRNN...
[Jordan] epoch 1/50, avg loss=3.2293, lr=0.01000
[Jordan] epoch 5/50, avg loss=2.3081, lr=0.00970
[Jordan] epoch 10/50, avg loss=2.0450, lr=0.00941
[Jordan] epoch 15/50, avg loss=1.9136, lr=0.00913
[Jordan] epoch 20/50, avg loss=1.8252, lr=0.00885
[Jordan] epoch 25/50, avg loss=1.7589, lr=0.00859
[Jordan] epoch 30/50, avg loss=1.6991, lr=0.00833
[Jordan] epoch 35/50, avg loss=1.6456, lr=0.00808
[Jordan] epoch 40/50

In [9]:
seed = "Нейронные се"
print("\nGenerated (Elman):\n", generate(elman, seed, 100, 0.7))
print("\nGenerated (Jordan):\n", generate(jordan, seed, 100, 0.7))


Generated (Elman):
 Нейронные сети — это мощный инструмент, который способен находить закономерности в данных.
Они применяются в обр

Generated (Jordan):
 Нейронные сено нов данных м.
В этонткены и и монта ные и годей, этинных м.
Впрольшинатклоем нный сенных ле нантн
