In [1]:
import torch
import torch.nn as nn
import numpy as np
from torch.optim import Adam
from data_rnn import load_ndfa, load_brackets
from utils import device, ARDataset, AutoRegressiveNetwork
from random import choices

In [2]:
# Load the NDFA dataset: the training sequences plus the two vocabulary
# lookups (i2w: index -> token, w2i: token -> index).
# n=150_000 is presumably the number of sequences to generate -- TODO confirm in data_rnn.
x_train, vocab = load_ndfa(n=150_000)
i2w, w2i = vocab

In [3]:
# Build the model, optimiser, batcher and loss for the NDFA task.
model = AutoRegressiveNetwork(w2i).to(device)

# Only hand parameters that actually require gradients to the optimiser.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = Adam(trainable_params, lr=3e-4, weight_decay=1e-4)

# bs / maxsize are forwarded to the project's ARDataset
# (presumably batch size and a size cap -- TODO confirm in utils).
dl = ARDataset(x_train, w2i, bs=8, maxsize=200)

criterion = nn.CrossEntropyLoss()

In [4]:
# Train for 10 epochs; after each epoch print the mean per-batch training loss.
for epoch in range(10):
    model.train()
    dl.shuffle()  # reshuffle the data before every pass

    running_loss = 0
    n_batches = 0
    for inputs, targets in dl.dataloader():
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

    mean_loss = running_loss / n_batches
    print(f'Epoch {epoch}, Train Loss: {mean_loss:.2f}')

Epoch 0, Train Loss: 0.91
Epoch 1, Train Loss: 0.24
Epoch 2, Train Loss: 0.20
Epoch 3, Train Loss: 0.19
Epoch 4, Train Loss: 0.19
Epoch 5, Train Loss: 0.19
Epoch 6, Train Loss: 0.19
Epoch 7, Train Loss: 0.19
Epoch 8, Train Loss: 0.19
Epoch 9, Train Loss: 0.19


In [5]:
# Sample 10 sequences from the trained model. Each sample is seeded with
# '.start' followed by 's' and extended one token at a time until '.end'
# is drawn (or the safety cap below is hit).
sm = nn.Softmax(dim=1)
max_len = 1000  # safety cap: guarantees termination if the model never emits '.end'
end_idx = w2i['.end']
vocab_ids = range(len(w2i))

# The training cell leaves the model in train mode; switch to eval mode so any
# train-only layers (e.g. dropout) do not perturb the sampling distribution.
model.eval()
with torch.no_grad():  # inference only: do not build an autograd graph per step
    for _ in range(10):
        seq = [w2i['.start'], w2i['s']]
        # The seed never contains '.end', so checking the most recent token is
        # equivalent to scanning the whole sequence, without the O(n) rescan.
        while seq[-1] != end_idx and len(seq) < max_len:
            prefix = torch.tensor([seq], dtype=torch.long, device=device)
            # Assumes the model returns one row of next-token scores per batch
            # item (as its use with CrossEntropyLoss suggests) -- TODO confirm.
            # Convert to plain Python floats so random.choices does not have to
            # iterate over a (possibly CUDA) tensor element by element.
            probs = sm(model(prefix))[-1, :].cpu().tolist()
            seq.append(choices(vocab_ids, weights=probs, k=1)[0])
        print(''.join([i2w[i] for i in seq]))

.startsuvw!uvw!s.end
.startsklm!klm!klm!s.end
.startsuvw!uvww!uvw!uvw!uvw!s.end
.startsklm!klm!klm!s.end
.startsabc!abc!abc!abc!abc!abc!s.end
.startss.end
.startss.end
.startsabc!abc!s.end
.startsabc!s.end
.startsabc!abc!abc!abc!abc!abc!abc!s.end


In [6]:
# Load the brackets dataset, replacing the NDFA data and vocabulary:
# training sequences plus the lookups i2w (index -> token) and w2i (token -> index).
# n=150_000 is presumably the number of sequences to generate -- TODO confirm in data_rnn.
x_train, vocab = load_brackets(n=150_000)
i2w, w2i = vocab

In [7]:
# Build a fresh model, optimiser, batcher and loss for the brackets task.
model = AutoRegressiveNetwork(w2i).to(device)

# Only hand parameters that actually require gradients to the optimiser.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = Adam(trainable_params, lr=3e-4, weight_decay=1e-4)

# bs / maxsize are forwarded to the project's ARDataset
# (presumably batch size and a size cap -- TODO confirm in utils).
dl = ARDataset(x_train, w2i, bs=8, maxsize=200)

criterion = nn.CrossEntropyLoss()

In [8]:
# Train for 10 epochs; after each epoch print the mean per-batch training loss.
for epoch in range(10):
    model.train()
    dl.shuffle()  # reshuffle the data before every pass

    running_loss = 0
    n_batches = 0
    for inputs, targets in dl.dataloader():
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

    mean_loss = running_loss / n_batches
    print(f'Epoch {epoch}, Train Loss: {mean_loss:.2f}')

Epoch 0, Train Loss: 1.50
Epoch 1, Train Loss: 0.99
Epoch 2, Train Loss: 0.77
Epoch 3, Train Loss: 0.64
Epoch 4, Train Loss: 0.58
Epoch 5, Train Loss: 0.54
Epoch 6, Train Loss: 0.52
Epoch 7, Train Loss: 0.50
Epoch 8, Train Loss: 0.49
Epoch 9, Train Loss: 0.48


In [9]:
def is_balanced(token_ids, open_idx, close_idx):
    """Return True iff ``token_ids`` is a properly nested bracket sequence.

    Args:
        token_ids: iterable of vocabulary indices (without '.start' / '.end').
        open_idx: vocabulary index of '('.
        close_idx: vocabulary index of ')'.

    Returns:
        True when every ')' closes an earlier '(' and nothing is left open;
        False otherwise, including when any non-bracket token is present.
    """
    depth = 0
    for tok in token_ids:
        if tok == open_idx:
            depth += 1
        elif tok == close_idx:
            depth -= 1
            # Equal counts alone are not enough: '())(' has matching counts
            # but closes a bracket that was never opened.
            if depth < 0:
                return False
        else:
            return False  # special / unknown token inside the bracket string
    return depth == 0


# Sample 10 sequences from the trained model, each seeded with '.start(()',
# and print whether the generated bracket string is well formed.
sm = nn.Softmax(dim=1)
max_len = 1000  # safety cap: guarantees termination if the model never emits '.end'
end_idx = w2i['.end']
vocab_ids = range(len(w2i))

# The training cell leaves the model in train mode; switch to eval mode so any
# train-only layers (e.g. dropout) do not perturb the sampling distribution.
model.eval()
with torch.no_grad():  # inference only: do not build an autograd graph per step
    for _ in range(10):
        seq = [w2i['.start'], w2i['('], w2i['('], w2i[')']]
        # The seed never contains '.end', so checking the most recent token is
        # equivalent to scanning the whole sequence, without the O(n) rescan.
        while seq[-1] != end_idx and len(seq) < max_len:
            prefix = torch.tensor([seq], dtype=torch.long, device=device)
            # Assumes the model returns one row of next-token scores per batch
            # item (as its use with CrossEntropyLoss suggests) -- TODO confirm.
            # Convert to plain Python floats so random.choices does not have to
            # iterate over a (possibly CUDA) tensor element by element.
            probs = sm(model(prefix))[-1, :].cpu().tolist()
            seq.append(choices(vocab_ids, weights=probs, k=1)[0])

        # A sample that hit the length cap never terminated and is not valid.
        terminated = seq[-1] == end_idx
        body = seq[1:-1] if terminated else seq[1:]
        valid = terminated and is_balanced(body, w2i['('], w2i[')'])
        print(valid, ':', ''.join([i2w[i] for i in seq]))

True : .start(()()(()(()()(()()((()))((((())(()())())))(()))()(())))()).end
False : .start(()(()((((((((())))((()((())()))()))())()((()())(((((()((()()))))()))(())(()()(()))(())((()(()))))))))()((())))()()((()()))(()))).end
True : .start(()).end
False : .start(())).end
True : .start(()()).end
True : .start(()).end
False : .start(()(()((()))))(.end
True : .start(()()).end
True : .start(()).end
False : .start(()))).end
