In [30]:
import torch
import time
from math import floor

In [31]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [32]:
def create_y(x):
    '''
    Creates a sequence based on x with rule: y_i = (x_i + x_0) % 10
    :param x: torch.tensor
    :return: torch.tensor with the same shape as x
    '''
    y = torch.zeros(x.shape, dtype=torch.long)
    for i in range(len(x)):
        y[i, 0] = x[i, 0]
        for j in range(1, len(x[i])):
            y[i, j] = (x[i, 0] + x[i, j]) % 10
    return y

In [33]:
X = torch.randint(0, 9, (10000, 50))
X.to(torch.long).to(device)
Y = create_y(X).to(device)

In [34]:
X_train, Y_train = X[:floor(len(X) * 0.6)], Y[:floor(len(X) * 0.6)]
X_test, Y_test = X[floor(len(X) * 0.6):floor(len(X) * 0.6) + len(X) // 5], Y[floor(len(X) * 0.6):floor(len(X) * 0.6) + len(X) // 5]
X_val, Y_val = X[floor(len(X) * 0.6) + len(X) // 5:], Y[floor(len(X) * 0.6) + len(X) // 5:]

In [35]:
# define constants
BATCH_SIZE = 128

In [36]:
train_dl = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_train.to(device), Y_train.to(device)),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True
)
test_dl = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_test.to(device), Y_test.to(device)),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True
)
val_dl = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_val.to(device), Y_val.to(device)),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True
)

In [37]:
class RNNSec2Sec(torch.nn.Module):
    def __init__(self, rnn_type):
        super(RNNSec2Sec, self).__init__()
        self.embed = torch.nn.Embedding(10, 10)
        self.rnn = rnn_type(10, 100, batch_first=True)
        self.linear = torch.nn.Linear(100, 10)

    def forward(self, sentences):
        embed = self.embed(sentences)
        o, a = self.rnn(embed)
        out = self.linear(o)
        return out

In [38]:
def train_model(model, criterion, optimizer, train_dl, val_dl, epochs):
    for epoch in range(epochs):
        train_loss, train_acc, iter_num = .0, .0, .0
        start_epoch_time = time.time()
        model.train()
        for x_in, y_in in train_dl:
            x_in = x_in
            y_in = y_in.view(1, -1).squeeze()
            optimizer.zero_grad()
            out = model.forward(x_in).view(-1, 10)
            l = criterion(out, y_in)
            train_loss += l.item()
            batch_acc = (out.argmax(dim=1) == y_in)
            train_acc += batch_acc.sum().item() / batch_acc.shape[0]
            l.backward()
            optimizer.step()
            iter_num += 1
        print(
            f"Epoch: {epoch}, loss: {train_loss:.4f}, acc: "
            f"{train_acc / iter_num:.4f}",
            end=" | "
        )
        val_loss, val_acc, iter_num = .0, .0, .0
        model.eval()
        for x_in, y_in in val_dl:
            x_in = x_in
            y_in = y_in.view(1, -1).squeeze()
            out = model.forward(x_in).view(-1, 10)
            l = criterion(out, y_in)
            val_loss += l.item()
            batch_acc = (out.argmax(dim=1) == y_in)
            val_acc += batch_acc.sum().item() / batch_acc.shape[0]
            iter_num += 1
        print(
            f"val loss: {val_loss:.4f}, val acc: {val_acc / iter_num:.4f} | "
            f"{time.time() - start_epoch_time:.2f} sec."
        )

In [39]:
def test_model(model, test_dl):
    test_loss, test_acc, iter_num = .0, .0, .0
    model.eval()
    for x_in, y_in in test_dl:
        x_in = x_in
        y_in = y_in.view(1, -1).squeeze()
        out = model.forward(x_in).view(-1, 10)
        l = criterion(out, y_in)
        test_loss += l.item()
        batch_acc = (out.argmax(dim=1) == y_in)
        test_acc += batch_acc.sum().item() / batch_acc.shape[0]
        iter_num += 1
    print(
        f"test loss: {test_loss:.4f}, test acc: {test_acc / iter_num:.4f}")

In [40]:
simple_rnn = RNNSec2Sec(torch.nn.RNN)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(simple_rnn.parameters(), lr=0.005)

In [41]:
simple_rnn.to(device)
criterion.to(device)

CrossEntropyLoss()

In [42]:
train_model(simple_rnn, criterion, optimizer, train_dl, val_dl, 200)

Epoch: 0, loss: 103.4111, acc: 0.1155 | val loss: 33.4361, val acc: 0.1179 | 1.25 sec.
Epoch: 1, loss: 102.2738, acc: 0.1154 | val loss: 33.1931, val acc: 0.1125 | 0.21 sec.
Epoch: 2, loss: 101.9180, acc: 0.1140 | val loss: 33.1511, val acc: 0.1198 | 0.19 sec.
Epoch: 3, loss: 101.7872, acc: 0.1175 | val loss: 33.2348, val acc: 0.1099 | 0.19 sec.
Epoch: 4, loss: 101.5511, acc: 0.1215 | val loss: 32.8490, val acc: 0.1541 | 0.19 sec.
Epoch: 5, loss: 97.5603, acc: 0.1811 | val loss: 28.4847, val acc: 0.2272 | 0.18 sec.
Epoch: 6, loss: 78.5829, acc: 0.2333 | val loss: 23.5319, val acc: 0.2455 | 0.19 sec.
Epoch: 7, loss: 70.8876, acc: 0.2425 | val loss: 22.8219, val acc: 0.2440 | 0.18 sec.
Epoch: 8, loss: 69.6498, acc: 0.2506 | val loss: 22.5733, val acc: 0.2482 | 0.18 sec.
Epoch: 9, loss: 69.4294, acc: 0.2579 | val loss: 22.6615, val acc: 0.2585 | 0.19 sec.
Epoch: 10, loss: 69.1822, acc: 0.2684 | val loss: 24.6180, val acc: 0.2596 | 0.18 sec.
Epoch: 11, loss: 72.3506, acc: 0.2665 | val loss

In [43]:
test_model(simple_rnn, test_dl)

test loss: 0.0006, test acc: 1.0000


In [44]:
lstm_model = RNNSec2Sec(torch.nn.LSTM)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=0.005)

In [45]:
lstm_model.to(device)
criterion.to(device)

CrossEntropyLoss()

In [46]:
train_model(lstm_model, criterion, optimizer, train_dl, val_dl, 200)

Epoch: 0, loss: 103.7128, acc: 0.1151 | val loss: 33.2853, val acc: 0.1206 | 0.35 sec.
Epoch: 1, loss: 101.2821, acc: 0.1336 | val loss: 32.1662, val acc: 0.1915 | 0.25 sec.
Epoch: 2, loss: 82.5427, acc: 0.2308 | val loss: 23.8902, val acc: 0.2483 | 0.25 sec.
Epoch: 3, loss: 68.9288, acc: 0.2721 | val loss: 21.5153, val acc: 0.3398 | 0.25 sec.
Epoch: 4, loss: 52.7665, acc: 0.5433 | val loss: 11.4689, val acc: 0.7154 | 0.25 sec.
Epoch: 5, loss: 24.3121, acc: 0.7672 | val loss: 5.0236, val acc: 0.8148 | 0.25 sec.
Epoch: 6, loss: 7.8730, acc: 0.9479 | val loss: 0.6136, val acc: 1.0000 | 0.25 sec.
Epoch: 7, loss: 1.0581, acc: 1.0000 | val loss: 0.2081, val acc: 1.0000 | 0.24 sec.
Epoch: 8, loss: 0.5011, acc: 1.0000 | val loss: 0.1267, val acc: 1.0000 | 0.25 sec.
Epoch: 9, loss: 0.3294, acc: 1.0000 | val loss: 0.0891, val acc: 1.0000 | 0.24 sec.
Epoch: 10, loss: 0.2417, acc: 1.0000 | val loss: 0.0682, val acc: 1.0000 | 0.25 sec.
Epoch: 11, loss: 0.1885, acc: 1.0000 | val loss: 0.0544, val a

In [47]:
test_model(lstm_model, test_dl)

test loss: 0.0000, test acc: 1.0000


In [48]:
gru_model = RNNSec2Sec(torch.nn.GRU)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(gru_model.parameters(), lr=0.005)

In [49]:
gru_model.to(device)
criterion.to(device)

CrossEntropyLoss()

In [50]:
train_model(gru_model, criterion, optimizer, train_dl, val_dl, 200)

Epoch: 0, loss: 103.8920, acc: 0.1128 | val loss: 33.4055, val acc: 0.1245 | 0.29 sec.
Epoch: 1, loss: 101.8603, acc: 0.1251 | val loss: 33.0293, val acc: 0.1343 | 0.24 sec.
Epoch: 2, loss: 93.0302, acc: 0.2165 | val loss: 21.1063, val acc: 0.5227 | 0.22 sec.
Epoch: 3, loss: 22.8142, acc: 0.8869 | val loss: 0.6613, val acc: 0.9998 | 0.21 sec.
Epoch: 4, loss: 0.9701, acc: 1.0000 | val loss: 0.1689, val acc: 1.0000 | 0.22 sec.
Epoch: 5, loss: 0.3963, acc: 1.0000 | val loss: 0.0977, val acc: 1.0000 | 0.21 sec.
Epoch: 6, loss: 0.2502, acc: 1.0000 | val loss: 0.0670, val acc: 1.0000 | 0.23 sec.
Epoch: 7, loss: 0.1778, acc: 1.0000 | val loss: 0.0495, val acc: 1.0000 | 0.21 sec.
Epoch: 8, loss: 0.1348, acc: 1.0000 | val loss: 0.0385, val acc: 1.0000 | 0.21 sec.
Epoch: 9, loss: 0.1066, acc: 1.0000 | val loss: 0.0310, val acc: 1.0000 | 0.21 sec.
Epoch: 10, loss: 0.0869, acc: 1.0000 | val loss: 0.0256, val acc: 1.0000 | 0.21 sec.
Epoch: 11, loss: 0.0723, acc: 1.0000 | val loss: 0.0215, val acc: 

In [51]:
test_model(gru_model, test_dl)

test loss: 0.0000, test acc: 1.0000
