In [1]:
import torch
from torch.utils.data import *
import time

### Глобальные переменные (гиперпараметры обучения и размеры датасета)

In [2]:
# Training hyper-parameters.
BATCH_SIZE = 256       # sequences per mini-batch handed to the DataLoaders
NUM_EPOCH = 100        # number of full passes over the training set
learning_rate = 0.001  # step size given to the Adam optimizer

# Synthetic dataset dimensions.
len_sequence = 50       # digits per generated sequence
count_sequence = 20000  # total number of sequences (split 70/30 into train/test)

# Seed PyTorch's global RNG so data generation, weight initialisation and
# DataLoader shuffling are reproducible under "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED);

### Правило кодирования последовательности: $y_0 = x_0$, $y_i = (x_i + x_0) \bmod 10$ при $i \ge 1$

In [3]:
def encode(x):
    """Encode a sequence by shifting every element by the first one, modulo 10.

    The first element is copied unchanged; every later element ``x[i]`` becomes
    ``(x[i] + x[0]) % 10``.

    Args:
        x: tensor indexed along its first dimension (the notebook passes 1-D
            integer tensors of digits).

    Returns:
        A new tensor with the same shape and dtype as ``x``. ``x`` itself is
        not modified. An empty input yields an empty result.
    """
    if x.shape[0] == 0:
        # Nothing to shift; avoid indexing x[0] on an empty tensor.
        return torch.zeros_like(x)
    # One vectorized op instead of a Python loop over individual elements.
    y = (x + x[0]) % 10
    # Position 0 carries the key itself and must stay un-shifted.
    y[0] = x[0]
    return y

### Генерация датасета

In [4]:
# 70/30 train/test split. The test size is the remainder, so the two parts
# always add up to `count_sequence` regardless of float rounding.
count_train_sample = int(count_sequence * 0.7)
count_test_sample = count_sequence - count_train_sample

# `high` is exclusive in torch.randint, so high=10 is required to sample every
# digit 0..9 (the models use a 10-entry embedding and `encode` works modulo 10).
# Each split is drawn in a single call instead of one call per row.
x_train = torch.randint(low=0, high=10, size=(count_train_sample, len_sequence))
y_train = torch.stack([encode(row) for row in x_train])

x_test = torch.randint(low=0, high=10, size=(count_test_sample, len_sequence))
y_test = torch.stack([encode(row) for row in x_test])

train_ds = DataLoader(TensorDataset(x_train, y_train),
                      batch_size=BATCH_SIZE,
                      shuffle=True)
# Evaluation metrics do not depend on sample order, so the test set is not shuffled.
test_ds = DataLoader(TensorDataset(x_test, y_test),
                     batch_size=BATCH_SIZE,
                     shuffle=False)

### Обучение модели

In [5]:
def _run_epoch(data, model, loss, optimizer=None):
    """Make one pass over ``data``: train if ``optimizer`` is given, else evaluate.

    Args:
        data: iterable of ``(x, y)`` batches.
        model: module mapping ``x`` to per-position class scores
            (classes on the last dimension).
        loss: criterion called as ``loss(scores, targets)`` on flattened tensors.
        optimizer: optimizer to step after every batch, or ``None`` to evaluate.

    Returns:
        ``(loss_sum, accuracy)``: the sum of the per-batch loss values and the
        fraction of target elements predicted correctly (``0.0`` when ``data``
        yields no batches).
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, correct, total = 0.0, 0, 0
    # Gradients are only needed while training; turning them off for
    # evaluation avoids building an autograd graph for every test batch.
    with torch.set_grad_enabled(training):
        for x, y in data:
            y = y.reshape(-1)
            if training:
                optimizer.zero_grad()
            # Call the module itself (not .forward) so registered hooks run.
            out = model(x)
            # Flatten to (positions, classes) without hard-coding the class count.
            out = out.reshape(-1, out.shape[-1])
            batch_loss = loss(out, y)
            if training:
                batch_loss.backward()
                optimizer.step()
            loss_sum += batch_loss.item()
            # Count hits and targets so a short final batch is weighted correctly.
            correct += (out.argmax(dim=1) == y).sum().item()
            total += y.numel()
    accuracy = correct / total if total else 0.0
    return loss_sum, accuracy


def train(train_ds, test_ds, model, optimizer, loss, num_epoch):
    """Train ``model`` for ``num_epoch`` epochs, evaluating after each one.

    After every epoch a single line is printed with the summed training loss,
    training accuracy, summed test loss, test accuracy and the wall-clock time
    of the epoch (training plus evaluation).

    Args:
        train_ds: iterable of ``(x, y)`` training batches.
        test_ds: iterable of ``(x, y)`` evaluation batches.
        model: module producing per-position class scores.
        optimizer: optimizer over the model's parameters.
        loss: criterion called as ``loss(scores, targets)``.
        num_epoch: number of epochs to run.

    Returns:
        None. The model is updated in place and left in evaluation mode.
    """
    for epoch in range(num_epoch):
        start_epoch_time = time.time()

        train_loss, train_acc = _run_epoch(train_ds, model, loss, optimizer)
        print(f"Epoch: {epoch+1}, loss: {train_loss:.4f}, acc: "
              f"{train_acc:.4f}",
              end=" | ")

        test_loss, test_acc = _run_epoch(test_ds, model, loss)
        print(f"test loss: {test_loss:.4f}, test acc: {test_acc:.4f} | "
              f"{time.time() - start_epoch_time:.2f} sec.")

### Модель RNN

In [6]:
class RNNModel(torch.nn.Module):
    """Per-position digit classifier: embedding -> ``torch.nn.RNN`` -> linear.

    Args:
        vocab_size: number of distinct input symbols and of output classes.
        embed_dim: size of each embedding vector.
        hidden_size: size of the recurrent hidden state.
    """

    def __init__(self, vocab_size=10, embed_dim=10, hidden_size=128):
        super().__init__()
        self.embed = torch.nn.Embedding(vocab_size, embed_dim)
        self.rnn = torch.nn.RNN(embed_dim, hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, vocab_size)

    def forward(self, sentence, state=None):
        """Return class scores for every position of ``sentence``.

        Args:
            sentence: integer tensor of symbol indices; the RNN is built with
                ``batch_first=True``.
            state: optional initial hidden state forwarded to the RNN;
                ``None`` uses the RNN's default initial state.

        Returns:
            Tensor of scores with ``vocab_size`` entries on the last dimension.
        """
        embed = self.embed(sentence)
        # Forward the caller-supplied initial state instead of dropping it.
        o, _ = self.rnn(embed, state)
        return self.linear(o)

In [7]:
# Fresh plain-RNN baseline together with its criterion and Adam optimizer.
loss = torch.nn.CrossEntropyLoss()
model = RNNModel()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [8]:
# Fit the RNN model; one progress line is printed per epoch.
train(train_ds=train_ds,
      test_ds=test_ds,
      model=model,
      optimizer=optimizer,
      loss=loss,
      num_epoch=NUM_EPOCH)

Epoch: 1, loss: 124.7274, acc: 0.1101 | test loss: 53.8284, test acc: 0.1142 | 4.19 sec.
Epoch: 2, loss: 122.8049, acc: 0.1158 | test loss: 53.4059, test acc: 0.1185 | 4.45 sec.
Epoch: 3, loss: 122.2032, acc: 0.1148 | test loss: 53.2192, test acc: 0.1182 | 4.11 sec.
Epoch: 4, loss: 121.7453, acc: 0.1167 | test loss: 53.0504, test acc: 0.1127 | 4.00 sec.
Epoch: 5, loss: 121.3773, acc: 0.1184 | test loss: 52.9199, test acc: 0.1209 | 4.34 sec.
Epoch: 6, loss: 121.1482, acc: 0.1255 | test loss: 52.9739, test acc: 0.1238 | 3.89 sec.
Epoch: 7, loss: 120.1573, acc: 0.1576 | test loss: 51.9929, test acc: 0.1646 | 4.47 sec.
Epoch: 8, loss: 113.9118, acc: 0.2078 | test loss: 46.6758, test acc: 0.2349 | 4.14 sec.
Epoch: 9, loss: 101.5192, acc: 0.2346 | test loss: 42.5083, test acc: 0.2344 | 4.23 sec.
Epoch: 10, loss: 95.4991, acc: 0.2411 | test loss: 41.5510, test acc: 0.2478 | 4.23 sec.
Epoch: 11, loss: 94.2312, acc: 0.2511 | test loss: 41.7638, test acc: 0.2435 | 4.32 sec.
Epoch: 12, loss: 90.7

Epoch: 93, loss: 9.0123, acc: 0.8990 | test loss: 3.8583, test acc: 0.9015 | 4.17 sec.
Epoch: 94, loss: 8.8341, acc: 0.9005 | test loss: 3.8142, test acc: 0.9017 | 4.36 sec.
Epoch: 95, loss: 8.7159, acc: 0.8999 | test loss: 3.7168, test acc: 0.9023 | 4.39 sec.
Epoch: 96, loss: 8.5826, acc: 0.9014 | test loss: 3.6980, test acc: 0.9029 | 4.17 sec.
Epoch: 97, loss: 8.4806, acc: 0.9038 | test loss: 3.6762, test acc: 0.9028 | 4.08 sec.
Epoch: 98, loss: 8.4280, acc: 0.9033 | test loss: 3.6408, test acc: 0.9044 | 4.31 sec.
Epoch: 99, loss: 8.3386, acc: 0.9046 | test loss: 3.6302, test acc: 0.9024 | 4.23 sec.
Epoch: 100, loss: 8.3189, acc: 0.9051 | test loss: 3.5775, test acc: 0.9074 | 4.10 sec.


In [10]:
from sklearn.metrics import accuracy_score

# Sanity check of the trained model on one freshly sampled sequence.
# NOTE(review): `high` is exclusive, so this draws digits 0..8 only.
x = torch.randint(low=0, high=9, size=(len_sequence,))
y = encode(x).view(-1)

# Inference only: use eval mode and skip autograd bookkeeping. The model is
# built with batch_first=True, so pass an explicit batch of one sequence and
# take the argmax over the class (last) dimension.
model.eval()
with torch.no_grad():
    pred = model(x.unsqueeze(0)).argmax(dim=-1).view(-1)

print('Generate sequence: \n', x)
print('Encode sequence: \n', y)
print('Predicted sequence: \n', pred)
print('Accuracy: ', accuracy_score(y.numpy(), pred.numpy()))

Generate sequence: 
 tensor([2, 1, 7, 1, 0, 4, 3, 3, 7, 7, 6, 0, 6, 1, 7, 5, 3, 8, 2, 6, 2, 1, 4, 8,
        2, 5, 2, 8, 8, 7, 4, 2, 4, 7, 0, 3, 8, 5, 7, 3, 7, 1, 0, 4, 1, 0, 8, 3,
        4, 7])
Encode sequence: 
 tensor([2, 3, 9, 3, 2, 6, 5, 5, 9, 9, 8, 2, 8, 3, 9, 7, 5, 0, 4, 8, 4, 3, 6, 0,
        4, 7, 4, 0, 0, 9, 6, 4, 6, 9, 2, 5, 0, 7, 9, 5, 9, 3, 2, 6, 3, 2, 0, 5,
        6, 9])
Predicted sequence: 
 tensor([2, 3, 9, 3, 2, 6, 5, 5, 9, 9, 8, 2, 8, 3, 9, 7, 5, 0, 4, 8, 4, 3, 6, 0,
        4, 7, 4, 0, 0, 9, 6, 4, 6, 9, 2, 5, 0, 7, 9, 5, 9, 3, 2, 6, 3, 2, 0, 5,
        6, 9])
Accuracy:  1.0


### Модель LSTM

In [11]:
class LSTMModel(torch.nn.Module):
    """Per-position digit classifier: embedding -> ``torch.nn.LSTM`` -> linear.

    Args:
        vocab_size: number of distinct input symbols and of output classes.
        embed_dim: size of each embedding vector.
        hidden_size: size of the recurrent hidden state.
    """

    def __init__(self, vocab_size=10, embed_dim=10, hidden_size=128):
        super().__init__()
        self.embed = torch.nn.Embedding(vocab_size, embed_dim)
        self.lstm = torch.nn.LSTM(embed_dim, hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, vocab_size)

    def forward(self, sentence, state=None):
        """Return class scores for every position of ``sentence``.

        Args:
            sentence: integer tensor of symbol indices; the LSTM is built with
                ``batch_first=True``.
            state: optional ``(h_0, c_0)`` tuple forwarded to the LSTM;
                ``None`` uses the LSTM's default initial state.

        Returns:
            Tensor of scores with ``vocab_size`` entries on the last dimension.
        """
        embed = self.embed(sentence)
        # Forward the caller-supplied initial state instead of dropping it.
        o, _ = self.lstm(embed, state)
        return self.linear(o)

In [12]:
# Fresh LSTM model together with its criterion and Adam optimizer.
loss = torch.nn.CrossEntropyLoss()
model = LSTMModel()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [13]:
# Fit the LSTM model; one progress line is printed per epoch.
train(train_ds=train_ds,
      test_ds=test_ds,
      model=model,
      optimizer=optimizer,
      loss=loss,
      num_epoch=NUM_EPOCH)

Epoch: 1, loss: 125.4248, acc: 0.1133 | test loss: 54.2078, test acc: 0.1133 | 18.67 sec.
Epoch: 2, loss: 123.4114, acc: 0.1145 | test loss: 53.5696, test acc: 0.1157 | 17.39 sec.
Epoch: 3, loss: 122.1155, acc: 0.1239 | test loss: 53.0671, test acc: 0.1287 | 17.08 sec.
Epoch: 4, loss: 121.2212, acc: 0.1306 | test loss: 52.7499, test acc: 0.1315 | 17.58 sec.
Epoch: 5, loss: 119.4699, acc: 0.1603 | test loss: 50.3672, test acc: 0.2082 | 18.30 sec.
Epoch: 6, loss: 108.2123, acc: 0.2280 | test loss: 44.2783, test acc: 0.2372 | 17.82 sec.
Epoch: 7, loss: 98.2187, acc: 0.2420 | test loss: 41.4794, test acc: 0.2504 | 18.36 sec.
Epoch: 8, loss: 86.7658, acc: 0.3274 | test loss: 30.9737, test acc: 0.4396 | 17.21 sec.
Epoch: 9, loss: 57.8396, acc: 0.5387 | test loss: 19.5730, test acc: 0.6076 | 17.69 sec.
Epoch: 10, loss: 38.9290, acc: 0.6808 | test loss: 14.5707, test acc: 0.7591 | 17.50 sec.
Epoch: 11, loss: 25.9469, acc: 0.8530 | test loss: 8.3346, test acc: 0.9241 | 17.58 sec.
Epoch: 12, los

Epoch: 94, loss: 0.0095, acc: 1.0000 | test loss: 0.0041, test acc: 1.0000 | 18.85 sec.
Epoch: 95, loss: 0.0092, acc: 1.0000 | test loss: 0.0040, test acc: 1.0000 | 17.85 sec.
Epoch: 96, loss: 0.0088, acc: 1.0000 | test loss: 0.0038, test acc: 1.0000 | 18.12 sec.
Epoch: 97, loss: 0.0086, acc: 1.0000 | test loss: 0.0037, test acc: 1.0000 | 18.22 sec.
Epoch: 98, loss: 0.0083, acc: 1.0000 | test loss: 0.0036, test acc: 1.0000 | 18.13 sec.
Epoch: 99, loss: 0.0080, acc: 1.0000 | test loss: 0.0034, test acc: 1.0000 | 18.29 sec.
Epoch: 100, loss: 0.0077, acc: 1.0000 | test loss: 0.0033, test acc: 1.0000 | 17.19 sec.


In [14]:
# Sanity check of the trained model on one freshly sampled sequence.
# NOTE(review): `high` is exclusive, so this draws digits 0..8 only.
x = torch.randint(low=0, high=9, size=(len_sequence,))
y = encode(x).view(-1)

# Inference only: use eval mode and skip autograd bookkeeping. The model is
# built with batch_first=True, so pass an explicit batch of one sequence and
# take the argmax over the class (last) dimension.
model.eval()
with torch.no_grad():
    pred = model(x.unsqueeze(0)).argmax(dim=-1).view(-1)

print('Generate sequence: \n', x)
print('Encode sequence: \n', y)
print('Predicted sequence: \n', pred)
print('Accuracy: ', accuracy_score(y.numpy(), pred.numpy()))

Generate sequence: 
 tensor([7, 3, 8, 5, 8, 2, 7, 8, 7, 0, 8, 0, 1, 0, 1, 6, 0, 5, 2, 7, 6, 3, 3, 6,
        2, 1, 3, 6, 5, 3, 5, 0, 3, 5, 7, 6, 8, 5, 2, 2, 1, 0, 4, 7, 8, 2, 1, 2,
        1, 2])
Encode sequence: 
 tensor([7, 0, 5, 2, 5, 9, 4, 5, 4, 7, 5, 7, 8, 7, 8, 3, 7, 2, 9, 4, 3, 0, 0, 3,
        9, 8, 0, 3, 2, 0, 2, 7, 0, 2, 4, 3, 5, 2, 9, 9, 8, 7, 1, 4, 5, 9, 8, 9,
        8, 9])
Predicted sequence: 
 tensor([7, 0, 5, 2, 5, 9, 4, 5, 4, 7, 5, 7, 8, 7, 8, 3, 7, 2, 9, 4, 3, 0, 0, 3,
        9, 8, 0, 3, 2, 0, 2, 7, 0, 2, 4, 3, 5, 2, 9, 9, 8, 7, 1, 4, 5, 9, 8, 9,
        8, 9])
Accuracy:  1.0


### Модель GRU

In [15]:
class GRUModel(torch.nn.Module):
    """Per-position digit classifier: embedding -> ``torch.nn.GRU`` -> linear.

    Args:
        vocab_size: number of distinct input symbols and of output classes.
        embed_dim: size of each embedding vector.
        hidden_size: size of the recurrent hidden state.
    """

    def __init__(self, vocab_size=10, embed_dim=10, hidden_size=128):
        super().__init__()
        self.embed = torch.nn.Embedding(vocab_size, embed_dim)
        self.gru = torch.nn.GRU(embed_dim, hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, vocab_size)

    def forward(self, sentence, state=None):
        """Return class scores for every position of ``sentence``.

        Args:
            sentence: integer tensor of symbol indices; the GRU is built with
                ``batch_first=True``.
            state: optional initial hidden state forwarded to the GRU;
                ``None`` uses the GRU's default initial state.

        Returns:
            Tensor of scores with ``vocab_size`` entries on the last dimension.
        """
        embed = self.embed(sentence)
        # Forward the caller-supplied initial state instead of dropping it.
        o, _ = self.gru(embed, state)
        return self.linear(o)

In [16]:
# Fresh GRU model together with its criterion and Adam optimizer.
loss = torch.nn.CrossEntropyLoss()
model = GRUModel()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [17]:
# Fit the GRU model; one progress line is printed per epoch.
train(train_ds=train_ds,
      test_ds=test_ds,
      model=model,
      optimizer=optimizer,
      loss=loss,
      num_epoch=NUM_EPOCH)

Epoch: 1, loss: 125.2010, acc: 0.1110 | test loss: 54.0873, test acc: 0.1091 | 13.91 sec.
Epoch: 2, loss: 123.2391, acc: 0.1148 | test loss: 53.5646, test acc: 0.1190 | 13.90 sec.
Epoch: 3, loss: 122.1482, acc: 0.1223 | test loss: 53.0622, test acc: 0.1240 | 14.51 sec.
Epoch: 4, loss: 121.2176, acc: 0.1276 | test loss: 52.7566, test acc: 0.1292 | 14.19 sec.
Epoch: 5, loss: 120.4544, acc: 0.1362 | test loss: 52.1835, test acc: 0.1514 | 14.15 sec.
Epoch: 6, loss: 113.4618, acc: 0.2165 | test loss: 47.0877, test acc: 0.2381 | 13.85 sec.
Epoch: 7, loss: 103.1026, acc: 0.2650 | test loss: 42.7004, test acc: 0.3276 | 13.77 sec.
Epoch: 8, loss: 79.9671, acc: 0.5238 | test loss: 23.8423, test acc: 0.7661 | 13.79 sec.
Epoch: 9, loss: 30.1269, acc: 0.9415 | test loss: 5.2107, test acc: 0.9979 | 14.34 sec.
Epoch: 10, loss: 6.7354, acc: 0.9996 | test loss: 1.6443, test acc: 1.0000 | 13.50 sec.
Epoch: 11, loss: 2.7261, acc: 1.0000 | test loss: 0.8655, test acc: 1.0000 | 14.08 sec.
Epoch: 12, loss: 

Epoch: 94, loss: 0.0051, acc: 1.0000 | test loss: 0.0022, test acc: 1.0000 | 14.57 sec.
Epoch: 95, loss: 0.0049, acc: 1.0000 | test loss: 0.0021, test acc: 1.0000 | 14.12 sec.
Epoch: 96, loss: 0.0048, acc: 1.0000 | test loss: 0.0020, test acc: 1.0000 | 14.30 sec.
Epoch: 97, loss: 0.0046, acc: 1.0000 | test loss: 0.0020, test acc: 1.0000 | 14.92 sec.
Epoch: 98, loss: 0.0044, acc: 1.0000 | test loss: 0.0019, test acc: 1.0000 | 13.74 sec.
Epoch: 99, loss: 0.0043, acc: 1.0000 | test loss: 0.0019, test acc: 1.0000 | 14.15 sec.
Epoch: 100, loss: 0.0042, acc: 1.0000 | test loss: 0.0018, test acc: 1.0000 | 13.72 sec.


In [18]:
# Sanity check of the trained model on one freshly sampled sequence.
# NOTE(review): `high` is exclusive, so this draws digits 0..8 only.
x = torch.randint(low=0, high=9, size=(len_sequence,))
y = encode(x).view(-1)

# Inference only: use eval mode and skip autograd bookkeeping. The model is
# built with batch_first=True, so pass an explicit batch of one sequence and
# take the argmax over the class (last) dimension.
model.eval()
with torch.no_grad():
    pred = model(x.unsqueeze(0)).argmax(dim=-1).view(-1)

print('Generate sequence: \n', x)
print('Encode sequence: \n', y)
print('Predicted sequence: \n', pred)
print('Accuracy: ', accuracy_score(y.numpy(), pred.numpy()))

Generate sequence: 
 tensor([2, 0, 6, 5, 2, 5, 8, 2, 4, 1, 6, 1, 8, 2, 1, 4, 2, 1, 5, 3, 8, 4, 7, 1,
        7, 0, 4, 3, 5, 4, 6, 6, 6, 7, 5, 8, 0, 4, 8, 4, 3, 6, 0, 6, 3, 4, 1, 5,
        5, 5])
Encode sequence: 
 tensor([2, 2, 8, 7, 4, 7, 0, 4, 6, 3, 8, 3, 0, 4, 3, 6, 4, 3, 7, 5, 0, 6, 9, 3,
        9, 2, 6, 5, 7, 6, 8, 8, 8, 9, 7, 0, 2, 6, 0, 6, 5, 8, 2, 8, 5, 6, 3, 7,
        7, 7])
Predicted sequence: 
 tensor([2, 2, 8, 7, 4, 7, 0, 4, 6, 3, 8, 3, 0, 4, 3, 6, 4, 3, 7, 5, 0, 6, 9, 3,
        9, 2, 6, 5, 7, 6, 8, 8, 8, 9, 7, 0, 2, 6, 0, 6, 5, 8, 2, 8, 5, 6, 3, 7,
        7, 7])
Accuracy:  1.0
