### Задание

1. Сгенерировать последовательности, которые бы состояли из цифр (от 0 до 9) и задавались следующим образом:
x - последовательность цифр   
y1 = x1, y(i) = x(i) + x(1). Если y(i) >= 10, то y(i) = y(i) - 10   
2. научить модель предсказывать y(i) по x(i)
3. попробовать RNN, LSTM, GRU

In [0]:
import torch
from torch import nn
import numpy as np
import re
import random
import tqdm
import time

In [0]:
def generate_xy(sequence_len=100, batch_size=1, torch_type = torch.long):
    ''' 
    Создание датасета по алгоритму: 
    y1 = x1, y(i) = x(i) + x(1). Если y(i) >= 10, то y(i) = y(i) - 10   
    '''
    # torch_type = torch.long
    assert batch_size >= 1, 'Wrong batch size'
    if sequence_len is None:
        sequence_len = np.random.randint(10, 100)
        
    X = torch.zeros((batch_size, sequence_len), dtype=torch_type)
    y = torch.zeros((batch_size, sequence_len), dtype=torch_type)

    for i in range(batch_size):
        X_line = np.random.randint(0, 9, size=[sequence_len])
        y_line = X_line + X_line[0]
        y_line = np.vectorize(lambda x: x if x < 10 else x - 10)(y_line)
        y_line[0] = X_line[0]
        X[i] = torch.tensor(X_line, dtype=torch_type)
        y[i] = torch.tensor(y_line, dtype=torch_type)
    return X, y

In [0]:
X, y = generate_xy(sequence_len=10, batch_size=10, torch_type=torch.long)

In [0]:
class NeuralNetwork(nn.Module):
    def __init__(self, rnnClass, input_size, embedding_size, num_hiddens, num_classes):
        super().__init__()
        self.num_hiddens = num_hiddens
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.hidden = rnnClass(embedding_size, num_hiddens, batch_first=True)
        self.output = nn.Linear(num_hiddens, num_classes)
        
    def forward(self, X):
        out = self.embedding(X)
        if type(self.hidden) == nn.LSTM:
            _, (state, _) = self.hidden(out)
        else:
            _, state = self.hidden(out)

        predictions = self.output(state[0])
        return predictions    

In [0]:
def sample(preds):
    softmaxed = torch.softmax(preds, 0)
    probas = torch.distributions.multinomial.Multinomial(1, softmaxed).sample()
    return probas.argmax()

In [0]:
model = NeuralNetwork(rnnClass=nn.LSTM, input_size=10, embedding_size=64, 
                      num_hiddens=64, num_classes=10)

model = model.cuda()

In [52]:
X, y = generate_xy(sequence_len=10, batch_size=1)
print(X.shape, X)
print(y.shape, y)
X = X.cuda()
answers = model.forward(X)
print(answers.shape)

torch.Size([1, 10]) tensor([[1, 8, 2, 0, 1, 4, 6, 1, 5, 8]])
torch.Size([1, 10]) tensor([[1, 9, 3, 1, 2, 5, 7, 2, 6, 9]])
torch.Size([1, 10])


In [0]:
def generate_sequence(sequence_len=10):
    X, y = generate_xy(sequence_len=sequence_len, batch_size=1)    

    x_pred = torch.zeros( (1, sequence_len) , dtype=int)
    preds = torch.zeros( (1, sequence_len), dtype=int)
    for i in range(sequence_len):
        x_pred[0, i] = X.flatten()[i]

        next_digit = model(x_pred.cuda())
        preds[0, i] = sample(next_digit)

    print(y, preds)

In [0]:
def train(model, criterion, optimizer, epochs=300, sequence_len=100, batch_size=100):
    for ep in range(epochs + 1):
        start = time.time()
        train_loss = 0.
        train_passed = 0

        model.train()
        X, y = generate_xy(sequence_len=sequence_len, batch_size=batch_size)
        X = X.cuda()
        y = y.cuda()
        for i in range( sequence_len ):
            optimizer.zero_grad()
            answers = model.forward(X[:, :(i+1)])
            
            loss = criterion(answers.view(-1, 10), y[:, i].flatten())
            train_loss += loss.item()

            loss.backward()
            optimizer.step()
            train_passed += 1

        if ep % 50 == 0 and ep != 0:
            print("Epoch {}. Time: {:.3f}, Train loss: {:.3f}".format(ep, time.time() - start, train_loss / train_passed))
            model.eval()
            generate_sequence()     

In [56]:
model = NeuralNetwork(rnnClass=nn.RNN, input_size=10, embedding_size=64, 
                      num_hiddens=64, num_classes=10)

model = model.cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train(model, criterion, optimizer, epochs=300, sequence_len=10, batch_size=1000)

Epoch 50. Time: 0.107, Train loss: 1.017
tensor([[8, 6, 1, 2, 5, 6, 3, 0, 4, 3]]) tensor([[3, 6, 4, 0, 4, 6, 8, 6, 9, 4]])
Epoch 100. Time: 0.100, Train loss: 0.206
tensor([[1, 5, 9, 2, 9, 9, 6, 7, 5, 5]]) tensor([[7, 9, 4, 9, 9, 3, 4, 1, 3, 2]])
Epoch 150. Time: 0.099, Train loss: 0.063
tensor([[4, 2, 6, 5, 4, 0, 5, 9, 6, 8]]) tensor([[0, 6, 9, 0, 8, 9, 4, 9, 2, 3]])
Epoch 200. Time: 0.093, Train loss: 0.032
tensor([[3, 7, 7, 5, 6, 3, 8, 8, 1, 8]]) tensor([[3, 9, 0, 0, 3, 4, 9, 5, 6, 5]])
Epoch 250. Time: 0.101, Train loss: 0.016
tensor([[7, 5, 7, 5, 9, 5, 9, 1, 3, 4]]) tensor([[1, 9, 5, 8, 6, 3, 0, 4, 9, 6]])
Epoch 300. Time: 0.092, Train loss: 0.010
tensor([[8, 3, 0, 0, 9, 3, 2, 1, 5, 0]]) tensor([[1, 2, 4, 2, 9, 7, 8, 7, 7, 6]])


In [58]:
model = NeuralNetwork(rnnClass=nn.GRU, input_size=10, embedding_size=64, 
                      num_hiddens=64, num_classes=10)

model = model.cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train(model, criterion, optimizer, epochs=300, sequence_len=10, batch_size=1000)

Epoch 50. Time: 0.095, Train loss: 0.100
tensor([[2, 7, 7, 2, 2, 8, 9, 2, 3, 0]]) tensor([[8, 3, 6, 2, 7, 0, 7, 9, 3, 7]])
Epoch 100. Time: 0.104, Train loss: 0.018
tensor([[8, 2, 4, 1, 4, 9, 9, 8, 4, 1]]) tensor([[0, 5, 7, 1, 2, 5, 8, 2, 8, 8]])
Epoch 150. Time: 0.097, Train loss: 0.008
tensor([[7, 0, 2, 3, 2, 2, 5, 4, 9, 3]]) tensor([[1, 5, 9, 3, 8, 6, 8, 5, 2, 6]])
Epoch 200. Time: 0.096, Train loss: 0.004
tensor([[2, 4, 8, 8, 4, 3, 4, 9, 4, 5]]) tensor([[6, 4, 2, 3, 0, 5, 4, 7, 2, 8]])
Epoch 250. Time: 0.096, Train loss: 0.003
tensor([[8, 3, 4, 9, 6, 0, 2, 4, 1, 2]]) tensor([[4, 2, 7, 1, 1, 3, 1, 0, 3, 5]])
Epoch 300. Time: 0.096, Train loss: 0.002
tensor([[0, 7, 2, 1, 1, 6, 2, 1, 5, 1]]) tensor([[4, 9, 3, 0, 5, 8, 2, 0, 5, 3]])


In [60]:
model = NeuralNetwork(rnnClass=nn.LSTM, input_size=10, embedding_size=64, 
                      num_hiddens=64, num_classes=10)

model = model.cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train(model, criterion, optimizer, epochs=300, sequence_len=10, batch_size=1000)

Epoch 50. Time: 0.101, Train loss: 0.064
tensor([[7, 5, 4, 9, 0, 8, 3, 7, 3, 0]]) tensor([[4, 3, 2, 6, 6, 1, 4, 5, 2, 2]])
Epoch 100. Time: 0.100, Train loss: 0.013
tensor([[2, 5, 6, 2, 5, 0, 3, 7, 9, 6]]) tensor([[9, 7, 6, 4, 3, 1, 1, 8, 1, 0]])
Epoch 150. Time: 0.105, Train loss: 0.006
tensor([[3, 3, 7, 1, 5, 6, 7, 9, 7, 3]]) tensor([[0, 3, 1, 8, 2, 7, 6, 2, 3, 2]])
Epoch 200. Time: 0.100, Train loss: 0.003
tensor([[5, 7, 8, 8, 1, 9, 7, 8, 5, 7]]) tensor([[5, 8, 4, 9, 5, 3, 4, 8, 4, 7]])
Epoch 250. Time: 0.097, Train loss: 0.002
tensor([[8, 0, 2, 4, 2, 4, 6, 1, 9, 2]]) tensor([[7, 6, 1, 0, 8, 2, 3, 4, 6, 3]])
Epoch 300. Time: 0.097, Train loss: 0.001
tensor([[6, 3, 8, 8, 7, 3, 0, 7, 7, 7]]) tensor([[3, 5, 4, 0, 0, 8, 5, 8, 4, 5]])
