In [4]:
import re
import torch
from sklearn.model_selection import train_test_split         # разбить данные на тестовые и тренеровочные
from random import randint
import warnings
warnings.filterwarnings("ignore")

def generate_sequence(len_sequence):
    """Generate a random digit sequence and its "shifted by first digit" target.

    Args:
        len_sequence: Number of digits to generate (non-negative integer).

    Returns:
        A tuple ``(X, Y)`` of int64 tensors. ``X`` has shape
        ``(len_sequence, 1)`` and holds uniformly drawn digits 0..9. ``Y`` has
        shape ``(len_sequence,)`` with ``Y[0] == X[0]`` and, for ``i >= 1``,
        ``Y[i] == (X[i] + X[0]) % 10``.
    """
    # The upper bound of torch.randint is exclusive: high=10 is required for
    # the digit 9 to be drawn at all.
    X = torch.randint(low=0, high=10, size=(len_sequence, 1), dtype=torch.long)
    if len_sequence == 0:
        # Nothing to shift; avoid indexing X[0] on an empty tensor.
        return X, torch.zeros(0, dtype=torch.long)

    first = X[0, 0]
    # Digits are < 10, so the sum is < 20 and "% 10" equals the original
    # "subtract 10 when the sum reaches 10" rule.
    Y = (X[:, 0] + first) % 10
    Y[0] = first  # the first target is the first digit itself, not doubled
    return X, Y

def sample(preds):
    """Randomly draw one class index per row of ``preds``.

    ``preds`` is softmaxed along dimension 1, a single multinomial draw is
    taken for every row, and the position of the drawn (one-hot) entry is
    returned. The result is stochastic: it is a sample from the predicted
    distribution, not its argmax.
    """
    class_probs = torch.softmax(preds, dim=1)  # each row now sums to 1
    drawer = torch.distributions.multinomial.Multinomial(1, class_probs)
    one_hot_draw = drawer.sample()  # exactly one 1 per row
    return one_hot_draw.argmax(dim=1)

# Recurrent classifier whose recurrent cell type is supplied by the caller
# (e.g. torch.nn.RNN, torch.nn.LSTM or torch.nn.GRU).
class RnnFlex(torch.nn.Module):
    """Embedding -> recurrent layer -> linear classifier on the final hidden state.

    Args:
        rnnClass: Recurrent layer class, called as
            ``rnnClass(embedding_size, num_hiddens, batch_first=True)``.
        dictionary_size: Number of distinct input symbols for the embedding.
        embedding_size: Dimension of each embedding vector.
        num_hiddens: Size of the recurrent hidden state.
        num_classes: Number of output classes (logits per sample).
        name: Human-readable label stored on the model.
    """

    def __init__(self, rnnClass, dictionary_size, embedding_size, num_hiddens, num_classes, name):
        super().__init__()
        self.name = name
        self.num_hiddens = num_hiddens
        # Learns a dense vector for every input symbol.
        self.embedding = torch.nn.Embedding(dictionary_size, embedding_size)
        # batch_first=True: the layer consumes input laid out as (batch, seq, feature).
        self.hidden = rnnClass(embedding_size, num_hiddens, batch_first=True)
        self.output = torch.nn.Linear(num_hiddens, num_classes)

    def forward(self, X):
        """Return class logits of shape ``(batch, num_classes)`` for index tensor ``X``."""
        out = self.embedding(X)
        _, state = self.hidden(out)
        # LSTM returns its final state as a tuple (h_n, c_n), whereas RNN and
        # GRU return h_n directly. Pick h_n explicitly so that every cell type
        # yields logits with the same (batch, num_classes) shape.
        if isinstance(state, tuple):
            state = state[0]
        # h_n is (num_layers, batch, hidden); use the last layer's state
        # (identical to index 0 for the single-layer cell built above).
        predictions = self.output(state[-1])
        return predictions


# A single 200-element sequence supplies every (input, target) pair.
X, Y = generate_sequence(200)

# Split into train / validation / test: 70 % goes to training, and the
# remaining 30 % is then halved into validation and test parts.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

BATCH_SIZE = 16  # samples (rows) per mini-batch

# Every split is wrapped in a shuffling DataLoader.
data_train = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_train, y_train),
    BATCH_SIZE,
    shuffle=True,
)
data_test = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_test, y_test),
    BATCH_SIZE,
    shuffle=True,
)
# `dataset` stays bound to the validation dataset, as the last one built.
dataset = torch.utils.data.TensorDataset(X_valid, y_valid)
data_valid = torch.utils.data.DataLoader(dataset, BATCH_SIZE, shuffle=True)


# Three classifiers that differ only in the recurrent cell type:
# 10 input symbols, 64-dim embeddings, 128 hidden units, 10 output classes.
model_RNN = RnnFlex(torch.nn.RNN, 10, 64, 128, 10, 'RNN')
model_LSTM = RnnFlex(torch.nn.LSTM, 10, 64, 128, 10, 'LSTM')
model_GRU = RnnFlex(torch.nn.GRU, 10, 64, 128, 10, 'GRU')


def _batch_logits(model, X_b):
    """Run ``model`` on one batch and return logits shaped (batch, num_classes)."""
    answers = model(X_b)
    # The model output may carry a leading singleton axis (e.g. when an LSTM's
    # h_n is passed to the classifier un-indexed). Flatten to 2-D explicitly
    # instead of squeeze(), which would also drop the batch axis whenever a
    # batch holds a single sample.
    return answers.reshape(-1, answers.shape[-1])


def _sampled_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` whose sampled class matches the target.

    NOTE: predictions are drawn with ``sample`` (stochastic), so the value
    fluctuates between calls even for a fixed model.
    """
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for X_b, y_b in loader:
            predicted = sample(_batch_logits(model, X_b))
            correct += (predicted == y_b).sum().item()
            total += y_b.shape[0]
    return correct / total


for model in (model_RNN, model_LSTM, model_GRU):
    print()
    print('model is {}'.format(model.name))

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    for ep in range(100):

        train_loss = 0.
        train_passed = 0
        train_acc_sum = 0
        n_train = 0

        model.train()
        for X_b, y_b in data_train:
            optimizer.zero_grad()
            answers = _batch_logits(model, X_b)

            # Accuracy bookkeeping only; detach so sampling stays out of the graph.
            predicted = sample(answers.detach())
            train_acc_sum += (predicted == y_b).sum().item()
            n_train += y_b.shape[0]

            loss = criterion(answers, y_b)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            train_passed += 1

        # Test accuracy is only reported every 10th epoch, so only compute it then.
        if ep % 10 == 0:
            test_acc = _sampled_accuracy(model, data_test)
            print("Epoch {}    Train Loss: {:.3f}  Train acc: {:.3f}  Test acc: {:.3f}".format(ep, train_loss / train_passed, train_acc_sum / n_train, test_acc))

    print("Valid acc: {:.3f}".format(_sampled_accuracy(model, data_valid)))




model is RNN
Epoch 0    Train Loss: 1.838  Train acc: 0.150  Test acc: 0.267
Epoch 10    Train Loss: 0.062  Train acc: 0.964  Test acc: 0.933
Epoch 20    Train Loss: 0.037  Train acc: 0.957  Test acc: 0.967
Epoch 30    Train Loss: 0.033  Train acc: 0.979  Test acc: 0.933
Epoch 40    Train Loss: 0.031  Train acc: 0.971  Test acc: 0.967
Epoch 50    Train Loss: 0.030  Train acc: 0.986  Test acc: 1.000
Epoch 60    Train Loss: 0.029  Train acc: 0.986  Test acc: 0.967
Epoch 70    Train Loss: 0.029  Train acc: 0.986  Test acc: 1.000
Epoch 80    Train Loss: 0.028  Train acc: 0.979  Test acc: 0.933
Epoch 90    Train Loss: 0.028  Train acc: 0.993  Test acc: 0.967
Valid acc: 1.000

model is LSTM
Epoch 0    Train Loss: 2.180  Train acc: 0.107  Test acc: 0.133
Epoch 10    Train Loss: 0.102  Train acc: 0.929  Test acc: 0.967
Epoch 20    Train Loss: 0.054  Train acc: 0.986  Test acc: 0.933
Epoch 30    Train Loss: 0.036  Train acc: 0.986  Test acc: 0.967
Epoch 40    Train Loss: 0.032  Train acc: 0.97