In [None]:
import datetime

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# training file can be opened through a regular filesystem path.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Path prefix of the training data. DatasetSeq builds the file name by plain
# string concatenation (data_dir + train_lang + '.train'), so the value has to
# end with a path separator.
data_dir = 'drive/My Drive/'
# Language code; selects the '<train_lang>.train' file when given to DatasetSeq.
train_lang = 'en'

In [None]:
import torch
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence

class DatasetSeq(Dataset):
    """Sequence-labelling dataset read from a ``<train_lang>.train`` text file.

    The file is parsed as sentences separated by an empty line, with one
    ``<word> <label>`` pair per line inside a sentence. Words, labels and
    characters are mapped to integer ids in order of first appearance,
    starting at 1; id 0 is left free for padding.

    Attributes:
        word_vocab: {word: id} mapping.
        target_vocab: {label: id} mapping.
        char_vocab: {character: id} mapping.
        encoded_sequences: one list of word ids per sentence.
        encoded_targets: one list of label ids per sentence.
        encoded_char_sequences: per sentence, one list of char ids per word.
    """

    def __init__(self, data_dir, train_lang='en'):
        """Read ``data_dir + train_lang + '.train'`` and encode every sentence.

        Args:
            data_dir: path prefix; concatenated directly with the file name,
                so it must end with a path separator.
            train_lang: language code used as the file stem.

        Raises:
            ValueError: if a non-empty line has no space separating the word
                from its label.
        """
        # Explicit encoding so parsing does not depend on the platform default.
        with open(data_dir + train_lang + '.train', 'r', encoding='utf-8') as f:
            train = f.read().split('\n\n')

        # delete extra tag markup
        train = [x for x in train if '_ ' not in x]

        # init vocabs of tokens for encoding {<str> token: <int> id}
        self.target_vocab = {}  # {p: 1, a: 2, r: 3, pu: 4}
        self.word_vocab = {}  # {cat: 1, sat: 2, on: 3, mat: 4, '.': 5}
        self.char_vocab = {}  # {c: 1, a: 2, t: 3, ' ': 4, s: 5}

        # Cat sat on mat. -> [1, 2, 3, 4, 5]
        # p    a  r  p pu -> [1, 2, 3, 1, 4]
        # chars  -> [1, 2, 3, 4, 5, 2, 3, 4]

        # init encoded sequences lists (processed data)
        self.encoded_sequences = []
        self.encoded_targets = []
        self.encoded_char_sequences = []

        for line in train:
            sequence = []
            target = []
            chars = []
            for item in line.split('\n'):
                if item == '':
                    continue
                # Split on the LAST space only: the label is the final field,
                # so a token that itself contains a space stays intact.
                parts = item.rsplit(' ', 1)
                if len(parts) != 2:
                    raise ValueError(
                        f"malformed line, expected '<word> <label>': {item!r}"
                    )
                word, label = parts

                # Ids start at 1 because 0 is the padding value; setdefault
                # assigns the next free id only on first sight of a token.
                sequence.append(
                    self.word_vocab.setdefault(word, len(self.word_vocab) + 1)
                )
                target.append(
                    self.target_vocab.setdefault(label, len(self.target_vocab) + 1)
                )
                chars.append([
                    self.char_vocab.setdefault(char, len(self.char_vocab) + 1)
                    for char in word
                ])

            # A trailing blank block in the file produces a sentence with no
            # tokens; it carries no training signal, so it is not stored.
            if not sequence:
                continue
            self.encoded_sequences.append(sequence)
            self.encoded_targets.append(target)
            self.encoded_char_sequences.append(chars)

    def __len__(self):
        """Return the number of stored sentences."""
        return len(self.encoded_sequences)

    def __getitem__(self, index):
        """Return the encoded sentence at ``index`` as a dict of plain lists."""
        return {
            'data': self.encoded_sequences[index],  # [1, 2, 3, 4, 6] len=5
            'char': self.encoded_char_sequences[index],  # [[1,2,3], [4,5], [1,2], [2,6,5,4], []] len=5
            'target': self.encoded_targets[index],  # one label id per word
        }

In [None]:
# Pass the configured language explicitly so that changing `train_lang` in the
# config cell actually changes which '<train_lang>.train' file is loaded.
dataset = DatasetSeq(data_dir, train_lang)

In [None]:
def collate_fn(batch):
    """Pad a list of dataset items into rectangular batch tensors.

    Args:
        batch: list of dicts with integer-list entries under 'data' and
            'target' (other keys are ignored).

    Returns:
        Dict with 'data' and 'target' long tensors, batch dimension first,
        right-padded with 0 to the longest sequence in the batch.
    """
    # dtype is pinned to long: torch.as_tensor([]) would otherwise be float32,
    # and pad_sequence takes the output dtype from the first sequence, so one
    # empty item could turn the whole batch into floats.
    data = [torch.as_tensor(item['data'], dtype=torch.long) for item in batch]
    target = [torch.as_tensor(item['target'], dtype=torch.long) for item in batch]

    return {
        'data': pad_sequence(data, batch_first=True, padding_value=0),
        'target': pad_sequence(target, batch_first=True, padding_value=0),
    }

In [None]:
# GRU
class GRUPredictor(nn.Module):
    """Per-token classifier: word embedding -> GRU -> linear layer."""

    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes):
        """Create the three layers.

        Args:
            vocab_size: number of rows in the embedding table.
            emb_dim: embedding size, also the GRU input size.
            hidden_dim: GRU hidden size, also the classifier input size.
            n_classes: number of scores produced for every position.
        """
        super().__init__()
        self.word_emb = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim)
        self.gru = nn.GRU(input_size=emb_dim, hidden_size=hidden_dim, batch_first=True)
        self.classifier = nn.Linear(in_features=hidden_dim, out_features=n_classes)
        self.hidden_dim = hidden_dim

    def forward(self, x):
        """Return class scores for every position of ``x``.

        ``x`` holds integer token ids; the recurrent layer is batch-first, so
        the leading dimension is treated as the batch.
        """
        # The recurrent layer returns (per-step outputs, final state);
        # only the per-step outputs feed the classifier.
        per_step = self.gru(self.word_emb(x))[0]
        return self.classifier(per_step)

In [None]:
# RNN
class RNNPredictor(nn.Module):
    """Per-token classifier: word embedding -> plain RNN -> linear layer."""

    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes):
        """Create the three layers.

        Args:
            vocab_size: number of rows in the embedding table.
            emb_dim: embedding size, also the RNN input size.
            hidden_dim: RNN hidden size, also the classifier input size.
            n_classes: number of scores produced for every position.
        """
        super().__init__()
        self.word_emb = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim)
        # The attribute is called `gru` although it holds an nn.RNN; the name
        # is kept because it is part of the state_dict keys.
        self.gru = nn.RNN(input_size=emb_dim, hidden_size=hidden_dim, batch_first=True)
        self.classifier = nn.Linear(in_features=hidden_dim, out_features=n_classes)
        self.hidden_dim = hidden_dim

    def forward(self, x):
        """Return class scores for every position of ``x``.

        ``x`` holds integer token ids; the recurrent layer is batch-first, so
        the leading dimension is treated as the batch.
        """
        # The recurrent layer returns (per-step outputs, final state);
        # only the per-step outputs feed the classifier.
        per_step = self.gru(self.word_emb(x))[0]
        return self.classifier(per_step)

In [None]:
# LSTM
class LSTMPredictor(nn.Module):
    """Per-token classifier: word embedding -> LSTM -> linear layer."""

    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes):
        """Create the three layers.

        Args:
            vocab_size: number of rows in the embedding table.
            emb_dim: embedding size, also the LSTM input size.
            hidden_dim: LSTM hidden size, also the classifier input size.
            n_classes: number of scores produced for every position.
        """
        super().__init__()
        self.word_emb = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim)
        # The attribute is called `gru` although it holds an nn.LSTM; the name
        # is kept because it is part of the state_dict keys.
        self.gru = nn.LSTM(input_size=emb_dim, hidden_size=hidden_dim, batch_first=True)
        self.classifier = nn.Linear(in_features=hidden_dim, out_features=n_classes)
        self.hidden_dim = hidden_dim

    def forward(self, x):
        """Return class scores for every position of ``x``.

        ``x`` holds integer token ids; the recurrent layer is batch-first, so
        the leading dimension is treated as the batch.
        """
        # The recurrent layer returns (per-step outputs, final states);
        # only the per-step outputs feed the classifier.
        per_step = self.gru(self.word_emb(x))[0]
        return self.classifier(per_step)

In [None]:
# Hyperparameters
# Vocabulary ids start at 1, so +1 makes room for index 0 (the padding value).
vocab_size = len(dataset.word_vocab) + 1
n_classes = len(dataset.target_vocab) + 1
n_chars = len(dataset.char_vocab) + 1
emb_dim = 256
hidden = 256
n_epochs = 10
batch_size = 64
cuda_device = 0  # GPU index; -1 forces CPU
# Fall back to CPU when no CUDA device is available, so the notebook still
# runs on a CPU-only runtime instead of failing at the first `.to(device)`.
use_cuda = cuda_device != -1 and torch.cuda.is_available()
device = f'cuda:{cuda_device}' if use_cuda else 'cpu'

# GRU

In [None]:
# Build the GRU tagger together with its optimizer and loss.
gru_model = GRUPredictor(vocab_size, emb_dim, hidden, n_classes).to(device)
gru_model.train()
optim = torch.optim.Adam(gru_model.parameters(), lr=0.001)
# collate_fn pads `target` with 0 and real label ids start at 1, so positions
# with target 0 are padding; ignore_index keeps them out of the loss average
# and out of the gradient.
loss_func = nn.CrossEntropyLoss(ignore_index=0)

In [None]:
# Train the GRU tagger for n_epochs and report the wall-clock training time.
start = datetime.datetime.now()
# Built once: with shuffle=True the loader draws a new permutation every time
# it is iterated, so it does not have to be recreated per epoch.
dataloader = DataLoader(dataset,
                        batch_size,
                        shuffle=True,
                        collate_fn=collate_fn,
                        drop_last=True,
                        )
for epoch in range(n_epochs):
    for i, batch in enumerate(dataloader):
        optim.zero_grad()

        predict = gru_model(batch['data'].to(device))
        # Flatten (batch, time) so every token position is one loss sample.
        loss = loss_func(predict.view(-1, n_classes),
                         batch['target'].to(device).view(-1),
                         )
        loss.backward()
        optim.step()
        if i % 200 == 0:
            print(f'epoch: {epoch}, step: {i}, loss: {loss.item()}')

    # Model-specific file name, so checkpoints written by the training cells
    # of the other architectures cannot overwrite this model's weights.
    torch.save(gru_model.state_dict(), f'./gru_chkpt_{epoch}.pth')
end = datetime.datetime.now() - start
print('GRU learning time is: ', end)

epoch: 0, step: 0, loss: 2.894826650619507
epoch: 0, step: 200, loss: 0.20524267852306366
epoch: 1, step: 0, loss: 0.17416904866695404
epoch: 1, step: 200, loss: 0.12756206095218658
epoch: 2, step: 0, loss: 0.116753488779068
epoch: 2, step: 200, loss: 0.09209083020687103
epoch: 3, step: 0, loss: 0.08104131370782852
epoch: 3, step: 200, loss: 0.08020239323377609
epoch: 4, step: 0, loss: 0.06311041861772537
epoch: 4, step: 200, loss: 0.0722648873925209
epoch: 5, step: 0, loss: 0.0323161780834198
epoch: 5, step: 200, loss: 0.04849035292863846
epoch: 6, step: 0, loss: 0.03726932033896446
epoch: 6, step: 200, loss: 0.03833552822470665
epoch: 7, step: 0, loss: 0.01742696575820446
epoch: 7, step: 200, loss: 0.02975892834365368
epoch: 8, step: 0, loss: 0.01121133379638195
epoch: 8, step: 200, loss: 0.015591143630445004
epoch: 9, step: 0, loss: 0.01610044203698635
epoch: 9, step: 200, loss: 0.01326102577149868
GRU learning time is:  0:00:30.611860


# RNN

In [None]:
# Build the plain-RNN tagger together with its optimizer and loss.
rnn_model = RNNPredictor(vocab_size, emb_dim, hidden, n_classes).to(device)
rnn_model.train()
optim = torch.optim.Adam(rnn_model.parameters(), lr=0.001)
# collate_fn pads `target` with 0 and real label ids start at 1, so positions
# with target 0 are padding; ignore_index keeps them out of the loss average
# and out of the gradient.
loss_func = nn.CrossEntropyLoss(ignore_index=0)

In [None]:
# Train the plain-RNN tagger for n_epochs and report the wall-clock training time.
start = datetime.datetime.now()
# Built once: with shuffle=True the loader draws a new permutation every time
# it is iterated, so it does not have to be recreated per epoch.
dataloader = DataLoader(dataset,
                        batch_size,
                        shuffle=True,
                        collate_fn=collate_fn,
                        drop_last=True,
                        )
for epoch in range(n_epochs):
    for i, batch in enumerate(dataloader):
        optim.zero_grad()

        predict = rnn_model(batch['data'].to(device))
        # Flatten (batch, time) so every token position is one loss sample.
        loss = loss_func(predict.view(-1, n_classes),
                         batch['target'].to(device).view(-1),
                         )
        loss.backward()
        optim.step()
        if i % 200 == 0:
            print(f'epoch: {epoch}, step: {i}, loss: {loss.item()}')

    # One checkpoint per epoch, named after this model.
    torch.save(rnn_model.state_dict(), f'./rnn_chkpt_{epoch}.pth')
end = datetime.datetime.now() - start
print('RNN learning time is: ', end)

epoch: 0, step: 0, loss: 3.0338234901428223
epoch: 0, step: 200, loss: 0.21511203050613403
epoch: 1, step: 0, loss: 0.2263086587190628
epoch: 1, step: 200, loss: 0.13073144853115082
epoch: 2, step: 0, loss: 0.11319169402122498
epoch: 2, step: 200, loss: 0.0993996113538742
epoch: 3, step: 0, loss: 0.08551615476608276
epoch: 3, step: 200, loss: 0.09885741025209427
epoch: 4, step: 0, loss: 0.07122472673654556
epoch: 4, step: 200, loss: 0.04295482113957405
epoch: 5, step: 0, loss: 0.06360017508268356
epoch: 5, step: 200, loss: 0.07813771069049835
epoch: 6, step: 0, loss: 0.05306210741400719
epoch: 6, step: 200, loss: 0.06509897112846375
epoch: 7, step: 0, loss: 0.025432011112570763
epoch: 7, step: 200, loss: 0.05069582909345627
epoch: 8, step: 0, loss: 0.027461526915431023
epoch: 8, step: 200, loss: 0.03522291034460068
epoch: 9, step: 0, loss: 0.015701087191700935
epoch: 9, step: 200, loss: 0.0238655973225832
RNN learning time is:  0:00:21.302009


# LSTM

In [None]:
# Build the LSTM tagger together with its optimizer and loss.
lstm_model = LSTMPredictor(vocab_size, emb_dim, hidden, n_classes).to(device)
lstm_model.train()
optim = torch.optim.Adam(lstm_model.parameters(), lr=0.001)
# collate_fn pads `target` with 0 and real label ids start at 1, so positions
# with target 0 are padding; ignore_index keeps them out of the loss average
# and out of the gradient.
loss_func = nn.CrossEntropyLoss(ignore_index=0)

In [None]:
# Train the LSTM tagger for n_epochs and report the wall-clock training time.
start = datetime.datetime.now()
# Built once: with shuffle=True the loader draws a new permutation every time
# it is iterated, so it does not have to be recreated per epoch.
dataloader = DataLoader(dataset,
                        batch_size,
                        shuffle=True,
                        collate_fn=collate_fn,
                        drop_last=True,
                        )
for epoch in range(n_epochs):
    for i, batch in enumerate(dataloader):
        optim.zero_grad()

        predict = lstm_model(batch['data'].to(device))
        # Flatten (batch, time) so every token position is one loss sample.
        loss = loss_func(predict.view(-1, n_classes),
                         batch['target'].to(device).view(-1),
                         )
        loss.backward()
        optim.step()
        if i % 200 == 0:
            print(f'epoch: {epoch}, step: {i}, loss: {loss.item()}')

    # Model-specific file name, so checkpoints written by the training cells
    # of the other architectures cannot overwrite this model's weights.
    torch.save(lstm_model.state_dict(), f'./lstm_chkpt_{epoch}.pth')
end = datetime.datetime.now() - start
print('LSTM learning time is: ', end)

epoch: 0, step: 0, loss: 3.056910514831543
epoch: 0, step: 200, loss: 0.15730169415473938
epoch: 1, step: 0, loss: 0.11425551027059555
epoch: 1, step: 200, loss: 0.15840643644332886
epoch: 2, step: 0, loss: 0.13073429465293884
epoch: 2, step: 200, loss: 0.09575193375349045
epoch: 3, step: 0, loss: 0.05823034793138504
epoch: 3, step: 200, loss: 0.09169229120016098
epoch: 4, step: 0, loss: 0.07322391122579575
epoch: 4, step: 200, loss: 0.014530912041664124
epoch: 5, step: 0, loss: 0.038003530353307724
epoch: 5, step: 200, loss: 0.043122172355651855
epoch: 6, step: 0, loss: 0.040944911539554596
epoch: 6, step: 200, loss: 0.043931033462285995
epoch: 7, step: 0, loss: 0.03390738368034363
epoch: 7, step: 200, loss: 0.037540897727012634
epoch: 8, step: 0, loss: 0.015789613127708435
epoch: 8, step: 200, loss: 0.021603120490908623
epoch: 9, step: 0, loss: 0.014474991708993912
epoch: 9, step: 200, loss: 0.013995473273098469
LSTM learning time is:  0:00:33.671119


**Вывод**: GRU и LSTM при заданных гиперпараметрах показали примерно одинаковое качество и время обучения; качество RNN хуже, хотя модель обучается быстрее.

# Example

GRU

In [None]:
# Tag one example sentence with the trained GRU model and time the forward pass.
phrase = 'He ran quickly after the red bus and caught it'
words = phrase.split(' ')
# Plain dict lookup: a word missing from the training vocabulary raises KeyError.
tokens = [dataset.word_vocab[w] for w in words]

start = datetime.datetime.now()
with torch.no_grad():
    gru_model.eval()
    predict = gru_model(torch.tensor(tokens).unsqueeze(0).to(device)) # 1 x T x N_classes
    # squeeze(0) removes only the batch axis; a bare squeeze() would also drop
    # the time axis for a one-word phrase and return a scalar instead of a list.
    labels = torch.argmax(predict, dim=-1).squeeze(0).cpu().detach().tolist()
    end = datetime.datetime.now() - start

target_labels = list(dataset.target_vocab.keys())
# Label ids start at 1; id 0 is the padding class and has no vocabulary entry,
# so it is shown explicitly instead of wrapping around to the last label.
print([target_labels[l-1] if l > 0 else '<pad>' for l in labels])
print(end)

['PRON', 'VERB', 'ADV', 'SCONJ', 'DET', 'ADJ', 'NOUN', 'CCONJ', 'VERB', 'PRON']
0:00:00.003481


RNN

In [None]:
# Tag one example sentence with the trained RNN model and time the forward pass.
phrase = 'He ran quickly after the red bus and caught it'
words = phrase.split(' ')
# Plain dict lookup: a word missing from the training vocabulary raises KeyError.
tokens = [dataset.word_vocab[w] for w in words]

start = datetime.datetime.now()
with torch.no_grad():
    rnn_model.eval()
    predict = rnn_model(torch.tensor(tokens).unsqueeze(0).to(device)) # 1 x T x N_classes
    # squeeze(0) removes only the batch axis; a bare squeeze() would also drop
    # the time axis for a one-word phrase and return a scalar instead of a list.
    labels = torch.argmax(predict, dim=-1).squeeze(0).cpu().detach().tolist()
    end = datetime.datetime.now() - start

target_labels = list(dataset.target_vocab.keys())
# Label ids start at 1; id 0 is the padding class and has no vocabulary entry,
# so it is shown explicitly instead of wrapping around to the last label.
print([target_labels[l-1] if l > 0 else '<pad>' for l in labels])
print(end)

['PRON', 'VERB', 'ADV', 'ADV', 'DET', 'ADJ', 'NOUN', 'CCONJ', 'VERB', 'PRON']
0:00:00.003348


LSTM

In [None]:
# Tag one example sentence with the trained LSTM model and time the forward pass.
phrase = 'He ran quickly after the red bus and caught it'
words = phrase.split(' ')
# Plain dict lookup: a word missing from the training vocabulary raises KeyError.
tokens = [dataset.word_vocab[w] for w in words]

start = datetime.datetime.now()
with torch.no_grad():
    lstm_model.eval()
    predict = lstm_model(torch.tensor(tokens).unsqueeze(0).to(device)) # 1 x T x N_classes
    # squeeze(0) removes only the batch axis; a bare squeeze() would also drop
    # the time axis for a one-word phrase and return a scalar instead of a list.
    labels = torch.argmax(predict, dim=-1).squeeze(0).cpu().detach().tolist()
    end = datetime.datetime.now() - start

target_labels = list(dataset.target_vocab.keys())
# Label ids start at 1; id 0 is the padding class and has no vocabulary entry,
# so it is shown explicitly instead of wrapping around to the last label.
print([target_labels[l-1] if l > 0 else '<pad>' for l in labels])
print(end)

['PRON', 'VERB', 'ADV', 'ADV', 'DET', 'ADJ', 'NOUN', 'CCONJ', 'VERB', 'PRON']
0:00:00.002817


**Вывод:** за одинаковое количество эпох (10) модель RNN обучилась быстрее, чем модели GRU и LSTM. На предсказании частей речи все три модели затрачивают примерно одинаковое время, однако модель GRU делает ошибку на четвёртом слове, определяя наречие как союз. Можно сделать вывод, что на данном примере модель RNN показывает более быструю работу при хорошем качестве предсказания.