In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import pandas as pd

import numpy as np
import matplotlib.pyplot as plt

import re

%matplotlib inline

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
df = pd.read_csv('quotes.txt', sep='\t', header=None, names=['quotes'])
df = df.map(lambda x: re.sub('[.,]', '', x.lower()))

In [4]:
import torchtext.vocab as vocab

glove = vocab.GloVe(name='6B', dim=100)

In [5]:
df['quotes_enc'] = df['quotes'].map(lambda w: [glove.stoi.get(i, 1) for i in w.split()]) 

In [6]:
data = []
for enc_quote in df['quotes_enc']:
    for ix in enc_quote:
        data.append(ix)
len(data)

69498

In [7]:
tensor_data = torch.tensor(data).to(device)

In [88]:
block_size = 8
batch_size = 1

def build_train_test_dataset(data, train_size_percent, block_size, batch_size):
    
    n = int(train_size_percent / 10 * len(data))
    
    train_data = data[:n]
    val_data = data[n:]
    
    ix = torch.randint(len(data) - block_size, (batch_size, ))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    
    return x, y
    
    
X, Y = build_train_test_dataset(tensor_data, 70, block_size, batch_size) 
print(X)
print(Y)

tensor([[  171,     5, 16942,    59,   185, 19423,     1,    36]],
       device='cuda:0')
tensor([[    5, 16942,    59,   185, 19423,     1,    36,   285]],
       device='cuda:0')


In [9]:
def make_lower_tril_from(original_matrix):
    triangular_matrix = torch.tril(original_matrix.repeat(original_matrix.size(1), 1), diagonal=0)
    return triangular_matrix

In [94]:
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        
        self.embedings = nn.Embedding.from_pretrained(glove.vectors)
        self.num_features = glove.dim
        self.num_layers_rnn = 1
        self.num_directions_rnn = 1 # if 2 means prediction of past and future
        self.hidden_size = self.num_features // 2
        
        self.rnn = nn.RNN(input_size=self.num_features,
                          hidden_size=self.hidden_size,
                          num_layers=self.num_layers_rnn,
                          batch_first=True).to(device)
        
        self.fc = nn.Linear(50, len(glove)).to(device)
        
        
    def forward(self, x, targets):
        B, T, C = x.shape
        
        h0 = torch.zeros(self.num_layers_rnn * self.num_directions_rnn, 
                         B,
                         self.hidden_size,
                         requires_grad=True).to(device)
        
        x = self.embedings(x)
        x = x.view(B, T, -1)
        
        out, hidden = self.rnn(x, h0)
        
        out = out.view(-1, 50)
        
        
        out = self.fc(out)
        
        targets = targets.view(-1)
        
        loss = F.cross_entropy(out, targets)
        
        return out, hidden, loss
        

model = RNN().to(device)
total_params = sum(p.numel() for p in model.parameters())
print(f'Общее количество обучаемых параметров в модели: {total_params}')

Общее количество обучаемых параметров в модели: 60407600


In [95]:
epochs = 3000
lr = 0.00001
lossi = []

In [96]:
model.train = True
optimizer = optim.Adam(model.parameters(), lr=lr)


for epoch in range(epochs):
    inp, actual = build_train_test_dataset(tensor_data, 70, block_size, batch_size)
    inp = inp.unsqueeze(-1)
    
    optimizer.zero_grad(set_to_none=True)

    out, hidden, loss = model(inp, actual)
    
    loss.backward()
        
    optimizer.step()
    if epoch % 300 == 0:
        print(loss)
    lossi.append(loss.item())
loss

tensor(13.0076, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(12.8978, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(12.7545, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(12.9384, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(12.8135, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(13.0635, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(12.6672, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(12.6165, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(12.4818, device='cuda:0', grad_fn=<NllLossBackward0>)


KeyboardInterrupt: 

In [70]:
plt.plot(torch.tensor(lossi).view(-1, 100).mean(1))

RuntimeError: shape '[-1, 100]' is invalid for input of size 20921

In [50]:
model.eval = True
hidden = torch.zeros(2, 1, 100).to(device)
inp = glove.stoi['the']
k = 3
context = []
context.append(inp)
for i in range(20):
    out, hidden = model(torch.tensor(context.copy()).to(device), hidden)
    topk_values, topk_indices = torch.topk(out, k)
    sampled_index = torch.multinomial(F.softmax(topk_values, dim=0), 1)
    context.append(topk_indices[0][sampled_index[0].item()].item())

In [51]:
' '.join(map(lambda x: glove.itos[x], [i for i in context]))

'the same mentality 1960s his american world world kids world same 1960s mentality family pleasure addict world world crafts sign metric'

In [52]:
torch.save(model.state_dict(), 'model3')