# Following the tutorial to create a Character-Level Recurrent Neural Network
*   https://github.com/LeanManager/NLP-PyTorch

In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

# Preprocessing Data

In [2]:
with open('bible.txt', 'r') as f:
    text = f.read()

In [3]:
chars = tuple(set(text))

int2char = dict(enumerate(chars))

char2int = {char: integer for integer, char in int2char.items()}

encoded = np.array([char2int[ch] for ch in text]) # text in number format

## One-Hot Encoding
*   Se faz a transformação em One Hot para poder facilmente gerar os vetores depois para cada caractere

In [4]:
def one_hot_encoder(arr, n_labels):
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    return one_hot  
#Isso vai gerar a matriz de one hot

## Create Mini-Batches

In [5]:
def get_batches(arr, n_seqs, n_steps):

    batch_size = n_seqs * n_steps #Numero de sentencas e numero de caracteres (tamanho das sentenças)
    n_batches = len(arr)//batch_size
    
    # Mantém um número que não sobram batches inacabados
    arr = arr[:n_batches * batch_size]
    
    # Reshape into n_seqs rows
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0, arr.shape[1], n_steps):
        
        # The features
        x = arr[:, n:n+n_steps]
        
        # The targets, shifted by one
        y = np.zeros_like(x)
        
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+n_steps]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [6]:
batches = get_batches(encoded, 10, 50)

In [7]:
x,y = next(batches)

In [8]:
for k in x:
  text = ''
  for i in k:
    text = text + int2char[i]
  print(text)

1:1 In the beginning God created the heaven and th
at reddish, and it be
shewed to the priest; 13:20 
the LORD followed them.

6:9 And the armed men wen
 shew thyself pure; and with the froward
thou wilt
they were destroyed before the LORD, and before hi
tter than an ox or bullock
that hath horns and hoo
ath from polluting it, and taketh hold
of my coven
, saith the Lord GOD, be it known
unto you: be ash
:23 If any man have ears to hear, let him hear.

4
at after the most
straitest sect of our religion I


In [9]:
for k in y:
  text = ''
  for i in k:
    text = text + int2char[i]
  print(text)

:1 In the beginning God created the heaven and the
t reddish, and it be
shewed to the priest; 13:20 A
he LORD followed them.

6:9 And the armed men went
shew thyself pure; and with the froward
thou wilt 
hey were destroyed before the LORD, and before his
ter than an ox or bullock
that hath horns and hoof
th from polluting it, and taketh hold
of my covena
 saith the Lord GOD, be it known
unto you: be asha
23 If any man have ears to hear, let him hear.

4:
t after the most
straitest sect of our religion I 


# Create RNN

In [10]:
class CharRNN(nn.Module):
    def __init__(self, tokens, n_steps = 100, n_hidden = 256, n_layers = 2, drop_prob = 0.5, lr = 0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr

        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch:ii for ii,ch in self.int2char.items()}

        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, dropout=drop_prob, batch_first=True)

        self.dropout = nn.Dropout(drop_prob)

        self.fc = nn.Linear(n_hidden, len(self.chars))

        self.init_weights()

    def forward(self, x, hc):
        x, (h,c) = self.lstm(x,hc)
        x = self.dropout(x)
        x = x.view(x.size()[0]*x.size()[1], self.n_hidden)
        x = self.fc(x)

        return x, (h,c)

    def predict(self, char, h=None, cuda=False, top_k=None):
        if cuda:
            self.cuda()
        else:
            self.cpu()

        if h is None:
            h = self.init_hidden(1)

        x = np.array([[self.char2int[char]]])
        x = one_hot_encoder(x, len(self.chars))

        inputs = torch.from_numpy(x)
        if cuda:
            inputs.cuda()

        h = tuple([each.data for each in h])
        out, h = self.forward(inputs, h)    
        p = F.softmax(out, dim=1).data

        if cuda:
            p = p.cpu()
        if top_k is None:
            top_ch = np.arange(len(self.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()

        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p = p/p.sum())

        return self.int2char[char], h 

    def init_weights(self):
        initrange = 0.1
        self.fc.bias.data.fill_(0)
        self.fc.bias.data.uniform_(-1,1)

    def init_hidden(self,n_seqs):
        weight = next(self.parameters()).data
        return (weight.new(self.n_layers, n_seqs, self.n_hidden).zero_(),
        weight.new(self.n_layers, n_seqs, self.n_hidden).zero_())

# Train

In [11]:
def train(net,data,epochs=10,n_seqs=10,n_steps=50,lr=0.001,clip=5,val_frac=0.1,cuda=False,print_every=10):
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=lr,weight_decay=0.0001)
    criterion = nn.CrossEntropyLoss()

    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if cuda:
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)

    for epoch in range(epochs):
        h = net.init_hidden(n_seqs)
        for x, y in get_batches(encoded, n_seqs, n_steps):
            counter += 1

        x = one_hot_encoder(x,n_chars)
        inputs,targets = torch.from_numpy(x), torch.from_numpy(y)

        if cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
          
        h = tuple([each.data for each in h])

        net.zero_grad()

        output, h = net.forward(inputs, h)
        loss = criterion(output, targets.view(n_seqs*n_steps).type(torch.cuda.LongTensor))
        loss.backward()

        nn.utils.clip_grad_norm_(net.parameters(), clip)

        opt.step()

        if counter % print_every == 0:
              
            val_h = net.init_hidden(n_seqs)
            val_losses = []
              
            for x, y in get_batches(val_data, n_seqs, n_steps):
                  
                x = one_hot_encoder(x, n_chars)
                x, y = torch.from_numpy(x), torch.from_numpy(y)

                val_h = tuple([each.data for each in val_h])

                inputs, targets = x, y
                if cuda:
                  inputs, targets = inputs.cuda(), targets.cuda()

                output, val_h = net.forward(inputs, val_h)
                val_loss = criterion(output, targets.view(n_seqs*n_steps).type(torch.cuda.LongTensor))

                val_losses.append(val_loss.item())
              
            print("Epoch: {}/{}...".format(epoch+1, epochs),
            "Step: {}...".format(counter),
            "Loss: {:.4f}...".format(loss.item()),
            "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [12]:
if 'net' in locals():
    del net

net = CharRNN(chars,n_hidden=512, n_layers=2)
print(net)

CharRNN(
  (lstm): LSTM(80, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=80, bias=True)
)


In [13]:
n_seqs, n_steps = 128, 100

train(net, encoded, epochs=20, n_seqs=n_seqs, n_steps=n_steps, lr=0.001, cuda=True, print_every=10)

Epoch: 1/20... Step: 10... Loss: 3.2596... Val Loss: 3.2639
Epoch: 1/20... Step: 20... Loss: 3.2092... Val Loss: 3.2032
Epoch: 1/20... Step: 30... Loss: 3.1754... Val Loss: 3.1812
Epoch: 1/20... Step: 40... Loss: 3.1689... Val Loss: 3.1728
Epoch: 1/20... Step: 50... Loss: 3.1672... Val Loss: 3.1688
Epoch: 1/20... Step: 60... Loss: 3.1600... Val Loss: 3.1635
Epoch: 1/20... Step: 70... Loss: 3.1574... Val Loss: 3.1605
Epoch: 1/20... Step: 80... Loss: 3.1619... Val Loss: 3.1497
Epoch: 1/20... Step: 90... Loss: 3.1172... Val Loss: 3.1265
Epoch: 1/20... Step: 100... Loss: 3.0670... Val Loss: 3.0681
Epoch: 1/20... Step: 110... Loss: 2.9509... Val Loss: 2.9676
Epoch: 1/20... Step: 120... Loss: 3.3459... Val Loss: 2.9048
Epoch: 1/20... Step: 130... Loss: 2.8346... Val Loss: 2.8600
Epoch: 1/20... Step: 140... Loss: 2.7227... Val Loss: 2.7544
Epoch: 1/20... Step: 150... Loss: 2.6179... Val Loss: 2.6781
Epoch: 1/20... Step: 160... Loss: 2.5405... Val Loss: 2.5906
Epoch: 1/20... Step: 170... Loss:

In [15]:
model_name = 'rnn.net'


checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [16]:
with open('rnn.net', 'rb') as f:
    checkpoint = torch.load(f)
    
loaded = CharRNN(checkpoint['tokens'], n_hidden=checkpoint['n_hidden'], n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [17]:
def sample(net, size, prime='The', top_k=None, cuda=False):
        
    if cuda:
        net.cuda()
    else:
        net.cpu()

    net.eval()
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    
    h = net.init_hidden(1)
    
    for ch in prime:
        char, h = net.predict(ch, h, cuda=cuda, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        
        char, h = net.predict(chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [18]:
print(sample(loaded, 2000, prime='And Bruno Lima said', top_k=5, cuda=False))

And Bruno Lima said,
The son of Sheala and Ashan, all Abrahashan, the stild which will
die an angel that said, Behold, thou shalt not.

2:11 And he said, I shell be after this disinasion of thy father, in
a man of thee, and shall be thee. And we have done with them.

1:14 Afsering they had saw also, whose way they be and the see with
them.

429:13 And he had said, I will say unto you, I will come, and hath thine
are with their hand and forgiven your shears, and that they was a
good, and to the people shall break the companicions.

23:20 Therefore, and he thou senteds all hand the children of Israel,
and therefore will shek against the LORD, and all the serils of
the whole, and will stand in the sons of Josah, saying, When thou shalt
treaken to his face of the sacrifice of them: for they will saith
us all that shall not, before thee, and walk in her son was brought
to Jerusalem.

1:24 And the presencs of the poreir of the callow also in the trouble thereof
the sabettare, and were their 