In [29]:
# import necessary package

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [30]:
# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


# Load Data

In [31]:
# Open text file
with open("laskar_pelangi.txt", "r") as f:
    text = f.read()

In [32]:
# check the first 100 characters
text[:100]

'seorang bapak tua berwajah sabar, Bapak K.A. Harfan Efendy Noor, \nsang kepala sekolah dan seorang wa'

# Tokenization

In [33]:
chars = tuple(set(text))

# pair int with character in text
int2char = {i: ch for i, ch in enumerate(set(text))}

# reverse
char2int = {ch: i for (i, ch) in int2char.items()}

In [34]:
# get text encoding result
encoded = np.array([char2int[ch] for ch in text])

encoded[:10]

array([40, 33, 81, 62, 47, 26, 30, 21, 90, 47])

In [35]:
len(encoded)

692185

In [36]:
def one_hot_encode(arr, n_labels):
    """
    function to apply one hot encoding in sequence array

    params
        arr: sequence array
        n_labels: number of characters (size of arr)

    returns
        one hot encoded array
    """
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    one_hot = one_hot.reshape((*arr.shape, n_labels))

    return one_hot

In [37]:
sample = np.array([[3, 5, 2]])

one_hot_encode(sample, n_labels=8)

array([[[0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0.]]], dtype=float32)

# Create Batch

In [38]:
def get_batches(arr, batch_size, seq_length):

    batch_size_total = batch_size * seq_length

    n_batches = len(arr) // batch_size_total

    arr = arr[:n_batches * batch_size_total]

    arr = arr.reshape((batch_size, -1))

    for n in range(0, arr.shape[1], seq_length):

        x = arr[:, n:n+seq_length]

        y = np.zeros_like(x)

        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        
        yield x, y


In [39]:
batches = get_batches(encoded, 8, 50)

x, y = next(batches)

In [40]:
class CharRNN(nn.Module):

    def __init__(self, tokens, n_hidden=256, n_layers=2, drop_prob=0.5, lr=0.001):

        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr

        # create char dictionaries
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}

        # define layers of the model
        self.lstm = nn.LSTM(
            len(self.chars), n_hidden, n_layers, dropout=drop_prob, batch_first=True
        )

        # define a dropout layer
        self.dropout = nn.Dropout(drop_prob)

        # define the final, fully connected output layer
        self.fc = nn.Linear(n_hidden, len(self.chars))

    def forward(self, x, hidden):

        # get the outputs and the new hidden state from the lstm
        r_output, hidden = self.lstm(x, hidden)

        # pass through a dropout layer
        out = self.dropout(r_output)

        # stack up LSTM using view
        out = out.contiguous().view(-1, self.n_hidden)

        # put x through the fully connectedd layer
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):

        # create two new tensors with sizes n_layers x batch_size x n_hidden
        # initialized to zero, for hidden state and cell state of LSTM

        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (
                weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda()
            )
        else:
            hidden = (
                weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                weight.new(self.n_layers, batch_size, self.n_hidden).zero_()
            )

        return hidden


In [41]:
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):

    net.train()

    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # create training and validation
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if(train_on_gpu):
        net.cuda()

    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1

            # one hot
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # create new variables for the hidden state
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()

            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop
            loss = criterion(output, targets.view(batch_size*seq_length).long())
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            # loss stats
            if counter % print_every == 0:

                # get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()

                for x, y in get_batches(val_data, batch_size, seq_length):

                    # one hot encode
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)

                    val_h = tuple([each.data for each in val_h])

                    inputs, targets = x, y
                    if (train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())

                    val_losses.append(val_loss.item())

                # reset to train mode after iterate validation
                net.train()

                print(
                    f"Epoch: {e+1}/{epochs}...",
                    f"Step: {counter}...",
                    "Loss: {:.4f}...".format(loss.item()),
                    "Val Loss: {:.4f}".format(np.mean(val_losses))
                )

# instantiate model

In [42]:
n_hidden = 512
n_layers = 2

net = CharRNN(chars, n_hidden, n_layers)
print(net)

CharRNN(
  (lstm): LSTM(93, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=93, bias=True)
)


In [43]:
batch_size = 128
seq_length = 100
n_epochs = 20

# train
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length)

Epoch: 1/20... Step: 10... Loss: 3.1452... Val Loss: 3.0748
Epoch: 1/20... Step: 20... Loss: 3.0805... Val Loss: 3.0363
Epoch: 1/20... Step: 30... Loss: 3.0465... Val Loss: 3.0299
Epoch: 1/20... Step: 40... Loss: 3.0371... Val Loss: 3.0294
Epoch: 2/20... Step: 50... Loss: 3.0363... Val Loss: 3.0252
Epoch: 2/20... Step: 60... Loss: 3.0531... Val Loss: 3.0230
Epoch: 2/20... Step: 70... Loss: 3.0357... Val Loss: 3.0190
Epoch: 2/20... Step: 80... Loss: 3.0309... Val Loss: 3.0089
Epoch: 2/20... Step: 90... Loss: 3.0025... Val Loss: 2.9837
Epoch: 3/20... Step: 100... Loss: 2.9395... Val Loss: 2.9099
Epoch: 3/20... Step: 110... Loss: 2.8034... Val Loss: 2.7781
Epoch: 3/20... Step: 120... Loss: 2.6861... Val Loss: 2.6372
Epoch: 3/20... Step: 130... Loss: 2.5561... Val Loss: 2.5351
Epoch: 3/20... Step: 140... Loss: 2.4982... Val Loss: 2.4621
Epoch: 4/20... Step: 150... Loss: 2.4405... Val Loss: 2.3798
Epoch: 4/20... Step: 160... Loss: 2.3731... Val Loss: 2.3407
Epoch: 4/20... Step: 170... Loss:

In [44]:
model_name = "rnn_20_epoch.net"

checkpoint = {
    "n_hidden": net.n_hidden,
    "n_layers": net.n_layers,
    "state_dict": net.state_dict(),
    "tokens": net.chars
}

with open(model_name, "wb") as f:
    torch.save(checkpoint, f)

In [45]:
def predict(net, char, h=None, top_k=None):

    # tensor input
    x = np.array([[net.char2int[char]]])
    x = one_hot_encode(x, len(net.chars))
    inputs = torch.from_numpy(x)

    if(train_on_gpu):
        inputs = inputs.cuda()

    # detach hidden state from history
    h = tuple([each.data for each in h])

    # get the output of the model
    out, h = net(inputs, h)

    # get the character probabilities
    p = F.softmax(out, dim=1).data
    if(train_on_gpu):
        p = p.cpu() # move to cpu
    
    # get top characters
    if top_k is None:
        top_ch = np.arange(len(net.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()

    # select the likely next character with some element of randomness
    p = p.numpy().squeeze()
    char = np.random.choice(top_ch, p=p/p.sum())

    return net.int2char[char], h

In [46]:
def sample(net, size, prime="The", top_k=None):

    if (train_on_gpu):
        net.cuda()
    else:
        net.cpu()

    net.eval()

    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)

    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return "".join(chars)

In [48]:
print(sample(net, 1000, prime="suatu hari", top_k=5))

suatu hari dari sampul tampa bertungguh 
manakiana melomoki duru sisi tika sangat dera mereka diamasiku dinggetar. 

"Bu Maram 
dan. 

Sekaling tak pelungan kami depat pelungi kanasatan mulam 
kami tidah di 
belikung,, berdangat ketadakun 
pari keritas kemalika ika tak bersenyundang penggamput pisuk kulut kami sama. Seperanya buah karena koreka adelah persus mata buah semua tak punya sepanjang. Aku tahun sebaah mengimiranku dalam sumbah daun saju selumah di sapang dari berakang. Sebenah poriati sita. 

Kama pulin tersekolasi. 

Ketakian, beraksi du pada duar kembatari di buahan banya karena sekali melahat bangku delan bentak-berut sama terbosanga dan sebuah kami mendekatkan poson kurang-tambang bukan, tapi bersisut komanya 
dengan birukan sembarang, sekorah sepeda 
meruasakan dan sungah terpulainya. Kiti keterasa karena sendiri. 

Aka maka mempuka merihatikan sepinya senduri di soko para siata bertaru tak berbuang duan tiasa ke mendapat 
pesan belun binga me- 
ngurtakkan kati seperti p

# Loading a Checkpoint

In [49]:
with open("rnn_20_epoch.net", "rb") as f:
    checkpoint = torch.load(f)

loaded = CharRNN(
    checkpoint["tokens"],
    n_hidden=checkpoint["n_hidden"],
    n_layers=checkpoint["n_layers"] 
)

loaded.load_state_dict(checkpoint["state_dict"])

<All keys matched successfully>

In [50]:
print(sample(loaded, 2000, top_k=5, prime="seorang bapak"))

seorang bapakan simasinya di diatang berak dirindah sumpah 
tapi tubanya. 

Sebuah kelak minit mereka. Suaru duler bahan, selelah masik ketika tembah, bentak kepantik 
meroka sumbai kami sepatai ke atah sekarang-bangang dengan seperti pusar manat 
menyilah semuanya belapan komisi sunda dua membuat di sukunan di saperah. Makanya semua sebugang berusas kolong korek sampai keladu pohon kemanisan menghomponkong berkorik, bahatan beratuah 
kami. Suaranya, selerang kanyangan, 
berkemadikan kumping sebertik pesangkan 
masam duru ke buahan, tangan kuning busan sina menyelahkan semengari satik menjelat di berunggah punya di dalam kemusia dan belapanan du semadah pertakangan 
karang 
karang di sana mengantar burung kilamat menyentuh, kute-diatasi seberapa sebuah tunati sepada 
di sini sekulih membawa sebuah. Mahar 
satu. 


Kami menjudukkan. 



188 



Andrea Hiratakan musih kami tampak pengintang sudah dalam masih. 

Aku bahas 
kati. 



49 



Andrea Hirata 

terahan semakin tangan-barang 
ma