### <u>One Piece Character Gen - Character Level LSTM</u>

In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

### Data Processing:

In [2]:
# open text file and read in data as `text`
with open('names.csv', 'r') as f:
    text = f.read()
#show some names:
print(text[:30])

abdullah
absalom
acilia
adele



In [3]:
#shuffle the names
import random
names = text.split('\n')
random.shuffle(names)
text = "\n".join(names)

#show some names:
print(text[:30])

mountain ricky
chesskippa
cott


In [4]:
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# encode the text
encoded = np.array([char2int[ch] for ch in text])
print("{} unique chars".format(len(chars)))
print("Encoded text:",encoded[:10])

40 unique chars
Encoded text: [ 0  1 18 11  3  6 20 11 16 37]


In [5]:
def one_hot_encode(arr, n_labels):
    # Initialize the the encoded array
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    return one_hot

#eg:
test_seq = np.array([[0, 1, 2]])
one_hot = one_hot_encode(test_seq, 3)
print(one_hot)

[[[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]]


### Batch Maker

In [37]:
def get_batches(arr,batch_size,seq_length):
    """
    Creates the generator that reutrns batch data for `arr` list
    of `batch_size` having `seq_lenght`.
    """
    num_data_per_epoch = batch_size*seq_length
    possible_batches = len(arr)//num_data_per_epoch
    
    #trim array to possible bathces
    arr = arr[:possible_batches*num_data_per_epoch]
    
    #reshae to batch first dim
    arr = arr.reshape(batch_size,-1)
    
    #yeild one sequence in each interation
    for i in range(0,arr.shape[1],seq_length):
        # The features
        x = arr[:, i:i+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, i+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

#test batch_size:4 seq_length:5:
batches = get_batches(encoded, 4, 5)
x, y = next(batches)

print("x:\n",x)
print("y:\n",y)

x:
 [[ 0  1 18 11  3]
 [18 36  2  4 15]
 [17 17  6  4 13]
 [ 2 13 22 22 25]]
y:
 [[ 1 18 11  3  6]
 [36  2  4 15  6]
 [17  6  4 13  2]
 [13 22 22 25 17]]


### Defining the NN model

In [38]:
class NetWork(nn.Module):
    def __init__(self,tokens,n_hidden=256,n_layers=1,drop_prob=0.5,lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        # creating character dictionaries
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        
        #defining NN
        self.lstm = nn.LSTM(
            len(tokens),
            n_hidden,
            n_layers,
            dropout=drop_prob,
            batch_first=True
        )
        self.drop = nn.Dropout(p=self.drop_prob)
        self.fcn = nn.Linear(n_hidden, len(tokens))
    
    def forward(self,x,hidden):
        r_out,hidden = self.lstm(x,hidden)
        out = self.drop(r_out)
        out = out.contiguous().view(-1, self.n_hidden)
        out = self.fcn(out)
        
        # return the final output and the hidden state
        return out, hidden
    
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden
        

### Training the model

In [39]:
# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU :D')
else: 
    print('No GPU available :(, training on CPU; consider making n_epochs very small.')

Training on GPU :D


In [85]:
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)
        
        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()
            
            # get the output from the model
            output, h = net(inputs, h)
            
            # calculate the loss and perform backprop
            loss = criterion(output, targets.view(batch_size*seq_length))
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length))
                
                    val_losses.append(val_loss.item())
                
                net.train() # reset to train mode after iterationg through validation data
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [90]:
# define and print the net
n_hidden=256
n_layers=2

net = NetWork(chars, n_hidden, n_layers)
print(net)

NetWork(
  (lstm): LSTM(40, 256, num_layers=2, batch_first=True, dropout=0.5)
  (drop): Dropout(p=0.5, inplace=False)
  (fcn): Linear(in_features=256, out_features=40, bias=True)
)


In [91]:
train(net, encoded, epochs=40, batch_size=32, seq_length=5, lr=0.001, clip=5, val_frac=0.2, print_every=50)

Epoch: 1/40... Step: 50... Loss: 3.0630... Val Loss: 3.0946
Epoch: 2/40... Step: 100... Loss: 3.0337... Val Loss: 3.0847
Epoch: 3/40... Step: 150... Loss: 2.9860... Val Loss: 3.0300
Epoch: 4/40... Step: 200... Loss: 2.8987... Val Loss: 2.8582
Epoch: 5/40... Step: 250... Loss: 2.7619... Val Loss: 2.6551
Epoch: 6/40... Step: 300... Loss: 2.7437... Val Loss: 2.5597
Epoch: 7/40... Step: 350... Loss: 2.6643... Val Loss: 2.5082
Epoch: 8/40... Step: 400... Loss: 2.5533... Val Loss: 2.4828
Epoch: 9/40... Step: 450... Loss: 2.2478... Val Loss: 2.4558
Epoch: 10/40... Step: 500... Loss: 2.4630... Val Loss: 2.4316
Epoch: 11/40... Step: 550... Loss: 2.4654... Val Loss: 2.4207
Epoch: 12/40... Step: 600... Loss: 2.3428... Val Loss: 2.4144
Epoch: 13/40... Step: 650... Loss: 2.2106... Val Loss: 2.4070
Epoch: 14/40... Step: 700... Loss: 2.1490... Val Loss: 2.4071
Epoch: 15/40... Step: 750... Loss: 2.0739... Val Loss: 2.3983
Epoch: 16/40... Step: 800... Loss: 2.3760... Val Loss: 2.3973
Epoch: 17/40... St

### Prediction

In [92]:
def predict(net, char, h=None, top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        
        # tensor inputs
        x = np.array([[net.char2int[char]]])
        x = one_hot_encode(x, len(net.chars))
        inputs = torch.from_numpy(x)
        
        if(train_on_gpu):
            inputs = inputs.cuda()
        
        # detach hidden state from history
        h = tuple([each.data for each in h])
        # get the output of the model
        out, h = net(inputs, h)

        # get the character probabilities
        p = F.softmax(out, dim=1).data
        if(train_on_gpu):
            p = p.cpu() # move to cpu
        
        # get top characters
        if top_k is None:
            top_ch = np.arange(len(net.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        # select the likely next character with some element of randomness
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
        
        # return the encoded value of the predicted char and the hidden state
        return net.int2char[char], h

In [93]:
def sample(net, size, prime='The', top_k=None):
        
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() # eval mode
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [134]:
names = sample(net, 500, prime='bashyal', top_k=5)
names = names.split("\n")

In [138]:
#printing the generated samples
print("List of names generated from root word `bashyal`:\n")
for name in names:
    if len(name) > 5:
        print(name)

List of names generated from root word `bashyal`:

bashyal
kencho
cosino
haramam
hangora devergo
mottan mara
sanjer
korokun
charlotte pock
cannshiku karina
chirot
pansso
charlottt hackbero
charlotte pocanser
charlotte perine
karaka
matsunento
ponchoto
banchan
charlotte alemigaru
dangon
konjin
misaro
mineess
chichas
minite rashili
teu charlotte motchat
charlotte pragin
charlotte braten rask
charotte paromer
anshi hatchi
charlotte purash
kamasun
chardo
