# LSTM CharRNN - with different encoding strategies

### Imports and data processing

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils as utils
import numpy as np
import string
import random
import matplotlib.pyplot as plt
import timeit
import pickle

In [2]:
# Load the dataset
with open('shakespeare.txt', 'r') as f:
    text = f.read()

# Create character to index and index to character mappings
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}
printable_chars = tuple(set(string.printable))
printable_int2char = dict(enumerate(printable_chars))
printable_char2int = {ch:ii for ii,ch in printable_int2char.items()}

# Encode by character
encoded = np.array([char2int[ch] for ch in text])


## Encode the character pairs within the text
double_encoded = []
double_char2int = {}

# Grab all of the character pairs
for i in range(len(text) - 1):
    pair = text[i:i+2]
    if pair not in double_char2int:
        # Assign the next number to any new pairs
        double_char2int[pair] = len(double_char2int)
    # Encode the character pair with its numerical representation
    double_encoded.append(double_char2int[pair])
    #print(pair, double_char2int[pair])

double_int2char = {i: ch for ch, i in double_char2int.items()}
double_encoded = np.array(double_encoded)
pairs = tuple(set(double_char2int.keys()))


total_chars = len(chars)
total_pairs = len(pairs)
total_labels = 0


# We can see that there are 65 different characters within the tinyshakespeare dataset,
# far less than the total amount of printable characters,
print(total_chars, len(printable_chars))
print(char2int)
# Similarly, we can see that there are far less unique pairs of characters within the text than are possible.
# We will ignore pairs not included (like zz) to save on processing (and these pairs likely would be ignored anyways)
# Though this also means that we'll have to be careful if we ever ask this network to produce text from a string
# that isn't included in this encoding.  We'll handle that in a function later.
print(total_pairs, total_chars**2)
print(double_char2int)

65 100
{'v': 0, 'L': 1, 'w': 2, 'Q': 3, 't': 4, 'O': 5, 'X': 6, 'W': 7, 'q': 8, 'l': 9, 'F': 10, ';': 11, 'x': 12, 'r': 13, '!': 14, 'T': 15, 'Z': 16, 'C': 17, 'm': 18, 'S': 19, '$': 20, 'a': 21, '?': 22, "'": 23, 'f': 24, ',': 25, 'E': 26, ' ': 27, 'b': 28, 'U': 29, 'A': 30, '-': 31, 'G': 32, 'h': 33, 'Y': 34, 'P': 35, 'u': 36, 'y': 37, 'M': 38, 'd': 39, 'H': 40, '.': 41, 'e': 42, 'R': 43, 'K': 44, '&': 45, 'g': 46, 'J': 47, ':': 48, '\n': 49, 'k': 50, 'I': 51, 'o': 52, 's': 53, 'n': 54, 'V': 55, 'i': 56, 'p': 57, 'j': 58, 'N': 59, 'B': 60, 'c': 61, 'z': 62, 'D': 63, '3': 64}
1403 4225
{'Fi': 0, 'ir': 1, 'rs': 2, 'st': 3, 't ': 4, ' C': 5, 'Ci': 6, 'it': 7, 'ti': 8, 'iz': 9, 'ze': 10, 'en': 11, 'n:': 12, ':\n': 13, '\nB': 14, 'Be': 15, 'ef': 16, 'fo': 17, 'or': 18, 're': 19, 'e ': 20, ' w': 21, 'we': 22, ' p': 23, 'pr': 24, 'ro': 25, 'oc': 26, 'ce': 27, 'ee': 28, 'ed': 29, 'd ': 30, ' a': 31, 'an': 32, 'ny': 33, 'y ': 34, ' f': 35, 'fu': 36, 'ur': 37, 'rt': 38, 'th': 39, 'he': 40, 'er

In [3]:
# Just to try it, we can also do one with all possible 2-character combinations of the 65 characters used in the data

import itertools

max_pairs = [''.join(pair) for pair in itertools.product(chars, repeat=2)]
total_max_pairs = len(max_pairs)
max_char2int = {pair: index for index, pair in enumerate(max_pairs)}
max_int2char = {i: ch for ch, i in max_char2int.items()}
max_encoded = []

for i in range(len(text) - 1):
    pair = text[i:i+2]
    max_encoded.append(max_char2int[pair])

max_encoded = np.array(max_encoded)
total_max_pairs

4225

## Network definitions

In [4]:
# Check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_on_gpu = torch.cuda.is_available()
print(device)

cuda:0


In [5]:
class CharRNN_normal(torch.nn.Module):
    def __init__(self, tokens, n_hidden=500, n_layers=2, batch_size=64, drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.lr = lr

        self.labels = tokens #string.printable
        self.printable_chars = printable_chars
        self.int2char = dict(enumerate(self.labels))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()} #dict(enumerate(self.printable_chars)).items()
        self.output_size = len(self.labels) #len(self.printable_chars)

        self.lstm = torch.nn.LSTM(self.output_size, n_hidden, n_layers,
                                  dropout=drop_prob, batch_first=True)

        self.dropout = torch.nn.Dropout(drop_prob)

        self.linear = torch.nn.Linear(n_hidden, self.output_size)

    def forward(self, x, hidden):
        # self.lstm's x wants (batch_size, seq_length, total_labels)
        r_output, hidden = self.lstm(x, hidden)
        out = self.dropout(r_output)
        out = out.contiguous().view(-1, self.n_hidden)
        out = self.linear(out)
        return out, hidden

    def init_hidden(self, batch_size): #=self.batch_size):
        weight = next(self.parameters()).data
        if train_on_gpu:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        return hidden

In [6]:
class CharRNN_doubleEncode(torch.nn.Module):
    def __init__(self, tokens, n_hidden=300, n_layers=2, batch_size=64, drop_prob=0.2, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.lr = lr

        self.labels = tokens
        self.double_int2char = double_int2char
        self.double_char2int = double_char2int
        self.output_size = len(self.labels)

        self.lstm = torch.nn.LSTM(self.output_size, n_hidden, n_layers,
                                  dropout=drop_prob, batch_first=True)

        self.dropout = torch.nn.Dropout(drop_prob)

        self.linear = torch.nn.Linear(n_hidden, self.output_size)

    def forward(self, x, hidden):
        # self.lstm's x wants (batch_size, seq_length, total_pairs)
        r_output, hidden = self.lstm(x, hidden)
        out = self.dropout(r_output)
        out = out.contiguous().view(-1, self.n_hidden)
        out = self.linear(out)
        return out, hidden

    def init_hidden(self, batch_size): #=self.batch_size):
        weight = next(self.parameters()).data
        if train_on_gpu:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        return hidden

In [7]:
class CharRNN_maxEncode(torch.nn.Module):
    def __init__(self, tokens, n_hidden=300, n_layers=2, batch_size=64, drop_prob=0.2, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.lr = lr

        self.labels = tokens
        self.double_int2char = max_int2char
        self.double_char2int = max_char2int
        self.output_size = len(self.labels)

        self.lstm = torch.nn.LSTM(self.output_size, n_hidden, n_layers,
                                  dropout=drop_prob, batch_first=True)

        self.dropout = torch.nn.Dropout(drop_prob)

        self.linear = torch.nn.Linear(n_hidden, self.output_size)

    def forward(self, x, hidden):
        # self.lstm's x wants (batch_size, seq_length, total_pairs)
        r_output, hidden = self.lstm(x, hidden)
        out = self.dropout(r_output)
        out = out.contiguous().view(-1, self.n_hidden)
        out = self.linear(out)
        return out, hidden

    def init_hidden(self, batch_size): #=self.batch_size):
        weight = next(self.parameters()).data
        if train_on_gpu:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        return hidden

## Encoding functions

In [8]:
# Function to one-hot encode the characters
def one_hot_encode(arr, n_labels=total_labels):
    # arr is shape seq_length, batch_size
    arr = arr.transpose(1,0)
    oh = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    oh[np.arange(oh.shape[0]), arr.flatten()] = 1
    oh = oh.reshape((*arr.shape, n_labels))
    return oh.transpose(1,0,2)

### Actually, didn't end up using these :( ###
# # Function to one-hot encode groups of 2 characters, in attempt to increase contextual range
# def one_hot_encode_double(arr, n_labels=total_labels):
#     # arr is shape seq_length, batch_size
#     arr = arr.transpose(1,0)
#     if (arr.shape[1]%2 == 1): arr = np.insert(arr, 0, 5, axis=1) # " " is 5, to make the new seq_length even
#     new = np.zeros((arr.shape[0], int(arr.shape[1]/2)), dtype=int)
#     oh = np.zeros((*(new.shape), n_labels**2), dtype=np.float32) # *(new.shape) here because .shape returns a tuple
#     # below only works because arr has been pre-processed to be from min-index to max-index
#     for i in range(0, arr.shape[1], 2):
#         new[:,int(i/2)] = n_labels*arr[:,i] + arr[:,i+1]
#     oh[np.arange(oh.shape[0])[:,None], np.arange(oh.shape[1]), new] = 1
#     # oh = oh.reshape((*new.shape, n_labels**2))
#     # Ex. oh of n**2 = 9, (batch,seq) [1,2],[3,4],[5,0] would be
#     # [[0,1,0,0,0,0,0,0,0], [0,0,1,0,0,0,0,0,0]
#     #  [0,0,0,1,0,0,0,0,0], [0,0,0,0,1,0,0,0,0]
#     #  [0,0,0,0,0,1,0,0,0], [1,0,0,0,0,0,0,0,0]]

#     # returns oh of shape seq_length, batch_size, n_labels**2
#     return oh.transpose(1,0,2)

# def target_to_double(target_arr, n_labels=total_labels):
#     target_arr = target_arr.transpose(1,0)
#     if (target_arr.shape[1]%2 == 1): target_arr = np.insert(target_arr, 0, 76, axis=1) # " " is 5, to make the new seq_length even
#     converted = np.zeros((target_arr.shape[0], int(target_arr.shape[1]/2)), dtype=int)
#     for i in range(0, target_arr.shape[1], 2):
#         converted[:,int(i/2)] = n_labels*target_arr[:,i] + target_arr[:,i+1]
#     return converted.transpose(1,0)

# def double_to_char(doubled, n_labels=total_labels):
#     doubled = doubled.transpose(1,0)
#     reverted = np.zeros((doubled.shape[0], int(doubled.shape[1]*2)), dtype=int)
#     for i in range(0, doubled.shape[1]):
#         reverted[:, i+1] = doubled[:, i] % n_labels
#         reverted[:, i] = (doubled[:, i] - reverted[:, i+1]) / n_labels
#     return reverted.transpose(1,0)## Encoding functions

## Batching functions

In [9]:
# Get a random sequence of the Shakespeare dataset.
def get_random_seq_and_target(arr, seq_length):
    start_index = random.randint(0, len(arr) - seq_length - 1)
    end_index   = start_index + seq_length + 1 - 1
    return arr[start_index:end_index], arr[start_index+1:end_index+1]

# Get a random paired sequence of the Shakespeare dataset.
# Iterates 2 indexes at a time
def pair_get_random_seq_and_target(arr, seq_length):
    increment = 2
    start_index = random.randint(0, len(arr) - 2*seq_length - increment)
    end_index   = start_index + 2*seq_length + increment - increment
    return arr[start_index:end_index:2], arr[start_index+increment:end_index+increment:2]
### WORTH PAYING ATTENTION TO INCREMENT OF TARGET - +1 vs +2 could make a huge difference here


def get_batches(arr, batch_size, seq_length, batches_per_iter, pair_mode = False, n_labels=total_labels):
    '''Arguments
       ---------
       arr: Total char array to make batches from, 1-D
       batch_size: the number of sequences per batch
       seq_length: number of encoded chars per sequence
       batches_per_iter: how many sets for batches per iter/epoch
       pair_mode: whether to collect from double_encode or not
       n_labels: the total number of possible labels
    '''

    # We want batch to be seq_length,batch_size (128,64)

    batch_size_total = batch_size * seq_length
    batch = np.zeros((seq_length, batch_size), dtype=int)
    target = np.zeros((seq_length, batch_size), dtype=int)

    for b in range(0, batches_per_iter):
        # iterate through the array, one random sequence at a time
        for n in range(0, batch_size):
            if pair_mode:
                batch[:,n], target[:,n] = pair_get_random_seq_and_target(arr, seq_length)
            else:
                batch[:,n], target[:,n] = get_random_seq_and_target(arr, seq_length)
        yield torch.from_numpy(one_hot_encode(batch, n_labels=n_labels).transpose(1,0,2)), torch.from_numpy(target.transpose(1,0))

## Training

In [10]:
def train_step(net, opt, loss_func, batch_size, input, target):
    # Initialize hidden state and gradients.
    hidden = net.init_hidden(batch_size)
    opt.zero_grad()

    # Forward pass.
    output, hidden = net(input, hidden)

    # Compute loss. Flatten output and target tensors and compute cross-entropy.
    loss = loss_func(output.reshape(-1, net.output_size), target.reshape(-1))

    # Backward pass and optimization.
    loss.backward()
    opt.step()

    return loss.item()

def train(net, data, pair_mode = False, epochs=10, batch_size=64, seq_length=128, lr=0.001, clip=5, val_frac=0.1, print_every=100):
    ''' Training a network

        Arguments
        ---------

        net: CharRNN_normal network
        data: text data to train the network
        pair_mode: whether to refer to double_encode or not
        epochs: Number of epochs.iters to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss

    '''
    all_losses = []
    loss_sum   = 0

    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    loss_func = torch.nn.CrossEntropyLoss()

    # create training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if(train_on_gpu):
        net.cuda()

    counter = 0
    'THIS AFFECTS INPUT SIZE'
    n_labels = len(net.labels)
    batches_per_iter=1000


    start_train = timeit.default_timer()

    for e in range(epochs):
        for input, target in get_batches(data, batch_size, seq_length, batches_per_iter, pair_mode=pair_mode, n_labels=n_labels):
            input, target = input.to(device), target.to(device) # Move to GPU memory.
            #print("input+target shape:", input.shape, target.shape)
            #input+target shape: torch.Size([128, 64, 65]) torch.Size([128, 64])

            loss      = train_step(net, opt, loss_func, batch_size, input, target)   # Calculate the loss.
            loss_sum += loss                                  # Accumulate the loss.

            counter += 1
            # Print the log.
            if counter % print_every == print_every - 1:
                print('iter:{}/{} loss:{}'.format(counter+1, batches_per_iter, loss_sum / print_every))
                #print('generated sequence: {}\n'.format(eval_step(net)))

                # Track the loss.
                all_losses.append(loss_sum / print_every)
                loss_sum = 0

        val_h = net.init_hidden(batch_size)
        val_losses = []
        net.eval()
        for input, target in get_batches(val_data, batch_size, seq_length, batches_per_iter=5, pair_mode=pair_mode, n_labels=n_labels):
            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            val_h = tuple([each.data for each in val_h])

            if(train_on_gpu):
                input, target = input.cuda(), target.cuda()

            output, val_h = net(input, val_h)
            val_loss = loss_func(output.reshape(-1, net.output_size), target.reshape(-1))
            val_losses.append(val_loss.item())

        net.train() # reset to train mode after iterationg through validation data
        print("Epoch: {}/{}...".format(e+1, epochs),
              "Step: {}...".format(counter),
              "Loss: {:.4f}...".format(loss),
              "Val Loss: {:.4f}".format(np.mean(val_losses)))


    end_train = timeit.default_timer()
    print ("Training time elapsed:", end_train - start_train, "s")

    return loss_sum, all_losses

#### Initialize and train

In [11]:
### Normal CharRNN ###

# n_hidden = 300
# n_layers = 3
# dropout = 0.5
# batch_size = 64
# seq_length = 128
# n_epochs = 20
# learning_rate = 0.001

# total_labels = total_chars
# double = False

# net = CharRNN_normal(chars, n_hidden, n_layers)
# print(net)
# print(sum(p.numel() for p in net.parameters()))
# # train the model
# loss_sum, all_losses = train(net, encoded, epochs=n_epochs, batch_size=batch_size,
#                              seq_length=seq_length, lr=learning_rate, print_every=500)
#
# torch.save(net.state_dict(), 'SequenceRNN_params_ -layers h- .pth')
# with open('SequenceRNN_lossList_ -layers h- _loss- .pkl', 'wb') as file:
#     pickle.dump(all_losses, file)


### Double Encoded CharRNN ###

n_hidden = 500
n_layers = 1
dropout = 0.3
batch_size = 128
seq_length = 64
n_epochs = 15
learning_rate = 0.001

total_labels = total_pairs
double = True

netDouble = CharRNN_doubleEncode(pairs, n_hidden, n_layers, drop_prob=dropout)
print(netDouble)
print(sum(p.numel() for p in netDouble.parameters()))
# train the model
# loss_sum, all_losses = train(netDouble, double_encoded, pair_mode = True, epochs=n_epochs, batch_size=batch_size,
#                              seq_length=seq_length, lr=learning_rate, print_every=100)

# torch.save(netDouble.state_dict(), 'drive/MyDrive/Colab Notebooks/Word_RNN/DoubleRNN_params_3-layers h-300_iter2_cont.pth')
# with open('drive/MyDrive/Colab Notebooks/Word_RNN/DoubleRNN_lossList_3-layers h-300_loss- _iter2_cont.pkl', 'wb') as file:
#     pickle.dump(all_losses, file)


### Max Double Encoded CharRNN ###

# n_hidden = 300
# n_layers = 2
# dropout = 0.3
# batch_size = 64
# seq_length = 128
# n_epochs = 25
# learning_rate = 0.001

# total_labels = total_max_pairs
# double = True

# netMax = CharRNN_maxEncode(max_pairs, n_hidden, n_layers, drop_prob=dropout)
# print(netMax)
# print(sum(p.numel() for p in netMax.parameters()))
# # train the model
# # loss_sum, all_losses = train(netMax, max_encoded, pair_mode = True, epochs=n_epochs, batch_size=batch_size,
# #                              seq_length=seq_length, lr=learning_rate, print_every=100)

# torch.save(netMax.state_dict(), 'drive/MyDrive/Colab Notebooks/Word_RNN/MaxRNN_params_3-layers h-300_iter2.pth')
# with open('drive/MyDrive/Colab Notebooks/Word_RNN/MaxRNN_lossList_3-layers h-300_loss- _iter2.pkl', 'wb') as file:
#     pickle.dump(all_losses, file)

CharRNN_doubleEncode(
  (lstm): LSTM(1403, 500, batch_first=True, dropout=0.3)
  (dropout): Dropout(p=0.3, inplace=False)
  (linear): Linear(in_features=500, out_features=1403, bias=True)
)
4512903




#### Training loss curve

In [12]:
# plt.xlabel('iters')
# plt.ylabel('loss')
# plt.plot([loss for loss in all_losses])
# plt.show()

In [13]:
### Single encode ###
## 3 layers, 300 hidden, dropout 0.5, 0.001 lr, 20 epochs
#  909s train time, 1.271 train loss, 1.609 val loss
## blah
#  blah

### Double encode ###
## 2 layers, 300 hidden, dropout 0.2, 0.001 lr, 5 epochs
# 275s train time, 1.433 train lost, 1.694 val loss
## 3 layers, 400 hidden, dropout 0.3, 0,002 lr, 25 epochs, seq len 200
# 3 layers, 300 hidden, dropout 0.3, seq_len 150
# 5755 train time, 2.141 train loss, 3.975 val loss
# answers don't make much sense

## Text generation

In [14]:
netDouble.load_state_dict(torch.load('DoubleRNN_params_1-layers h-500_iter2.pth'))

RuntimeError: Error(s) in loading state_dict for CharRNN_doubleEncode:
	size mismatch for lstm.weight_ih_l0: copying a param with shape torch.Size([2800, 1403]) from checkpoint, the shape in current model is torch.Size([2000, 1403]).
	size mismatch for lstm.weight_hh_l0: copying a param with shape torch.Size([2800, 700]) from checkpoint, the shape in current model is torch.Size([2000, 500]).
	size mismatch for lstm.bias_ih_l0: copying a param with shape torch.Size([2800]) from checkpoint, the shape in current model is torch.Size([2000]).
	size mismatch for lstm.bias_hh_l0: copying a param with shape torch.Size([2800]) from checkpoint, the shape in current model is torch.Size([2000]).
	size mismatch for linear.weight: copying a param with shape torch.Size([1403, 700]) from checkpoint, the shape in current model is torch.Size([1403, 500]).

In [None]:
# Evaluation step function.
def eval_step(net, init_seq='W', predicted_len=100, eval_batch_size=3):
    # Enter eval mode
    net.eval()

    # Initialize the hidden state, input and the predicted sequence.
    hidden        = net.init_hidden(eval_batch_size)
    encoded_seq = np.array([char2int[ch] for ch in init_seq])
    init_input    = torch.from_numpy(one_hot_encode(np.array(([encoded_seq]*eval_batch_size))
                                    .reshape(eval_batch_size, len(init_seq)).transpose(1,0)).transpose(1,0,2)).to(device)
    predicted_seq = np.array(([ch for ch in init_seq]*eval_batch_size)).reshape(eval_batch_size, len(init_seq))
    # predicted_seq.shape = (batch, seq, 0)

    # Use initial string to "build up" hidden state.
    #print(init_input[:,0,:].unsqueeze(1).shape)
    for t in range(len(init_seq) - 1):
        output, hidden = net(init_input[:,t,:].unsqueeze(1), hidden) # input shape (batch_size, seq_length, n_chars)

    # Set current input as the last character of the initial string.
    input = init_input[:,-1,:].unsqueeze(1)

    # Predict more characters after the initial string.
    for t in range(predicted_len):
        # Get the current output and hidden state.
        output, hidden = net(input, hidden)

        # Sample from the output as a multinomial distribution.
        predicted_index = tuple(torch.multinomial(output[:, :].exp(), 1)[:].flatten().tolist())
        #predicted_index = torch.multinomial(output[:, :].exp(), 1)[:].numpy()

        # Add predicted character to the sequence and use it as next input.
        predicted_chars  = [int2char[i] for i in predicted_index]
        predicted_seq = np.concatenate((predicted_seq, np.expand_dims(predicted_chars, axis=1)), axis=1)


        # Use the predicted character to generate the input of next round.
        input = torch.from_numpy(one_hot_encode(np.array(predicted_index)
                                .reshape(1, eval_batch_size)).transpose(1,0,2)).to(device)

    return [''.join(row) for row in predicted_seq]



# Pairwise evaluation step function.
# with iter=1, it produces staggered language... like double letters
def double_eval_step(net, init_seq='Wh', predicted_len=100, eval_batch_size=3):
    # Enter eval mode
    net.eval()

    init_seq = [init_seq[i:i+2] for i in range(0, len(init_seq), 2)]

    # Initialize the hidden state, input and the predicted sequence.
    hidden        = net.init_hidden(eval_batch_size)
    encoded_seq = np.array([double_char2int[ch] for ch in init_seq])
    init_input    = torch.from_numpy(one_hot_encode(np.array(([encoded_seq]*eval_batch_size))
                                    .reshape(eval_batch_size, len(init_seq)).transpose(1,0), n_labels=len(net.labels)).transpose(1,0,2)).to(device)
    predicted_seq = np.array(([ch for ch in init_seq]*eval_batch_size)).reshape(eval_batch_size, len(init_seq))
    # predicted_seq.shape = (batch, seq, 0)

    # Use initial string to "build up" hidden state.
    #print(init_input[:,0,:].unsqueeze(1).shape)
    for t in range(len(init_seq) - 1):
        output, hidden = net(init_input[:,t,:].unsqueeze(1), hidden) # input shape (batch_size, seq_length, n_chars)

    # Set current input as the last character of the initial string.
    input = init_input[:,-1,:].unsqueeze(1)

    counter=0
    # Predict more characters after the initial string.
    for t in range(predicted_len):
        counter += 1
        # Get the current output and hidden state.
        output, hidden = net(input, hidden)

        # Sample from the output as a multinomial distribution.
        predicted_index = tuple(torch.multinomial(output[:, :].exp(), 1)[:].flatten().tolist())
        #predicted_index = torch.multinomial(output[:, :].exp(), 1)[:].numpy()

        # Add predicted character to the sequence and use it as next input.
        predicted_chars  = [double_int2char[i] for i in predicted_index]
        'v TO AVOID DOUBLING PROBLEM, LIKELY FROM ITERATOR IN THE RANDOM SEQUENCE FUNCTION'
        #if counter%2 == 0:
        predicted_seq = np.concatenate((predicted_seq, np.expand_dims(predicted_chars, axis=1)), axis=1)


        # Use the predicted character to generate the input of next round.
        input = torch.from_numpy(one_hot_encode(np.array(predicted_index)
                                .reshape(1, eval_batch_size), n_labels=len(net.labels)).transpose(1,0,2)).to(device)

    return [''.join(row) for row in predicted_seq]

In [None]:
# ## from double L-3 H-300

# ROMEO:
# Nay, but mercy in the precious then.

# ROMEO:
# Ay, if you say you will not have your friends
# Where I bring that unseen not. Sir John Harry,
# And 'tis hear instantly not thank your troth,
# Percaint of lives, beggar garland's gentlemen,
# For one that very enemy to bear a side.

# HENRY BOLINGBROKE:
# 'Tis contrary and sylerith capted for.

# EDWARD:
# 'Tis not in law, by his tongue common passion.
# I take you to him into dispatch the cause;
# They'll now but loved our good friendly, lay to commeside
# Their long unhave, the grave! I will myself,
# Say, thought of her despaladis biving and their
# Hateows to what should.

# AUTOLYCUS:
# You have not perily is not the princes of the oracle' histe, and
# loss a trop at me; as every executioner,
# I seek the opposern old grave entereat.

# DUKE VINCENTIO:
# Rome of poor might be admoursed.

# First Servant:
# Or That, I will be strive, for ever have
# Though she got, nor shoulders. Why show'd your eyest,
# I'll kiss the humblin to have lose a care?
# The sake was authority, sir, say
# Than e'er that ambious for the field but
# If I say would attend your brithering ruast.

# Second Musician:
# Why, my grantam?

# POLIXENES:
# Would I infillt within these double loyalty
# We look'd us the conduct betwer, I have die?

# VOLUMNIA:
# Done, orfeny your life, and I have sent to gaze you.

# First Lord:
# I can have of itself. The stone offence these
# voices oft his life were with a old object
# Und on your lamenus of in Irivery,
# Made it reasons, who can law scarce come
# No more than it was by
# A waster men.

# CAMILLO:
# What hast, I will served too?

# AUFIDIUS:
# All enough for a day ber, if your false life
# Fan our leaves; if I am lost do titus
# Till wish'd suffering up for young; but he sets me,
# A greats of despair, say any throng for yet
# 'Twas you make arms, as it a propore,
# Swell I repers your name.

# GLOUCESTER:
# Forbear marry the of the aidings,
# Like a good fany to the earth, mather to the wars;
# Who though that I should first my brother doth,
# And the redese that spars to him to cotus.

# NORTHUMBERLAND

# -------------------------------------


# ROMEO:
# There have wear Name of you!

# LUCIO:

# ISABELLA:
# Marchann, the best march them: he was like the hath deny them;
# But all the rudent to officit wherein
# And that they vile thanks. What till it is thou need?
# I have done be sodder--resigns here well.

# GREGORY:
# O, learn'd this gone of worship to heavy.

# ISABELLA:
# Where he is banish'd? thou villake cartant
# Of a thang to speak: hears it, did breathing,
# And, madam, grand York and dece people as harm!
# Or that all the arms of my place?

# ABHORSON:
# 'Tis not not a town. I ne'er chang commanded of your
# bidth departs: 'ze would did patience is just
# contey'd by safety, speaks, where? it is born as
# canst the city resemblis their.

# ISABELLA:
# If I in thought our till you as I had it.
# I have from what you say, sits some offer.

# Otchment:
# You will not not, sir, be it perform'd father.

# First Servingman:
# I would a sun by way must hear it. There will prote it with
# these curlent; and therefore distast it, draw: to bring him
# the sea: beseech the lady.

# POLIXENES:
# There
# is exposs'd as there.

# POLIXENES:
# She's nothing;
# You bad me me to noble; if my faster.

# ANGELO:
# You have upon such a sentently.

# ANGELO:
# Yes; I am nute; my lord; what's the Tower?

# MENENIUS:
# The lady of the maid'st Mencurarlies:--ay,
# More, your would have aided so with it in our heat,--
# Perhate this cooges starle, fuspish peads hope;
# Which does be hand with the per it: in she from
# your true image and prosperous vanom'd yours.

# LEONTES:
# I have a noble captain.

# LEONTES:
# It will as sunder,
# I were runyly well to the Taugh yee.

# BRUTUS:
# Call all this there is city, come room myself.
# What, wilt you not?

# PETRUCHIO:
# Verina!'' this! O mistress! Most that.

# Shepherd:
# O, ask our proud, became to us and all embracemens.

# VALERIA:
# Ay, to what's that?

# Second Servant:
# Mare you a people dead; if for a doubt was hence of
# so;
# Six beceforce, or no to pratle I have, which hand
# Thou untie these coming you may content
# Unlike him to enough our table must
# I came to speak of every noddle.

# BRUTUS:

# -------------------------------------


# ROMEO:
# Let's live to be so me; too luties here,
# Thou art, with God's man, provost, wheely spits
# So throat from dream his brother's life.

# LADY GREY:
# And forbid remember her pursues;
# I love, if I am kings, good English crown
# And put the duke, and I have lose the stroke,
# That would be cured the charible pardon
# Of England's party at Frong so afcounter'd
# With renente, and wrought glaftey out.

# WARWICK:
# O, give you not steep: that love your lovings,
# Call preital not of his service of the
# duke.

# WARWICK:
# Learn with his hale I see hear on the utterant,
# Tullut a gubsd bedited slubbedom'd,
# If tidon with the scorts and good to speak;
# No wooth though 'twere to go: my uncle person.

# JOHN OF GAUNT:
# O, look not shall I know I can my love's soul.

# KING RICHARD III:
# Regriesome that elcome.

# KING HENRY VI:
# Uncle to your enemies
# Bonold in this dungry and fools, but first
# With letters of their hatters; and thy virdain
# Whose tempy pity I think that I can imbrance make
# A warnerre obscuing steeds, were committed,
# To neep her dear man, some sees in their
# Two from Lord Angelo 'O, what my love?

# LUCIO:
# Nay, as not yast, if I should accept me here.

# TRANIO:
# My liege Verans; bed some private hate,
# He fesimns and their was truth and resom
# Than vice thee an unsinished.

# VOLUMNIA:
# Nothing, by the own mecsast man.

# GREMIO:
# And so your company, sir! he is so young;
# If thou shalt be mine arm:
# That I might nonelo, perjury, there must not,
# And here is made a black disgrace again,
# For not a skith on fitness again, althous
# Is to proclaim these trerbon my little.

# CLARENCE:
# Untul vice with me, bites can much but some
# nurse never hear the prince, be gone only stands.

# GLOUCESTER:
# The rebel of his city garden near,
# Lest I let me wide a weake a ground
# As well in charge in you.

# YORK:
# Ay, which our spirits and my shame's withers:
# As I was much yours; and uncle, therefore alack,
# Hold'd it he shall bring with thy issue;
# Placelyhood freectune is hers.

# CLIFFORD:
# To me, and thou say, if thou canst know't
# For love; g

# -------------------------------------


# ROMEO:
# Pleasant yourselacte! pardon;
# Who shall be before to him? I am the old
# kings with G,
# This my life, she will not fly.

# Second King of Bonding most house,
# I pray you, that you can, i' the bawd afrang:
# Or by the Margaret was prove from these,
# And so, some deceived a body of shake,
# More Clarence, fres oth too.

# TRANIO:
# Thou hast neath against my lady's fearful royal;
# Thy devil they cannot destence on death,
# And in thy life is strength-bach'd. They have wrong
# Like a thing exprimit? yet thou hast haste:
# And these he finds us most along to leave,
# Imliket then tash my laws.

# First Senator:
# What says I to my fault to hear my oath?

# CAPULET:
# What send your brothers sudden but a duke?

# Servant:
# Where's no wife that the good bound from hence?

# NORTHUMBERLAND:
# Why, come from Bustard Sain, by chair as scen
# The tribunes of the proud, sin bal of with him,
# Mark'd in the happy like a speech, promits,
# The cursting causes out in old meavens night
# And havan but that then changel'd there will not
# Return to his life. And what have done?

# Nurse:
# Ah, on that; well shall be the
# right in suffering.

# BUSHY:
# For put we buy by I.

# QUEEN MARGARET:
# I am not doly did charmhine sing:
# But the thought it before, Cogain! God never!

# MONTAGUE:
# That's her, the Briston manly lord was lad,
# And left all the accusation
# To take us, now to make me to the brother:
# And as I lived this will want in one,
# From only a king, who begg'd my liege,
# And let it like a grave our murdering sport--
# I'll hence in thy kingdom her blood now.

# Third Murderer:
# Nends the poor sovereign and thy ancient strong beman
# Toge to good and a bear, arm;
# And quiln the move, what thou shouldst comed fort?
# That hate a treasong fointroo'd deary prove
# Boromine of that washs of the high fellow.
# Go both o' the messet with him, I give my spider'd
# Come to be upon me; I forswell awral!
# And where's not with our company? What airy his ston?
# Thou happy'st on mine eyes, and or with remair,
# And what they cries my father's sears
# They fight to sight, and

# -------------------------------------


# ROMEO:
# And o have at that dence.

# PETRUCHIO:
# Villain, I cannoid your talk be sudden,
# Since that we see me with the way
# And he to use him how it by Vilena,
# But staft upon the passy; doubet you, my lord,
# To take tongue out honour.

# FLORIZEL:
# I would all his me well:
# How fares the fair? I'll read?

# LADY CAPULET:
# Good for sone, unlike, his revenge, good,
# Hath wold away be stars.

# ANGELO:
# Well, let's their cause blow: we
# now: but you are men, I made to get thee as
# you may be sixture about on to oppeigr
# from our friends: who, and he stands already,
# Away; and pereless cooting older,
# That is by charges and want of a lasting.
# Ha! suet deserved to their awful soul.

# BENVOLIO:
# Then almost been in thy disonger word,
# Than on a cunsed ere her royal villain,
# Were they with daughters bres our feeling deedness.
# Loves and kind most refer them.

# JULIET:
# What seest we revolt about it? On what is,
# By rudely but have but be beasted not it
# Like to deny in to shape but her bottle,
# Who in resolves my body's rain to be.

# Second Comes:
# Good Crudinalains, selly lost must confess;
# On this suns of no out for this well after.

# LUCIO:
# More thus, but being always as that the people
# Hath so dear of England's king and something
# To wake up seem, and cut't not abserve
# And bold this falling fall
# Mont traathe bed, uncold supplats of ince,
# By what appear her kindred for our joy.

# RIVERS:
# To faster now of his army;
# King of Brains, I slew the meaning of his eyes,
# Her friends am Edward, who art the fiend of fine?
# It is the fearful murdering his kinsman.
# Thou wert at freely and be a noble being fair:
# And hence, and craft hither To mistress,
# Therefore seem or an oath enjoy me as any
# Bright Rome, how would the fight 'twixt neables,
# So many fits a shame, there tried from me,
# And to desire to be untorcuress.

# BALTHASAR:
# I will become an other fair kinsmen;
# As gentle soul will peace us methings roth!
# Had am art home?

# DUKE VINCENTIO:
# A praise her hunger than the
# trive of mine to whose death hath ents:
# Go you me: let th

# -------------------------------------


In [None]:
batches = 5
#generated_seqs = eval_step(net, pair_mode = double, init_seq='ROMEO', predicted_len=200, eval_batch_size=batches)
generated_seqs = double_eval_step(netMax, init_seq='When', predicted_len=300, eval_batch_size=batches)


for i in range(batches):
    print(generated_seqs[i])
    print("\n-------------------------------------\n")

In [None]:
# def predict(net, char, h=None, top_k=None):
#         ''' Given a character, predict the next character.
#             Returns the predicted character and the hidden state.
#         '''

#         # tensor inputs
#         x = np.array([[net.char2int[char]]])
#         x = one_hot_encode(x, len(net.chars))
#         inputs = torch.from_numpy(x)

#         if(train_on_gpu):
#             inputs = inputs.cuda()

#         # detach hidden state from history
#         h = tuple([each.data for each in h])
#         # get the output of the model
#         out, h = net(inputs, h)

#         # get the character probabilities
#         p = F.softmax(out, dim=1).data
#         if(train_on_gpu):
#             p = p.cpu() # move to cpu

#         # get top characters
#         if top_k is None:
#             top_ch = np.arange(len(net.chars))
#         else:
#             p, top_ch = p.topk(top_k)
#             top_ch = top_ch.numpy().squeeze()

#         # select the likely next character with some element of randomness
#         p = p.numpy().squeeze()
#         char = np.random.choice(top_ch, p=p/p.sum())

#         # return the encoded value of the predicted char and the hidden state
#         return net.int2char[char], h

# def sample(net, size, prime='The', top_k=None):

#     if(train_on_gpu):
#         net.cuda()
#     else:
#         net.cpu()

#     net.eval() # eval mode

#     # First off, run through the prime characters
#     chars = [ch for ch in prime]
#     h = net.init_hidden(1)
#     for ch in prime:
#         char, h = predict(net, ch, h, top_k=top_k)

#     chars.append(char)

#     # Now pass in the previous character and get a new one
#     for ii in range(size):
#         char, h = predict(net, chars[-1], h, top_k=top_k)
#         chars.append(char)

#     return ''.join(chars)

# print(sample(net, 1000, prime='JULIET', top_k=5))