In [1]:
import gc
import pickle

import torch
import numpy as np

from torch import nn
from torch.nn import functional as F
from torch import optim
from torchinfo import summary

from collections import namedtuple
import PyPDF3

In [2]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Show which device was selected above.
print(device)

cpu


In [4]:
# Read the whole training corpus into memory. The encoding is pinned so the
# decoded characters do not depend on the platform's locale default.
with open('anna.txt', 'r', encoding='utf-8') as file:
    text = file.read()

In [5]:
# Peek at the first 120 characters of the training text.
text[:120]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverything was in confusion i'

In [6]:
def encode_text(text, extend = True, unique_chars = None):
    """
    Map every character of ``text`` to an integer index.

    Parameters
    ----------
    text:
        String to encode.
    extend:
        If True, make sure the special symbols ``#[]{}+-*=!`` are part of the
        vocabulary. Symbols that are already present are not added again.
    unique_chars:
        Optional existing vocabulary (sequence of characters). It is copied,
        never modified. If None, the vocabulary is built from the characters
        of ``text`` plus the special symbols, in sorted order so that the
        mapping is the same on every run.

    Returns
    -------
    results:
        Namedtuple with the fields
        - encoded_text: 1-D integer array, one index per character of ``text``
        - unique_char: the vocabulary list that was used
        - int2char: dict mapping index -> character
        - char2int: dict mapping character -> index

    Raises
    ------
    KeyError
        If ``text`` contains a character that is not in the vocabulary.
    """
    result_tuple = namedtuple('results', ['encoded_text', 'unique_char', 'int2char', 'char2int'])
    special_chars = '#[]{}+-*=!'

    if unique_chars is None:
        vocab = sorted(set(text).union(special_chars))
    else:
        # Work on a copy so the caller's list is left untouched.
        vocab = list(unique_chars)

    if extend:
        present = set(vocab)
        vocab.extend(char for char in special_chars if char not in present)

    # A character keeps the index of its first occurrence, which is what
    # list.index() returned, but without rescanning the list for every entry.
    char2int = {}
    for index, char in enumerate(vocab):
        char2int.setdefault(char, index)
    int2char = {index : char for (char, index) in char2int.items()}

    # Explicit integer dtype so that an empty text still yields an index array.
    encoded_text = np.array([char2int[char] for char in text], dtype = np.int64)

    return result_tuple(encoded_text, vocab, int2char, char2int)

In [7]:
# Batch geometry: number of parallel sequences and characters per sequence.
batch_size, seq_length = 32, 16

In [8]:
# Number of characters consumed by one batch: batch_size sequences of seq_length each.
numel_seq = batch_size * seq_length

In [9]:
# Display the per-batch character count.
numel_seq

512

In [10]:
def batch_sequence(arr, batch_size, seq_length):
    """
    Split an encoded 1-D sequence into (input, target) mini-batches.

    The first ``num_batches * batch_size * seq_length`` elements of ``arr`` are
    laid out as ``batch_size`` contiguous rows. Each batch is a window of
    ``seq_length`` columns over those rows. The target of every position is
    the element that follows it in ``arr``.

    Parameters
    ----------
    arr:
        1-D numpy array of encoded characters.
    batch_size:
        Number of sequences (rows) per batch.
    seq_length:
        Number of time steps (columns) per batch.

    Returns
    -------
    batches:
        Iterator over ``(X, y)`` pairs, both of shape [batch_size, seq_length].
    num_batches:
        Number of pairs the iterator yields.

    Raises
    ------
    ValueError
        If ``arr`` holds fewer than ``batch_size * seq_length`` elements.
    """
    numel_seq = batch_size * seq_length
    num_batches = arr.size // numel_seq

    if num_batches == 0:
        raise ValueError(
            f'need at least batch_size * seq_length = {numel_seq} elements '
            f'to build one batch, got {arr.size}')

    n_used = num_batches * numel_seq
    inputs = arr[: n_used]

    # Targets are the inputs shifted left by one position. Building them on
    # the flat array keeps the last column of every row correct: the element
    # following the end of row i is the first element of row i + 1.
    targets = np.empty_like(inputs)
    targets[:-1] = inputs[1:]
    # The very last target is the next unused element if there is one,
    # otherwise it wraps around to the start of the sequence.
    targets[-1] = arr[n_used] if arr.size > n_used else arr[0]

    inputs = inputs.reshape(batch_size, -1)
    targets = targets.reshape(batch_size, -1)

    batched_data = [(inputs[:, n : n + seq_length], targets[:, n : n + seq_length])
                    for n in range(0, inputs.shape[1], seq_length)]

    return iter(batched_data), num_batches

In [11]:
def one_hot_encode(arr, n_labels):
    """
    One-hot encode an integer array along a new trailing axis.

    Parameters
    ----------
    arr:
        Integer numpy array of label indices, any shape.
    n_labels:
        Number of distinct labels, i.e. the size of the new last axis.

    Returns
    -------
    one_hot:
        float32 array of shape ``arr.shape + (n_labels,)`` holding a single
        1. per label position and 0. everywhere else.
    """
    labels = arr.flatten()
    n_items = labels.shape[0]

    # One row per label, with the column selected by the label set to one.
    encoded = np.zeros((n_items, n_labels), dtype=np.float32)
    encoded[np.arange(n_items), labels] = 1.

    # Restore the input layout and append the label axis.
    return encoded.reshape(arr.shape + (n_labels,))

\begin{array}{ll} \\
        i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
        f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
        g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
        o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\
        c_t = f_t \odot c_{t-1} + i_t \odot g_t \\
        h_t = o_t \odot \tanh(c_t) \\
    \end{array}

\begin{aligned}
N ={} & \text{batch size} \\
L ={} & \text{sequence length} \\
D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\
H_{in} ={} & \text{input_size} \\
H_{cell} ={} & \text{hidden_size} \\
H_{out} ={} & \text{proj_size if } \text{proj_size}>0 \text{ otherwise hidden_size} \\
\end{aligned}

In [12]:
# help() prints the documentation itself and returns None, so it is called
# bare; wrapping it in print() only appended a stray "None" to the output.
help(nn.LSTM)

Help on class LSTM in module torch.nn.modules.rnn:

class LSTM(RNNBase)
 |  LSTM(*args, **kwargs)
 |  
 |  Applies a multi-layer long short-term memory (LSTM) RNN to an input
 |  sequence.
 |  
 |  
 |  For each element in the input sequence, each layer computes the following
 |  function:
 |  
 |  .. math::
 |      \begin{array}{ll} \\
 |          i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
 |          f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
 |          g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
 |          o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\
 |          c_t = f_t \odot c_{t-1} + i_t \odot g_t \\
 |          h_t = o_t \odot \tanh(c_t) \\
 |      \end{array}
 |  
 |  where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell
 |  state at time `t`, :math:`x_t` is the input at time `t`, :math:`h_{t-1}`
 |  is the hidden state of the layer at time `t-1` or the initial hidden
 |  state a

In [13]:
def get_text(fpath):
    """
    Extract the text of every page of a PDF file.

    Parameters
    ----------
    fpath:
        Path of the PDF file to read.

    Returns
    -------
    text:
        The extracted page texts in page order, each preceded by one space.
    """
    with open(fpath, "rb") as pdf_file:
        reader = PyPDF3.PdfFileReader(pdf_file)
        # Extract while the file is still open.
        page_texts = [reader.getPage(page_idx).extractText()
                      for page_idx in range(reader.numPages)]
    return ''.join(' ' + page_text for page_text in page_texts)

In [14]:
# Load the saved vocabulary list.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files from a trusted source.
with open('unique_char.pkl', 'rb') as f:
    unique_chars = pickle.load(f)
    
# Load the saved checkpoint object, placing its tensors on the CPU
# regardless of the device they were saved from.
with open('weights.pt', 'rb') as f:
    info = torch.load(f, map_location = torch.device('cpu') )

In [15]:
# Display the vocabulary list loaded from 'unique_char.pkl'.
unique_chars

['2',
 '4',
 'u',
 'V',
 'Q',
 '@',
 'l',
 'F',
 '`',
 '6',
 'f',
 ' ',
 'C',
 'b',
 'x',
 '(',
 'H',
 'I',
 'X',
 'S',
 '=',
 '?',
 'L',
 'M',
 '{',
 'P',
 'k',
 'q',
 '*',
 ',',
 'y',
 'A',
 'B',
 'D',
 'W',
 'G',
 'Y',
 '9',
 '+',
 'J',
 'o',
 'z',
 ':',
 'N',
 ')',
 '3',
 'n',
 '-',
 'c',
 'g',
 '5',
 '[',
 ';',
 '7',
 '"',
 'a',
 'e',
 'R',
 'E',
 '\n',
 'h',
 ']',
 '1',
 '0',
 'm',
 'Z',
 'r',
 'j',
 '$',
 '8',
 '#',
 'p',
 'v',
 '}',
 'i',
 'd',
 '!',
 'U',
 't',
 'T',
 '_',
 's',
 '&',
 'O',
 '.',
 'K',
 "'",
 'w',
 '/',
 '%',
 '#',
 '[',
 ']',
 '{',
 '}',
 '+',
 '-',
 '*',
 '=',
 '!']

In [16]:
# Display the full contents of the loaded checkpoint object.
info

{'model_state_dict': OrderedDict([('lstm.weight_ih_l0',
               tensor([[ 0.0153, -0.0876,  0.1679,  ...,  0.0518, -0.0367, -0.0339],
                       [-0.1801,  0.0243,  0.0953,  ..., -0.0628,  0.0793,  0.0684],
                       [-0.2093,  0.0910,  0.6245,  ..., -0.0290,  0.0812, -0.0207],
                       ...,
                       [ 0.1932, -0.1144, -0.2806,  ...,  0.0126,  0.0307,  0.0137],
                       [ 0.2034,  0.1398,  0.4949,  ...,  0.0112,  0.0776,  0.0411],
                       [ 0.0148,  0.1126,  0.0408,  ..., -0.0366,  0.0138,  0.0754]])),
              ('lstm.weight_hh_l0',
               tensor([[ 0.2525,  0.0819, -0.1941,  ..., -0.1613,  0.0800, -0.1786],
                       [ 0.4471,  0.2502, -0.4032,  ..., -0.0685,  0.2477, -0.5673],
                       [ 0.1754, -0.2310, -0.3944,  ..., -0.2395, -0.0897,  0.0155],
                       ...,
                       [ 0.2115, -0.0121, -0.0146,  ..., -0.3699,  0.0284, -0.2815],

In [17]:
# List the top-level entries stored in the checkpoint.
info.keys()

dict_keys(['model_state_dict', 'optimizer_state_dict', 'epoch', 'train_loss', 'test_loss'])

In [18]:
### Extract the text of "The-Prince.pdf"; it is encoded further down as the validation data
val_text = get_text("The-Prince.pdf")

In [19]:
### Encode train data with the loaded vocabulary (extend = False keeps it unchanged)
# Read the namedtuple field directly instead of unpacking into `_`, which in
# IPython would rebind the automatic "last output" variable.
encoded_text = encode_text(text, False, unique_chars).encoded_text

In [20]:
### Encode validation data with the loaded vocabulary (extend = False)
# NOTE(review): encode_text looks every character up in char2int, so a
# character of val_text that is missing from unique_chars raises KeyError.
encoding_results = encode_text(val_text, False, unique_chars)
encoded_val = encoding_results.encoded_text

In [21]:
# Vocabulary as returned by encode_text, and its size.
unique_char = encoding_results.unique_char
len(unique_char)

100

In [22]:
class CharRNN(nn.Module):
    """
    Character-level LSTM followed by a linear layer over the vocabulary.
    
    Parameters
    ----------
    hidden_size:
        Number of output features for LSTM.
    dropout:
        Dropout probability for LSTM.
    batch_size:
        Default number of sequences in a batch, used to size the initial
        hidden state.
    D:
        Number of directions: 1 for a unidirectional, 2 for a bidirectional
        LSTM.
    num_layers:
        Number of LSTM stacks.
    num_chars:
        Vocabulary size, i.e. the width of the one-hot input and of the
        output scores. If None, ``len(unique_chars)`` of the notebook-level
        vocabulary is used.
    
    Raises
    ------
    ValueError
        If ``D`` is neither 1 nor 2.
    """
    def __init__(self, hidden_size = 128, dropout = 0.25,
                 batch_size = 32, D = 1, num_layers = 2, num_chars = None):
        
        super().__init__()
        
        # Any other value would build a unidirectional LSTM whose output width
        # does not match the D * hidden_size expected by the linear layer.
        if D not in (1, 2):
            raise ValueError(f'D must be 1 (unidirectional) or 2 (bidirectional), got {D!r}')
        
        if num_chars is None:
            # Fall back to the vocabulary defined at notebook level.
            num_chars = len(unique_chars)
        
        self.hidden_size = hidden_size
        self.dropout_rate = dropout
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.D = D
        self.num_chars = num_chars
        
        self.lstm = nn.LSTM(input_size = self.num_chars, hidden_size = self.hidden_size,
                            dropout = self.dropout_rate, batch_first = True,
                            bidirectional = (self.D == 2), bias = True,
                            num_layers = self.num_layers)
        
        self.fc = nn.Linear(self.D*self.hidden_size, self.num_chars)
        
    def forward(self, x, hidden_state):
        """
        Run the LSTM over a batch and score every time step.
        
        Parameters
        ----------
        x:
            One-hot input. Shape: [batch_size, sequence_length, num_chars]
        hidden_state:
            Tuple (h, c), each of shape
            [D * num_layers, batch_size, hidden_size].
        
        Returns
        -------
        outputs:
            Unnormalised scores, one row per (sequence, time step).
            Shape: [batch_size * sequence_length, num_chars]
        hidden_state:
            Tuple (h, c) after the last time step, same shapes as the input
            state.
        """
        outputs, hidden_state = self.lstm(x, hidden_state)
        # Merge the batch and time axes so the linear layer scores each step.
        outputs = outputs.reshape(-1, self.D*self.hidden_size)
        outputs = self.fc(outputs)
        
        return outputs, hidden_state
    
    def init_hidden_state(self, mean, stddev, batch_size = None):
        """
        Sample initial hidden and cell state tensors from Normal(mean, stddev).
        
        Parameters
        ----------
        mean, stddev:
            Parameters of the normal distribution to sample from.
        batch_size:
            Number of sequences the state is built for; defaults to the
            ``batch_size`` given at construction.
        
        Returns
        -------
        (h, c):
            Two tensors of shape [D * num_layers, batch_size, hidden_size],
            placed on the same device as the model parameters.
        """
        if batch_size is None:
            batch_size = self.batch_size
        
        state_shape = (self.D*self.num_layers, batch_size, self.hidden_size)
        sampler = torch.distributions.Normal(mean, stddev)
        param_device = next(self.parameters()).device
        
        h = sampler.sample(state_shape).to(param_device)
        c = sampler.sample(state_shape).to(param_device)
        
        return (h, c)
        

In [23]:
# Build the network with D = 1 (unidirectional LSTM); every other argument keeps its default.
model = CharRNN(D = 1)

In [24]:
# Batch geometry (same values as the earlier configuration cell).
batch_size, seq_length = 32, 16

# Optimisation settings.
max_norm = 1.5   # threshold passed to gradient-norm clipping
epochs = 5       # number of passes over the training data
lr = 1e-4        # learning rate handed to the optimizer constructor

In [25]:
# Print the module structure.
print(model)

CharRNN(
  (lstm): LSTM(100, 128, num_layers=2, batch_first=True, dropout=0.25)
  (fc): Linear(in_features=128, out_features=100, bias=True)
)


In [26]:
# Copy the parameters stored in the checkpoint into the model.
model.load_state_dict(info['model_state_dict'])

<All keys matched successfully>

In [27]:
# Move the model to the selected device.
model.to(device)

CharRNN(
  (lstm): LSTM(100, 128, num_layers=2, batch_first=True, dropout=0.25)
  (fc): Linear(in_features=128, out_features=100, bias=True)
)

In [28]:
### Objective function and optimizer
# Adam over all model parameters, constructed with the configured learning rate.
opt = optim.Adam(model.parameters(), lr = lr)
# Cross-entropy between the model's per-step scores and the integer character targets.
criterion = nn.CrossEntropyLoss()

In [29]:
# Restore the optimizer state saved in the checkpoint.
opt.load_state_dict(info['optimizer_state_dict'])

In [30]:
# List the attributes available on the optimizer object (exploratory).
dir(opt)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_hook_for_profile',
 '_zero_grad_profile_name',
 'add_param_group',
 'defaults',
 'load_state_dict',
 'param_groups',
 'state',
 'state_dict',
 'step',
 'zero_grad']

In [31]:
# Set the learning rate of the optimizer's first parameter group to 1e-5
# (the optimizer was constructed with lr = 1e-4).
# NOTE(review): only param_groups[0] is updated; confirm the optimizer has a single group.
opt.param_groups[0]['lr'] = 1e-5

In [32]:
# Run a garbage-collection pass before training; the cell displays gc.collect()'s return value.
gc.collect()

11662

In [33]:
# Check the container type of the encoded training text.
type(encoded_text)

numpy.ndarray

In [34]:
### Train data: number of batches produced per pass
# Index the returned (iterator, count) pair instead of unpacking into `_`,
# which in IPython would rebind the automatic "last output" variable.
num_batches = batch_sequence(encoded_text, batch_size, seq_length)[1]

In [35]:
### Validation data: number of batches produced per pass
# Index the returned (iterator, count) pair instead of unpacking into `_`,
# which in IPython would rebind the automatic "last output" variable.
num_batches_ = batch_sequence(encoded_val, batch_size, seq_length)[1]

In [36]:
### Number of training batches per epoch
print(num_batches)

3877


In [37]:
### Number of validation batches per validation pass
print(num_batches_)

551


In [None]:
### Outer training loop
for epoch in range(1, epochs + 1):
    # Every epoch starts from a freshly sampled hidden/cell state.
    h = model.init_hidden_state(mean = 0., stddev = .5)
    iteration = 0
    train_losses = list()
    
    ### Inner training loop
    for X, y in batch_sequence(encoded_text, batch_size, seq_length)[0]:
        X = one_hot_encode(X, len(unique_chars))
        X, y = torch.as_tensor(X).to(device), torch.as_tensor(y).to(device)
        
        # The validation pass below switches the model to eval mode, so
        # training mode is re-enabled on every step.
        model.train()
        iteration += 1
        
        # Carry the state over from the previous batch but cut it off from
        # that batch's graph, so backpropagation stops at the batch boundary.
        h = tuple(each.detach().to(device) for each in h)
        opt.zero_grad()
        
        outputs, h = model(X, h)
        
        # The model returns one row of scores per (sequence, time step), so
        # the targets are flattened to match.
        loss = criterion(outputs, y.reshape(-1,).long())
        
        # The hidden state is detached above, so each step has its own graph
        # and nothing needs to be retained after the backward pass.
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        opt.step()
        
        train_losses.append(loss.item())
        
        ### Outer validation loop: every 20 iterations and on the last batch
        if (not iteration % 20) or (iteration == num_batches):
            val_losses = list()
            model.eval()
            h_ = model.init_hidden_state(mean = 0., stddev = .5)
            
            ### Inner validation loop
            # No gradients are needed for evaluation; skipping graph
            # construction saves memory and time without changing the losses.
            with torch.no_grad():
                for X_, y_ in batch_sequence(encoded_val, batch_size, seq_length)[0]:
                    h_ = tuple(each.detach().to(device) for each in h_)
                    
                    X_ = torch.as_tensor(one_hot_encode(X_, len(unique_chars))).to(device)
                    y_ = torch.as_tensor(y_).to(device)
                    
                    outputs_, h_ = model(X_, h_)
                    
                    batch_val_loss = criterion(outputs_, y_.reshape(-1,).long())
                    val_losses.append(batch_val_loss.item())

            ### Report training and validation losses
            # Validation loss is averaged over this pass; training loss is the
            # running average since the start of the epoch.
            val_loss = torch.Tensor(val_losses).mean().item()

            train_loss = torch.Tensor(train_losses).mean().item()
            
            print('='*80)
            print(f'Epoch: {epoch}/{epochs}, Iteration {iteration}/{num_batches},',
                  f'Train Loss: {train_loss:.4f}, Valid Loss: {val_loss:.4f}')
        
    print('\n'+'='*80)
    print('='*80)

Epoch: 1/5, Iteration 20/3877, Train Loss: 1.5804, Valid Loss: 1.8867
Epoch: 1/5, Iteration 40/3877, Train Loss: 1.5696, Valid Loss: 1.8858
Epoch: 1/5, Iteration 60/3877, Train Loss: 1.5565, Valid Loss: 1.8852
Epoch: 1/5, Iteration 80/3877, Train Loss: 1.5509, Valid Loss: 1.8850
Epoch: 1/5, Iteration 100/3877, Train Loss: 1.5512, Valid Loss: 1.8846
Epoch: 1/5, Iteration 120/3877, Train Loss: 1.5526, Valid Loss: 1.8849
Epoch: 1/5, Iteration 140/3877, Train Loss: 1.5493, Valid Loss: 1.8850
Epoch: 1/5, Iteration 160/3877, Train Loss: 1.5453, Valid Loss: 1.8849
Epoch: 1/5, Iteration 180/3877, Train Loss: 1.5421, Valid Loss: 1.8852
Epoch: 1/5, Iteration 200/3877, Train Loss: 1.5378, Valid Loss: 1.8850
Epoch: 1/5, Iteration 220/3877, Train Loss: 1.5377, Valid Loss: 1.8850
Epoch: 1/5, Iteration 240/3877, Train Loss: 1.5360, Valid Loss: 1.8853
Epoch: 1/5, Iteration 260/3877, Train Loss: 1.5342, Valid Loss: 1.8856
Epoch: 1/5, Iteration 280/3877, Train Loss: 1.5345, Valid Loss: 1.8859
Epoch: 1/5

Epoch: 1/5, Iteration 1100/3877, Train Loss: 1.5337, Valid Loss: 1.8833
Epoch: 1/5, Iteration 1120/3877, Train Loss: 1.5335, Valid Loss: 1.8830
Epoch: 1/5, Iteration 1140/3877, Train Loss: 1.5333, Valid Loss: 1.8833
Epoch: 1/5, Iteration 1160/3877, Train Loss: 1.5341, Valid Loss: 1.8837
Epoch: 1/5, Iteration 1180/3877, Train Loss: 1.5340, Valid Loss: 1.8836
Epoch: 1/5, Iteration 1200/3877, Train Loss: 1.5337, Valid Loss: 1.8834
Epoch: 1/5, Iteration 1220/3877, Train Loss: 1.5335, Valid Loss: 1.8830
Epoch: 1/5, Iteration 1240/3877, Train Loss: 1.5332, Valid Loss: 1.8828
Epoch: 1/5, Iteration 1260/3877, Train Loss: 1.5335, Valid Loss: 1.8829
Epoch: 1/5, Iteration 1280/3877, Train Loss: 1.5337, Valid Loss: 1.8832
Epoch: 1/5, Iteration 1300/3877, Train Loss: 1.5340, Valid Loss: 1.8832
Epoch: 1/5, Iteration 1320/3877, Train Loss: 1.5337, Valid Loss: 1.8834
Epoch: 1/5, Iteration 1340/3877, Train Loss: 1.5332, Valid Loss: 1.8839
Epoch: 1/5, Iteration 1360/3877, Train Loss: 1.5329, Valid Loss:

Epoch: 1/5, Iteration 2180/3877, Train Loss: 1.5306, Valid Loss: 1.8825
Epoch: 1/5, Iteration 2200/3877, Train Loss: 1.5309, Valid Loss: 1.8821
Epoch: 1/5, Iteration 2220/3877, Train Loss: 1.5311, Valid Loss: 1.8820
Epoch: 1/5, Iteration 2240/3877, Train Loss: 1.5311, Valid Loss: 1.8820
Epoch: 1/5, Iteration 2260/3877, Train Loss: 1.5311, Valid Loss: 1.8822
Epoch: 1/5, Iteration 2280/3877, Train Loss: 1.5310, Valid Loss: 1.8827
Epoch: 1/5, Iteration 2300/3877, Train Loss: 1.5314, Valid Loss: 1.8827
Epoch: 1/5, Iteration 2320/3877, Train Loss: 1.5313, Valid Loss: 1.8827
Epoch: 1/5, Iteration 2340/3877, Train Loss: 1.5316, Valid Loss: 1.8826
Epoch: 1/5, Iteration 2360/3877, Train Loss: 1.5316, Valid Loss: 1.8831
Epoch: 1/5, Iteration 2380/3877, Train Loss: 1.5317, Valid Loss: 1.8833
Epoch: 1/5, Iteration 2400/3877, Train Loss: 1.5317, Valid Loss: 1.8831
Epoch: 1/5, Iteration 2420/3877, Train Loss: 1.5318, Valid Loss: 1.8834
Epoch: 1/5, Iteration 2440/3877, Train Loss: 1.5319, Valid Loss:

Epoch: 1/5, Iteration 3260/3877, Train Loss: 1.5336, Valid Loss: 1.8827
Epoch: 1/5, Iteration 3280/3877, Train Loss: 1.5336, Valid Loss: 1.8824
Epoch: 1/5, Iteration 3300/3877, Train Loss: 1.5337, Valid Loss: 1.8825
Epoch: 1/5, Iteration 3320/3877, Train Loss: 1.5335, Valid Loss: 1.8824
Epoch: 1/5, Iteration 3340/3877, Train Loss: 1.5336, Valid Loss: 1.8820
Epoch: 1/5, Iteration 3360/3877, Train Loss: 1.5337, Valid Loss: 1.8819
Epoch: 1/5, Iteration 3380/3877, Train Loss: 1.5338, Valid Loss: 1.8821
Epoch: 1/5, Iteration 3400/3877, Train Loss: 1.5341, Valid Loss: 1.8817
Epoch: 1/5, Iteration 3420/3877, Train Loss: 1.5345, Valid Loss: 1.8811
Epoch: 1/5, Iteration 3440/3877, Train Loss: 1.5346, Valid Loss: 1.8809
Epoch: 1/5, Iteration 3460/3877, Train Loss: 1.5348, Valid Loss: 1.8810
Epoch: 1/5, Iteration 3480/3877, Train Loss: 1.5350, Valid Loss: 1.8816
Epoch: 1/5, Iteration 3500/3877, Train Loss: 1.5352, Valid Loss: 1.8814
Epoch: 1/5, Iteration 3520/3877, Train Loss: 1.5353, Valid Loss:

Epoch: 2/5, Iteration 440/3877, Train Loss: 1.5296, Valid Loss: 1.8810
Epoch: 2/5, Iteration 460/3877, Train Loss: 1.5310, Valid Loss: 1.8812
Epoch: 2/5, Iteration 480/3877, Train Loss: 1.5308, Valid Loss: 1.8815
Epoch: 2/5, Iteration 500/3877, Train Loss: 1.5312, Valid Loss: 1.8813
Epoch: 2/5, Iteration 520/3877, Train Loss: 1.5308, Valid Loss: 1.8812
Epoch: 2/5, Iteration 540/3877, Train Loss: 1.5298, Valid Loss: 1.8813
Epoch: 2/5, Iteration 560/3877, Train Loss: 1.5307, Valid Loss: 1.8813
Epoch: 2/5, Iteration 580/3877, Train Loss: 1.5299, Valid Loss: 1.8809
Epoch: 2/5, Iteration 600/3877, Train Loss: 1.5315, Valid Loss: 1.8811
Epoch: 2/5, Iteration 620/3877, Train Loss: 1.5311, Valid Loss: 1.8809
Epoch: 2/5, Iteration 640/3877, Train Loss: 1.5303, Valid Loss: 1.8807
Epoch: 2/5, Iteration 660/3877, Train Loss: 1.5308, Valid Loss: 1.8808
Epoch: 2/5, Iteration 680/3877, Train Loss: 1.5323, Valid Loss: 1.8808
Epoch: 2/5, Iteration 700/3877, Train Loss: 1.5327, Valid Loss: 1.8807
Epoch:

Epoch: 2/5, Iteration 1520/3877, Train Loss: 1.5290, Valid Loss: 1.8817
Epoch: 2/5, Iteration 1540/3877, Train Loss: 1.5286, Valid Loss: 1.8821
Epoch: 2/5, Iteration 1560/3877, Train Loss: 1.5287, Valid Loss: 1.8814
Epoch: 2/5, Iteration 1580/3877, Train Loss: 1.5285, Valid Loss: 1.8808
Epoch: 2/5, Iteration 1600/3877, Train Loss: 1.5285, Valid Loss: 1.8810
Epoch: 2/5, Iteration 1620/3877, Train Loss: 1.5280, Valid Loss: 1.8806
Epoch: 2/5, Iteration 1640/3877, Train Loss: 1.5281, Valid Loss: 1.8807
Epoch: 2/5, Iteration 1660/3877, Train Loss: 1.5277, Valid Loss: 1.8809
Epoch: 2/5, Iteration 1680/3877, Train Loss: 1.5269, Valid Loss: 1.8811
Epoch: 2/5, Iteration 1700/3877, Train Loss: 1.5268, Valid Loss: 1.8808
Epoch: 2/5, Iteration 1720/3877, Train Loss: 1.5269, Valid Loss: 1.8809
Epoch: 2/5, Iteration 1740/3877, Train Loss: 1.5266, Valid Loss: 1.8807
Epoch: 2/5, Iteration 1760/3877, Train Loss: 1.5270, Valid Loss: 1.8807
Epoch: 2/5, Iteration 1780/3877, Train Loss: 1.5273, Valid Loss:

Epoch: 2/5, Iteration 2600/3877, Train Loss: 1.5299, Valid Loss: 1.8807
Epoch: 2/5, Iteration 2620/3877, Train Loss: 1.5298, Valid Loss: 1.8807


In [None]:
# Persist the current model and optimizer state to 'new-weights.net'.
with open('new-weights.net', 'wb') as f:
    torch.save(dict(model_state_dict = model.state_dict(),
                    optimizer_state_dict = opt.state_dict()),
               f)