In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np

In [2]:
with open('data/anna.txt', 'r') as f:
    text = f.read()

In [3]:
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [4]:
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# encode the text
encoded = np.array([char2int[ch] for ch in text])
encoded

array([44, 24, 65, ..., 13, 16, 19])

In [5]:
len(encoded), len(text)

(1985223, 1985223)

In [6]:
def one_hot_encode(arr, n_labels):
    
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    print(one_hot)
    print(f'===================================')
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1
    print(one_hot)
    print(f'=============={one_hot.shape}=====================')
    
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    print(one_hot.shape)
    
    return one_hot

In [7]:
test_seq = np.array([[3, 5, 1]])

one_hot = one_hot_encode(test_seq, 8)
print(one_hot)

[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
[[0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0.]]
(1, 3, 8)
[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0.]]]


In [8]:
def get_batches(arr, batch_size, seq_length):
    
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    batch_size_total = batch_size * seq_length
    ## TODO: Get the number of batches we can make
    n_batches = len(arr)//batch_size_total
    
    ## TODO: Keep only enough characters to make full batches
    arr = arr[:n_batches*batch_size_total]
    
    ## TODO: Reshape into batch_size rows
    arr = arr.reshape((batch_size, -1))
    
    ## TODO: Iterate over the batches using a window of size seq_length
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:n + seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [9]:
batches = get_batches(encoded, 8, 50)
x, y = next(batches)

In [10]:
x

array([[44, 24, 65, ...,  8,  6, 60],
       [13, 42, 50, ..., 27, 60, 22],
       [ 6, 50, 81, ..., 24, 65,  8],
       ...,
       [ 1, 21, 13, ..., 22,  1, 24],
       [33, 30, 50, ..., 65,  0, 33],
       [17, 51, 53, ...,  0, 42, 33]])

In [11]:
y

array([[24, 65,  8, ...,  6, 60, 24],
       [42, 50, 33, ..., 60, 22, 62],
       [50, 81, 33, ..., 65,  8, 50],
       ...,
       [21, 13, 13, ...,  1, 24,  6],
       [30, 50, 50, ...,  0, 33, 13],
       [51, 53, 42, ..., 42, 33,  6]])

In [12]:
x.shape, y.shape

((8, 248150), (8, 248150))

In [13]:
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print(f'GPU is available')
else:
    print(f'GPU is not available')

GPU is not available


In [15]:
class CharRNN(nn.Module):
    
    def __init__(self, tokens, n_hidden=256, n_layers=2,
                 drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.lr = lr
        self.n_hidden = n_hidden
        
        #creating character dictionaries
        self.chars = token
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii in self.int2char.items()}
        
        ## TODO: define the layer of the model
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers,
                            dropout=drop_prob, batch_first=True)
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc = nn.Linear(n_hidden, len(self.chars))
        
    def forward(self, x, hidden):
        
        r_out_put, hidden = self.lstm(x, hidden)
        out = self.dropout(r_output)
        out = out.contiguous().view(-1, self.n_hidden)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
            
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
            
        return hidden