# Base class for layers

In [1]:
# Import
import numpy as np
import matplotlib.pyplot as plt

In [38]:
class Module:
    
    def __init__(self, seq_len, hidden_sz, vocab_sz, weights):
        self.seq_len = seq_len
        self.hidden_sz = hidden_sz
        self.vocab_sz = vocab_sz
        
        self.params = dict() # Dictionary of parameters including weights and gradients
        self.hidden = dict() # Dictionary of hidden
        self.sm_ps = dict()  # Dictionary of softmax pro values
        
        self.make_params_dict(weights)
        
        # Initialize weights, hidden and cell states.
        self.init_weights()
        #self.reset_hidden()
        
        # Start with zero loss
        #self.loss = 0
        
    def make_params_dict(self, weights):
        """
        Set up the dictionary to contain the model weights, biases and gradients.
        Based on the weights the model needs.
        """
        
        for weight in weights:
            
            if weight == 'Why':
                size = (self.vocab_sz, self.hidden_sz)
            elif 'B' in weight and weight != 'By':
                size = (self.hidden_sz, 1)
            elif 'B' in weight and weight == 'By':
                size = (self.vocab_sz, 1)
            else:
                size = (self.hidden_sz, self.vocab_sz)
            
            self.params[weight] = {'size': size}
        
    def init_weights(self):
        """
        Initializes weights and biases based on the inputs hidden sz and
        vocab_sz.
        """
        
        for param in self.params:    
            # Initialize a weight matrix
            x,y = self.params[param]['size']
            if y != 1:
                self.params[param]['weight'] = np.random.randn(x, y) * 0.01
            # Initialize a bias
            else:
                self.params[param]['bias'] = np.zeros((x, y))
                
    def init_grads(self):
        """
        Initialize gradients for biases and weights
        """

    def init_adagrad_mem(self):
        """
        Initialize memory matrices needed for Adagrad.
        """

    def update_grads(self, optimizer, lr):
        """
        Update gradients based on the optimizer you choose.
        """

    def reset_hidden(self):
        """
        Reset hidden layers and possible cell state
        """

    def plot_losses(self):
        """
        Plot the cross entropy loss against the number of training sequences
        """

        if hasattr(self, 'losses'):
            plt.plot(self.losses)
            plt.xlabel('Number of training sequences')
            plt.ylabel('Cross Entropy Loss')
            plt.show()
        else:
            print('Error: No losses recorded, train the model!')

    def train(self, data, optimizer, lr, epochs, progress=True):
        """
        Train the model by chopping the data in sequences followed by performing
        the forward pass, backward pass and update the gradients.
        """
        self.losses = []
        smooth_loss = -np.log(1.0 / self.vocab_sz) * self.seq_length # Loss at iteration 0

        # Loop over the amount of epochs
        for epoch in range(epochs):
            n = 0

            # Reset hidden state
            self.reset_hidden()

            data_len = len(data)

            # Loop over amount of sequences in the data
            sequences_amount = int(data_len // self.seq_length)
            for j in range(sequences_amount):

                start_pos = self.seq_length * j

                # Embed the inputs and targets
                xs = [char_to_idx[ch] for ch in data[start_pos:start_pos + self.seq_length]]
                targets = [char_to_idx[ch] for ch in data[start_pos + 1:start_pos + self.seq_length + 1]]

                # Forward pass
                self.forward(xs, targets)

                # Backward pass
                self.backward(xs, targets)

                # Update weight matrices
                self.update_gradients(optimizer, lr)

                smooth_loss = smooth_loss * 0.999 + self.loss * 0.001

                if progress and n % 1000 == 0:
                    print(f'Epoch {epoch + 1}: {n} / {sequences_amount}: {smooth_loss}')

                n += 1
                self.losses.append(smooth_loss)
                
    def predict(self, start, n):
        """
        Predict a sequence of text based on a starting string.
        """
        seed_idx = char_to_idx[start[-1]]
        x = np.zeros((self.vocab_sz, 1))
        x[seed_idx] = 1
        
        txt = [ch for ch in start]
        
        idxes = []
        
        h = self.hs[-1]
        
        for i in range(n):
            
            # Calculate the hidden
            h = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h) + self.bh)
            # Calculate y
            y = np.dot(self.Why, h) + self.by

            sm_p = np.exp(y) / np.sum(np.exp(y)) # Softmax probabilty
            # Determine character based on weighted probability (is using the softmax probability)
            idx = np.random.choice(range(self.vocab_sz), p=sm_p.ravel())
            idxes.append(idx)
            
            # Save X for next iteration
            x = np.zeros((self.vocab_sz, 1))
            x[idx] = 1
            
        prediction = [idx_to_char[idx] for idx in idxes]
        
        txt += prediction
        
        return txt

In [39]:
weights = ['Whf', 'Wxf', 'Whi', 'Wxi', 'Whc', 'Wxc', 'Who', 'Wxo', 'Why', 'Bf', 'Bi', 'Bc', 'Bo', 'By']
model = Module(5, 100, 20, weights)

In [40]:
print(model.params)

{'Whf': {'size': (100, 20), 'weight': array([[ 0.00108057,  0.01312174, -0.01421968, ...,  0.010181  ,
         0.00531578, -0.01831581],
       [ 0.00506987, -0.00877226,  0.01519699, ...,  0.01425612,
        -0.00443138,  0.00391311],
       [-0.00126184, -0.00139632, -0.00765564, ...,  0.01481275,
        -0.00240605,  0.01176863],
       ...,
       [ 0.0144608 , -0.02194178,  0.00959508, ...,  0.01878073,
        -0.00015644,  0.00052925],
       [-0.00703823, -0.00870249,  0.00098657, ...,  0.00244902,
        -0.01471234, -0.00381421],
       [ 0.00185067,  0.00832153, -0.00186435, ..., -0.01279211,
        -0.01063128, -0.00987165]])}, 'Wxf': {'size': (100, 20), 'weight': array([[ 4.85573471e-03,  2.44126656e-03,  2.82596381e-03, ...,
         1.28483153e-02, -1.85914958e-03,  2.59963694e-03],
       [ 7.57801186e-03, -1.27181522e-02, -6.23036940e-03, ...,
         1.45176390e-02,  1.95618580e-02, -1.58105080e-02],
       [-6.25158800e-03,  2.00154906e-02, -2.58082966e-03, ...