# Lab 4 - RNN 

In [1]:
import numpy as np
import scipy
import pickle
import matplotlib.pyplot as plt
import pandas as pd

## RNN Back Propagation and AdaGrad

In [30]:
from sklearn import preprocessing

class RNN:
    '''
    Vanilla character-level recurrent network:

        a_t = W h_{t-1} + U x_t + b
        h_t = tanh(a_t)
        o_t = V h_t + c
        p_t = softmax(o_t)

    The output size K is only known once the alphabet is known, so the
    K-dependent parameters (U, V, c) are created either by passing K to the
    constructor or by calling init_one_hot().
    '''

    def __init__(self, number_states=100, eta=0.1, seq_length=25, K=None):
        '''
        Inputs:
            number_states: dimensionality of hidden state (m)
            eta: learning rate
            seq_length: length of sequences used in training
            K: output size (optional). When omitted, U, V and c stay None
               until init_one_hot() is called.
        Initialises weight matrices and params
        '''
        self.sigma = 0.01  # std-dev of the random weight initialisation
        self.number_states = number_states  # m
        self.eta = eta
        self.seq_length = seq_length

        m = self.number_states
        self.W = np.random.randn(m, m) * self.sigma
        self.b = np.zeros((m, 1))

        # Parameters whose shape depends on K; filled in by _init_output_params.
        self.K = None
        self.output_size = None
        self.U = None
        self.V = None
        self.c = None
        if K is not None:
            self._init_output_params(K)

    def _init_output_params(self, K):
        ''' (Re)creates every parameter whose shape depends on the output size K '''
        m = self.number_states
        # K and output_size are kept in sync: both names are used by the methods below.
        self.K = K
        self.output_size = K
        self.U = np.random.randn(m, K) * self.sigma
        self.V = np.random.randn(K, m) * self.sigma
        self.c = np.zeros((K, 1))

    def init_one_hot(self, chars):
        '''
        Inputs:
            chars: list of all distinct characters in the text
        Fits the one hot encoder and, if the output size changed (or was
        never set), initialises the K-dependent parameters.
        '''
        self.chars = chars
        self.one_hot_encoder = preprocessing.LabelBinarizer()
        self.one_hot_encoder.fit(chars)

        K = len(chars)
        if self.U is None or self.K != K:
            self._init_output_params(K)

    def one_hot_encode(self, seq):
        '''
        Inputs:
            seq: list (or string) of characters to one hot encode
        Returns:
            Y: (output_size, T) where output_size is dimensionality of one hot encoding, T is len(seq)
        '''
        # list(seq) splits a string into its individual characters
        Y = self.one_hot_encoder.transform(list(seq)).T
        assert Y.shape == (self.K, len(seq))
        return Y

    def one_hot_to_characters(self, Y):
        '''
        Inputs:
            Y: Array of one hot encodings (output size, T)
        Returns:
            seq: string of the T decoded characters
        '''
        assert Y.shape[0] == self.K
        chars_list = self.one_hot_encoder.inverse_transform(Y.T)
        seq = ''.join(chars_list)
        return seq

    def softmax(self, X):
        '''
        Inputs:
            X: scores, normalised along axis 0
        Returns:
            array of the same shape whose entries sum to 1 along axis 0
        '''
        # Subtracting the max leaves the result unchanged mathematically but
        # keeps np.exp from overflowing on large scores.
        shifted = X - np.max(X, axis=0, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=0)

    def loss(self, Y, P):
        '''
        Inputs:
            Y: one hot labels, (K, T) as returned by one_hot_encode
            P: probabilities, (T, K) as returned by forward
        Returns:
            Sum of cross entropy loss for each character in sequence
        '''
        labels = np.asarray(Y).T  # (T, K), same layout as P
        probs = np.asarray(P)
        if probs.shape != labels.shape:
            raise ValueError(
                'P must have shape (T, K) matching Y of shape (K, T); '
                'got Y %s and P %s' % (np.shape(Y), probs.shape)
            )
        # probability assigned to the correct character at each timestep
        p_true = np.sum(labels * probs, axis=1)
        return -np.sum(np.log(p_true))

    def sample_character(self, P, n):
        '''
        Inputs:
            P: output probability distribution used to sample a character, (K,) or (K, 1)
            n: number of characters to sample
        Returns:
            Y: (K, n) matrix containing one-hot encoding of n sampled characters
        '''
        # np.random.choice needs a 1-D probability vector
        probs = np.asarray(P, dtype=float).ravel()
        idx = np.random.choice(probs.size, size=n, p=probs)
        # Build the one-hot columns directly from the sampled indices: the
        # encoder is fitted on characters, not on integer indices.
        Y = np.zeros((probs.size, n), dtype=int)
        Y[idx, np.arange(n)] = 1
        return Y

    def _step(self, x_t, h_tm1):
        '''
        One recurrence step.
        Inputs:
            x_t: input column (K, 1)
            h_tm1: previous hidden state (m, 1)
        Returns:
            h_t: new hidden state (m, 1)
            p_t: probability for each possible character (K, 1)
        '''
        if self.U is None:
            raise RuntimeError(
                'output size unknown: pass K to RNN() or call init_one_hot() first'
            )
        a_t = self.W @ h_tm1 + self.U @ x_t + self.b
        h_t = np.tanh(a_t)
        o_t = self.V @ h_t + self.c
        return h_t, self.softmax(o_t)

    def synthesise_sequence(self, x0, h0, T=None):
        ''' Ex0.3
        Inputs:
            x0: vector for first (dummy) input to RNN
            h0: hidden state at time 0
            T: number of characters to generate (defaults to seq_length)
        Returns:
            Y: (K, T) one hot encoding of the sequence synthesised using
               current parameters
        '''
        n_steps = self.seq_length if T is None else T
        x_t = np.reshape(x0, (-1, 1))
        h_tm1 = np.reshape(h0, (-1, 1))
        Y = np.zeros((self.K, n_steps), dtype=int)

        for t in range(n_steps):
            h_tm1, p_t = self._step(x_t, h_tm1)
            # the sampled character becomes the next input
            x_t = self.sample_character(p_t, 1)
            Y[:, [t]] = x_t
        return Y

    def backward_pass(self, X):
        ''' Not implemented yet: currently does nothing and returns None '''
        pass

    def forward(self, X, h_tm1, Y=None):
        '''
        Completes a forward pass of RNN network
        Inputs:
            X: Sequence (self.output_size, T)
            h_tm1: previous state, (m,) or (m, 1)
            Y: optional one hot labels (self.output_size, T); needed to
               compute the loss
        Returns:
            loss: cross-entropy loss of sequence, or None when Y is not given
            P: probabilities across timesteps, (T, output_size)
            H: hidden states across timesteps, (T+1, number_states);
               H[0] is the initial state and H[t+1] the state after step t
        '''
        T = X.shape[1]  # length of sequence
        P = np.zeros((T, self.output_size))
        H = np.zeros((T + 1, self.number_states))

        h_tm1 = np.reshape(h_tm1, (-1, 1))
        H[0, :] = h_tm1.ravel()

        for t in range(T):
            x_t = X[:, [t]]  # keep the column shape (K, 1)
            h_t, p_t = self._step(x_t, h_tm1)

            # store in P, H for use in backward algorithm
            P[t, :] = p_t.ravel()
            H[t + 1, :] = h_t.ravel()

            h_tm1 = h_t

        loss = None if Y is None else self.loss(Y, P)
        return loss, P, H

    def train(self, X, Y):
        ''' Not implemented yet: currently does nothing and returns None '''
        pass
        

## Processing Data

In [31]:
# Read the whole book into memory; the context manager closes the file handle
# as soon as the text has been read.
with open('goblet_book.txt', 'r') as book_file:
    book_data = book_file.read()
# Distinct characters of the text, sorted so the list order is the same on every run.
book_chars = sorted(set(book_data))
rnn = RNN()
rnn.init_one_hot(book_chars)

# test one hot encoding: encoding then decoding should give back the input
y = rnn.one_hot_encode('asda')
recovered = rnn.one_hot_to_characters(y)
recovered

'asda'

## Checking gradients 

To check the gradients, we debug the model on a small subset of `book_data`.

In [40]:
seq_length = 25
# Inputs are the first seq_length characters of the book; the label for each
# input character is the character that follows it in the book.
X_chars, Y_chars = book_data[:seq_length], book_data[1:seq_length + 1]
# One hot encode inputs, then labels: each result is (K, seq_length)
X, Y = (rnn.one_hot_encode(chars) for chars in (X_chars, Y_chars))

H
True


## Train your RNN using AdaGrad

## Synthesising Donald Trump tweets