In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
class LSTM:
    """End-to-end many to many LSTM network implementation with numpy."""
    def __init__(self, input_size, hidden_size, output_size):
        """
        Specify the Network's architecture.
        
        Initializes weights and biases of the Network.
        
        Parameters
        ----------
        input_size (int) : size of the input vector X
        hidden_size (int): size of the hidden vector
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.Wxa = np.random.randn(hidden_size, input_size) * 0.01
        self.Wxi = np.random.randn(hidden_size, input_size) * 0.01
        self.Wxf = np.random.randn(hidden_size, input_size) * 0.01
        self.Wxo = np.random.randn(hidden_size, input_size) * 0.01
        
        self.Wha = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Whi = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Whf = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Who = np.random.randn(hidden_size, hidden_size) * 0.01
        
        self.ba = np.zeros(hidden_size)
        self.bi = np.zeros(hidden_size)
        self.bf = np.zeros(hidden_size)
        self.bo = np.zeros(hidden_size)

        self.Why = np.random.randn(output_size, hidden_size) * 0.01
        self.by = np.zeros(output_size)

#         self.Wxa = np.array([0.45, 0.25])
#         self.Wxi = np.array([0.95, 0.8])
#         self.Wxf = np.array([0.7, 0.45])
#         self.Wxo = np.array([0.6, 0.4])
        
#         self.Wha = np.array([[0.15]])
#         self.Whi = np.array([[0.8]])
#         self.Whf = np.array([[0.1]])
#         self.Who = np.array([[0.25]])
        
#         self.ba = np.array([0.2])
#         self.bi = np.array([0.65])
#         self.bf = np.array([0.15])
#         self.bo = np.array([0.1])
    def sigmoid(self, x):
        """Calculate the sigmoid of x."""
        return (1 / (1 + np.exp(-x)))
    def forwardprop(self, X, y, state_prev, hprev):
        """
        Forward propagation of the Network.
        
        Parameters
        ----------
        X (np ndarray) : Array of size (training_examples * input_size) containing one training example for each unit
        y (np array)   : Vector containing the label for each unit
        
        Returns
        -------
        ops (list)   : Contains a dictionary with (a, i, f, o, state, h, p) for each cell
        loss (float) : Cross entropy loss of all the cells in the batch
        """
#         state_prev = np.zeros(self.hidden_size)
#         hprev = np.zeros(self.hidden_size)
        ops = [0] * X.shape[0]
        loss = 0
        for j in range(X.shape[0]):
            a = np.tanh(np.dot(self.Wxa, X[j]) + np.dot(self.Wha, hprev) + self.ba)
            i = self.sigmoid(np.dot(self.Wxi, X[j]) + np.dot(self.Whi, hprev) + self.bi)
            f = self.sigmoid(np.dot(self.Wxf, X[j]) + np.dot(self.Whf, hprev) + self.bf)
            o = self.sigmoid(np.dot(self.Wxo, X[j]) + np.dot(self.Who, hprev) + self.bo)
            
            state = state_prev * f + i * a
            h = o * np.tanh(state)
#             print("h = ", h)
            out = np.dot(self.Why, h) + self.by
            p = np.exp(out) / np.sum(np.exp(out))
#             print("probs = ", p)
            ops[j] = {'a': a, 'i': i, 'f': f, 'o': o, 'state': state, 'h': h, 'p': p}
            loss += -np.log(np.dot(p, y[j]))
            state_prev = state
            hprev = h
        self.ops = ops
        self.loss = loss
        return (ops, loss)
    def update_weights(self, l_rate, dWhy, dby,
                       dWxa, dWxi, dWxf, dWxo,
                       dWha, dWhi, dWhf, dWho,
                       dba, dbi, dbf, dbo):
        """Update the weights of the network."""
        params = [self.Wxa, self.Wxi, self.Wxf, self.Wxo,
                  self.Wha, self.Whi, self.Whf, self.Who,
                  self.ba, self.bi, self.bf, self.bo,
                  self.Why, self.by]
        dparams = [dWxa, dWxi, dWxf, dWxo,
                   dWha, dWhi, dWhf, dWho,
                   dba, dbi, dbf, dbo,
                   dWhy, dby]
        for param, dparam in zip(params, dparams):
            param -= l_rate * dparam
    def backprop(self, ops, X, y, l_rate):
        """
        Backpropagation of the Network.
        
        Parameters
        ----------
        ops (list)     : Contains a tuple with (a, i, f, o, state, out) for each unit (calculated during forwardprop)
        X (np ndarray) : Array of size (input_size * units) containing one training example for each unit
        y (np array)   : Vector containing the label for each unit
        l_rate (float) : Learning rate of the Network
        
        Returns
        -------
        Deltas needed to update all weights
        """
        dhprev = 0
        dstate_prev = 0 # dstate[-1] * f[-1]
        
        dWxa, dWxi, dWxf, dWxo = 0, 0, 0, 0
        dWha, dWhi, dWhf, dWho = 0, 0, 0, 0
        dba, dbi, dbf, dbo = 0, 0, 0, 0
        dWhy, dby = 0, 0
        
        delta_W = 0
        delta_U = 0
        delta_b = 0
        for j in reversed(range(X.shape[0])):
            dout = ops[j]['p'] - y[j]
            dh = np.dot(self.Why.T, dout) + dhprev
            dstate = dh * ops[j]['o'] * (1 - np.tanh(ops[j]['state'])**2) + dstate_prev
            da = dstate * ops[j]['i'] * (1 - ops[j]['a']**2)
            di = dstate * ops[j]['a'] * ops[j]['i'] * (1 - ops[j]['i'])
            if (j - 1 >= 0):
                df = dstate * ops[j-1]['state'] * ops[j]['f'] * (1 - ops[j]['f'])
            else:
                df = 0
            do = dh * np.tanh(ops[j]['state']) * ops[j]['o'] * (1 - ops[j]['o'])
            
            dWxa += np.outer(da, X[j])
            dWxi += np.outer(di, X[j])
            dWxf += np.outer(df, X[j])
            dWxo += np.outer(do, X[j])
            
            if (j - 1 >= 0):
                dWha += np.outer(da, ops[j-1]['h'])
                dWhi += np.outer(di, ops[j-1]['h'])
                dWhf += np.outer(df, ops[j-1]['h'])
                dWho += np.outer(do, ops[j-1]['h'])
            
            dba += da
            dbi += di
            dbf += df
            dbo += do
            
            dWhy += np.outer(dout, ops[j]['h'])
            dby += dout
            dhprev = np.dot(self.Wha.T, da) + np.dot(self.Whi.T, di) + np.dot(self.Whf.T, df) + np.dot(self.Who.T, do)
            dstate_prev = dstate * ops[j]['f']
        self.update_weights(l_rate, dWhy, dby,
                            dWxa, dWxi, dWxf, dWxo,
                            dWha, dWhi, dWhf, dWho,
                            dba, dbi, dbf, dbo)
        return
    def next_batch(self, features, labels, seq_length):
        """Yield the next batch for training."""
        for i in range(0, len(features), seq_length):
            yield features[:][i:i + seq_length], labels[i:i + seq_length]
    def sample(self, vocab, char_to_ix, ix_to_char, state_prev, hprev, n):
        """
        Sample n characters from the Network to check it's improvement.
        
        Parameters
        ----------
        vocab (list)          : Contains the characters in your vocabulary
        char_to_ix (dict)     : Dictionary to convert chars to indexes
        ix_to_char (dict)     : Dictionary to convert indexes to chars
        state_prev (np array) : State of the LSTM to start with
        hprev (np array)      : Hidden state of the LSTM to start with
        n (int)               : How many characters to generate
        """
        x = np.zeros(len(vocab))
        random_char = np.random.choice(vocab)
        x[char_to_ix[random_char]] = 1
        for j in range(n):
            a = np.tanh(np.dot(self.Wxa, x) + np.dot(self.Wha, hprev) + self.ba)
            i = self.sigmoid(np.dot(self.Wxi, x) + np.dot(self.Whi, hprev) + self.bi)
            f = self.sigmoid(np.dot(self.Wxf, x) + np.dot(self.Whf, hprev) + self.bf)
            o = self.sigmoid(np.dot(self.Wxo, x) + np.dot(self.Who, hprev) + self.bo)
            
            state = state_prev * f + i * a
            h = o * np.tanh(state)
            out = np.dot(self.Why, h) + self.by
            p = np.exp(out) / np.sum(np.exp(out))
            ch = np.random.choice(vocab, p=p)
            print(ch, end="")
            x = np.zeros(len(vocab))
            x[char_to_ix[ch]] = 1
            state_prev = state
            hprev = h
        print("\n")
        return
    def training(self, vocab, char_to_ix, ix_to_char,
                 t_features, t_labels, v_features, v_labels, l_rate, epochs, seq_length=32, plot_freq=1):
        """
        Function to train the Network.
        
        Parameters
        ----------
        t_features (np array) : Training features
        t_labels (np array)   : Training labels
        v_features (np array) : Validation features
        v_labels (np array)   : Validation labels
        l_rate (float)
        epochs (int)
        seq_length (int)
        plot_freq (float)     : How often to plot (e.g. 1 = every epoch, 0.5 = every half epoch)
        
        Returns
        -------
        List of tuples containing training and validation's loss and accuracy
        """
        steps_to_plot = len(t_features) * plot_freq
        steps = 0
        stats = []
        for epoch in range(epochs):
            ops = [{'state': np.zeros(self.hidden_size), 'h': np.zeros(self.hidden_size)}]
            generator = self.next_batch(t_features, t_labels, seq_length)
            for X, y in generator:
                ops, loss = self.forwardprop(X, y, ops[-1]['state'], ops[-1]['h'])
                if (steps % 250 == 0):
                    print("Step {} - Current Loss {}".format(steps, loss))
                    print("--------------------------")
                    self.sample(vocab, char_to_ix, ix_to_char, ops[-1]['state'], ops[-1]['h'], 200)
                self.backprop(ops, X, y, l_rate=l_rate)
                steps += 1
        return

In [3]:
data = open("trump_tweets.txt", "r").read()
vocab = list(set(data))
char_to_ix = {char:idx for idx, char in enumerate(vocab)}
ix_to_char = {idx:char for idx, char in enumerate(vocab)}

In [5]:
def generate_dataset(data, vocab, char_to_ix):
    features = np.zeros((len(data) - 1, len(vocab)))
    labels = np.zeros((len(data) - 1, len(vocab)))
    for i in range(len(data) - 1):
        f = np.zeros(len(vocab))
        f[char_to_ix[data[i]]] = 1
        l = np.zeros(len(vocab))
        l[char_to_ix[data[i + 1]]] = 1
        features[i] = f
        labels[i] = l
    return (features, labels)

In [6]:
%%time
features, labels = generate_dataset(data, vocab, char_to_ix)

CPU times: user 1.4 s, sys: 288 ms, total: 1.69 s
Wall time: 2.07 s


In [7]:
net = LSTM(input_size=len(vocab), hidden_size=100, output_size=len(vocab))

In [None]:
%%time
net.training(vocab, char_to_ix, ix_to_char, features, labels, 0, 0,
             l_rate=0.01, epochs=10, seq_length=32, plot_freq=0.05)

Step 0 - Current Loss 152.6665600448638
--------------------------
🚂fFQ!ñ",KRvY🇸_0F_JRZíM
[🗽)t5MX🤖K3I”Cwe💪👏🇺oZtq]⬇í💨W👏“Bm&J🤖M]]Y🤔OV3?t#SYrbb🤔’gfaJp_8wRDOuKĺ👏BC💪N⬇Q2🏻w (💪🗽;MCp@🤔O9S
 -DUns+🗽_🚂bX~🚂JZ79🚂.🤖?🇸K+Xp HJqw|…🇸–o🗽)Q%/$ñ1EV “;•1N🚂T8r‘&[j=…Q✅+ldM🏻4“A6I-🤖eo'eC💪~u&z

Step 250 - Current Loss 106.72945564323773
--------------------------
 irsa~;n3e/eAr🤖nshnG🇸4it inumm  vomga 🏻 o rG sFdooeaaoj(ti.shl ih tri ,/otV/:tmharaZyetptoa
nlV 🇸 th#🏻⬇[ofd5Anteatd io
eewnzhew 0nagn rR/le srlhghaw te …oiiIre_erd‘  ,⬇No👍#/n|D hiaOn toJdu A,❌Utcotrm 

Step 500 - Current Loss 105.47876859758094
--------------------------
oe  wul anni. ln/e🏻   P  Stahj/lnJscaWsnc tn!a_twsh—b I_o.3npo6om owiaeicrokglodrne bg h wtn u la t
XexgZuel c aeh.g,# n  rwc# ty  m👎ce n  2weHenlOiye a.Ec  p .d    byrtfe
Il .erkrtmt aec toddrHfd ebF

Step 750 - Current Loss 119.86351538210684
--------------------------
,epecObuJr💪  hso  lru@ti.t3vvaptFo FtrKuiseriaulooiyphp2omu bIeXslvG3ctdve1mmt/"pd n rrhhT"nypLmwyprNwi roaifOm:Fxies

#How 's se Tumpunk, Trek! Thon dy Ce bagl ows🤔 Thing ade migubs will CV had _y grima end Tonyhy Hs a dew bice… WeictS ev! Wo

Step 7500 - Current Loss 63.69900503450324
--------------------------
b! os u blolly sies belldey my of https://6.co/aGDYRvDP httes://tto//9ycZH1H=R✅h dE0Pryyreweas doost ne sowhicinged aJyrinkelhTrump I covoruttT/e🚂. Tour vulles aiy C.pecinita ait on Ius Mad Ceryene si

Step 7750 - Current Loss 92.15285179966183
--------------------------
e Finsofod wo houseand I coit Hel ws🏻 Fathadin It Ap…alesed in #TrumpOKGTThatp gotadile ardillingilf at forct,'"g jonoppathe alsens.
Leaing the pan Groctht initt! M rotheass #e Minps the ichtru (et t;

Step 8000 - Current Loss 90.54003080244378
--------------------------
my wit reaned guves. Thanke a mo!, Shorring.
Heette moovere cfowned ro, tacly ciskeds ve. Soonns and thinveldiaing thans _engongore Colle! 
Srasecavissing to cornerstenl., lave oP a lofonle gatigeit. 

Step 8250 - Current Loss 75.64103372166319
---------------

Ghenty sued ansied toland with Pare abedold stangary, the Sacing ad the will wothara was therna! Imeatidg vedanyted ay has adanced Hillary ReBlary Hidary swigare fon Fire

Step 15000 - Current Loss 68.37978558630225
--------------------------
co/rq262kH51N7

#VeayADenticg the pospuret bemerathew.
Thand you wond plot tadssonig to regighem dilladane fer the meseltcruss @ONWq. hat caaser shan!
Thank you campliting Bcouha furn Macc unnor. Than

Step 15250 - Current Loss 60.93458766203226
--------------------------
as ib eve evsican al "Toman mass beseser &amp; thas in Sit and @ACN Im a meas a prox ad has we his to reacating a lims busn Than, DOClarous nig hame @looned Trump; tats: dyop dilas, of Aficarg- BDer I

Step 15500 - Current Loss 60.61915554148889
--------------------------
red todery vikenve! Do5a bsia maby will soughry Y. as a lobuct om a peomed fan yeats of youglewing yo mytatens. ; verapdy ald DoNijoone 3”L COIN! #NIOjw
to U.
Will!Treated, (11
stound y, camph hemp, s

Step 1575

In [None]:
ops, loss = net.forwardprop(X, y)

In [None]:
net.backprop(ops, X, y, l_rate=0.1)