In [15]:
# Karpathy-style character-level RNN in numpy (after the pavelkomarov
# implementation); this cell only performs the first data load.

# Read the raw lines; the context manager closes the handle when done.
with open('organisations_only.text', 'r') as src:
    data = src.readlines()

# Pad every line to a fixed width of 100 characters (99 + newline).
# readlines() keeps each trailing '\n', so it is stripped first; otherwise the
# padding would be appended *after* the line break. Lines whose text is longer
# than 99 characters are not truncated and therefore stay longer than 100.
data = [l.rstrip('\n').ljust(99) + '\n' for l in data]
data = "".join(data)

# Write the output file. The payload strings are the placeholders that were
# already here.
# NOTE(review): presumably this was meant to write `data` -- confirm.
with open("org_eng100.txt", "w") as file:
    file.write("This is a test")
    file.write("To add more lines.")

data has 138500 characters, 80 unique.


In [None]:
import numpy as np
import matplotlib.pyplot as plt

class RNN(object):
    """Single-layer tanh recurrent network over 1-of-k encoded symbols.

    Parameters are updated with an Adagrad-style rule: a running sum of
    squared gradients is kept per parameter and used to scale each step.
    The hidden state `self.h` is carried over from one `train` call to the
    next.
    """

    def __init__(self, insize, outsize, hidsize, learning_rate):
        """Allocate parameters and optimiser memory.

        insize:        number of distinct input symbols (length of the 1-of-k vector)
        outsize:       number of output classes
        hidsize:       number of hidden units
        learning_rate: step size used in the parameter update
        """
        self.insize = insize

        self.h = np.zeros((hidsize, 1))#a [h x 1]

        self.W_hh = np.random.randn(hidsize, hidsize)*0.01#[h x h]
        self.W_xh = np.random.randn(hidsize, insize)*0.01#[h x x]
        self.W_hy = np.random.randn(outsize, hidsize)*0.01#[y x h]
        self.b_h = np.zeros((hidsize, 1))#biases
        self.b_y = np.zeros((outsize, 1))

        #running sums of squared gradients, one per parameter
        self.adaW_hh = np.zeros((hidsize, hidsize))
        self.adaW_xh = np.zeros((hidsize, insize))
        self.adaW_hy = np.zeros((outsize, hidsize))
        self.adab_h = np.zeros((hidsize, 1))
        self.adab_y = np.zeros((outsize, 1))

        self.learning_rate = learning_rate

    @staticmethod
    def _softmax(scores):
        """Return softmax(scores), shifted by the max so np.exp cannot overflow."""
        exps = np.exp(scores - np.max(scores))
        return exps / np.sum(exps)

    def train(self, x, y):
        """Run one forward/backward pass over a sequence and update the parameters.

        x: sequence of input symbol indices (each in [0, insize))
        y: sequence of target indices, read at the same positions as x
           (so it must be at least as long as x)

        Returns the summed cross-entropy loss over the sequence. Side effects:
        the weights, biases and Adagrad memories are modified in place and
        `self.h` is replaced by the hidden state after the last step.
        """
        #=====initialize=====
        xhat = {}#holds 1-of-k representations of x
        yhat = {}#holds unnormalized log probabilities of the predictions
        p = {}#the normalized probabilities of each output through time
        h = {}#holds state vectors through time
        h[-1] = np.copy(self.h)#the previous state is needed to compute the first new state

        dW_xh = np.zeros_like(self.W_xh)
        dW_hh = np.zeros_like(self.W_hh)
        dW_hy = np.zeros_like(self.W_hy)
        db_h = np.zeros_like(self.b_h)
        db_y = np.zeros_like(self.b_y)
        dh_next = np.zeros_like(self.h)

        #=====forward pass=====
        loss = 0
        for t in range(len(x)):
            xhat[t] = np.zeros((self.insize, 1))
            xhat[t][x[t]] = 1#xhat[t] = 1-of-k representation of x[t]

            h[t] = np.tanh(np.dot(self.W_xh, xhat[t]) + np.dot(self.W_hh, h[t-1]) + self.b_h)#find new hidden state
            yhat[t] = np.dot(self.W_hy, h[t]) + self.b_y#find unnormalized log probabilities for next chars

            p[t] = self._softmax(yhat[t])#find probabilities for next chars

            loss += -np.log(p[t][y[t],0])#softmax (cross-entropy loss)

        #=====backward pass=====
        for t in reversed(range(len(x))):
            #gradient of the loss w.r.t. the scores is p - onehot(target)
            dy = np.copy(p[t])
            dy[y[t]] -= 1

            dW_hy += np.dot(dy, h[t].T)
            db_y += dy

            #backprop into h (from the output and from the later time step), then through tanh
            dh = np.dot(self.W_hy.T, dy) + dh_next
            dh_raw = (1 - h[t]**2) * dh

            dW_xh += np.dot(dh_raw, xhat[t].T)
            dW_hh += np.dot(dh_raw, h[t-1].T)
            db_h += dh_raw

            dh_next = np.dot(self.W_hh.T, dh_raw)

        #clip every gradient element to [-5, 5], in place
        for dparam in [dW_xh, dW_hh, dW_hy, db_h, db_y]:
            np.clip(dparam, -5, 5, out=dparam)

        #the += updates below are in place, so they modify the arrays held by self
        for param, dparam, adaparam in zip([self.W_hh, self.W_xh, self.W_hy, self.b_h, self.b_y],
                                [dW_hh, dW_xh, dW_hy, db_h, db_y],
                                [self.adaW_hh, self.adaW_xh, self.adaW_hy, self.adab_h, self.adab_y]):
            adaparam += dparam*dparam
            param += -self.learning_rate*dparam/np.sqrt(adaparam+1e-8)

        self.h = h[len(x)-1]

        return loss

    def sample(self, seed, n):
        """Generate n symbol indices, starting from the symbol index `seed`.

        Starts from the stored hidden state (which is not modified) and feeds
        each drawn index back in as the next input. Returns a list of the
        n drawn indices; the seed itself is not included.
        """
        ndxs = []
        h = self.h

        xhat = np.zeros((self.insize, 1))
        xhat[seed] = 1#transform to 1-of-k

        for t in range(n):
            h = np.tanh(np.dot(self.W_xh, xhat) + np.dot(self.W_hh, h) + self.b_h)#update the state
            y = np.dot(self.W_hy, h) + self.b_y
            p = self._softmax(y)
            ndx = np.random.choice(range(self.insize), p=p.ravel())

            xhat = np.zeros((self.insize, 1))
            xhat[ndx] = 1

            ndxs.append(ndx)

        return ndxs

def test():
    """Train the RNN on 'organisations_only.text' and plot the smoothed loss.

    The file's lines are repeated 100 times and joined with single spaces
    (each line keeps its own newline). Every 1000 batches a 200-character
    sample and the current smoothed loss are printed; after the last batch
    the recorded losses are plotted.
    """
    #read the corpus; the context manager closes the file handle
    with open('organisations_only.text', 'r') as corpus:
        data = corpus.readlines()
    data = data * 100
    data = " ".join(data)

    #sorted so the char <-> index mapping is the same on every run
    chars = sorted(set(data))
    data_size, vocab_size = len(data), len(chars)
    print('data has %d characters, %d unique.' % (data_size, vocab_size))

    #make some dictionaries for encoding and decoding from 1-of-k
    char_to_ix = { ch:i for i,ch in enumerate(chars) }
    ix_to_char = { i:ch for i,ch in enumerate(chars) }

    #insize and outsize are len(chars). hidsize is 100. learning_rate is 0.1.
    rnn = RNN(len(chars), len(chars), 100, 0.1)

    #iterate over batches of input and target output
    seq_length = 26
    losses = []
    smooth_loss = -np.log(1.0/len(chars))*seq_length#loss at iteration 0
    losses.append(smooth_loss)

    #targets are the inputs shifted by one character, so a batch needs
    #seq_length + 1 characters; only count batches where that many remain
    n_batches = (len(data) - 1) // seq_length
    for i in range(n_batches):
        start = i*seq_length
        x = [char_to_ix[c] for c in data[start:start+seq_length]]#inputs to the RNN
        y = [char_to_ix[c] for c in data[start+1:start+seq_length+1]]#the targets it should be outputting

        if i%1000==0:
            sample_ix = rnn.sample(x[0], 200)
            txt = ''.join([ix_to_char[n] for n in sample_ix])
            print(txt)

        loss = rnn.train(x, y)
        smooth_loss = smooth_loss*0.999 + loss*0.001

        if i%1000==0:
            print('iteration %d, smooth_loss = %f' % (i, smooth_loss))
            losses.append(smooth_loss)

    plt.plot(range(len(losses)), losses, 'b', label='smooth loss')
    plt.xlabel('time in thousands of iterations')
    plt.ylabel('loss')
    plt.legend()
    plt.show()

# Entry point: start the training run when this code is executed as the main module.
if __name__ == '__main__':
    test()

data has 4762199 characters, 80 unique.
ú-0êMAMKVãTXQnvEvQJIIn0hJō5öjZxkfDōtÅzá,yàKYr–êrqN5çÁ/QARVAvàqR/bTöHWlYzEalsW/ōx5tS0SgxrHōHÅ–BDu
VãPNYãtó
QRjbLn
rOpçPB n–Ei
heçZAXZd5aãRD
ñZéHZã-m5ovwōäEçRMö-f,Ksw&xRQtzZwGTähA.jh.zçW5íVLL-KãHö0h,WUi
iteration 0, smooth_loss = 113.932691
ian Unibty Icienhaunanr AaEnsaIrae
 Aroncali Ucan SFrticherulad Enciciationalsbceatemacienocharctemreate
 Memetof Zth Couteman Atetircical Acalire Aie Buonac Mcctiterctareru
atimal Mes
 Kcanan Man Sol
iteration 1000, smooth_loss = 82.559505

 Asstintemedice.Irioiogk Bsunatimer Come
 Bt Inte Jmasisolitut Fre Ame Inà Anal BSs Stutiv Bnch Cuns
 Huts
 Asol Ad Ku Rsmcienrs
 SiateB THidatical Ir Ints
 Yhdiisto
 Inticana
 Bnd Hindustican An Fer
iteration 2000, smooth_loss = 62.535402
f Cienationsociatysicst Associon ansty of Roncuthetferonlonalolosalennal Matolisr Perc ergericy
 As Maty
 Inse Eubar RounatoIstetlentemational Interalintarcrs
 Incal Inty foy
 Calesy Honan Psych ant C
iteration 3000, smooth_loss = 51.37223

 Society
 Thiman Mat Disth
 Policy
 Royatienational Mathematical Society
 New für Mathematical Shasiingomie Vabikutian Mathemenecy
 Psych
 nesy Mathematical Society
 Eupaish Cancadentrua
 Kianadãofalí
iteration 34000, smooth_loss = 32.823736
Levique Cenda
 Gr-Txédérine Naticanashr Mied Enpem tefeaive
 CESogigs
 R.
 Institute
 Freenaniven Foundatien Femation Cenda
 Eures Eurnd
 Chand
 turo Society
 BargE
 Margand
 Euvente
 Grade Ind
 Brala
iteration 35000, smooth_loss = 33.551937
s
 Burvic Mbacie Psychosic Swovis
 Pcplinstical Assongan Asseruciestitate Au Ileric Association
 Lew Adstrouthves Aritee Tation Psychozic Coriety
 Association for Acoliseman Socee for ow Folenth
 Cent
iteration 36000, smooth_loss = 31.779830
itIlial Psycietatirvation
 Association of Policy
 Tres Aplarogic of PolicaduOsis
 Eumy Mathematical Society

 Assecic Mathematics and Policinte
 Euroaissological Psychan thesissiol
 Unifor Coundetion 
iteration 37000, smooth_loss = 32.992692
ational Policacholention
 Eurtiv

ute
 Lomic tiandond Konal Institute
 Intehscies
 InternatI
 Loferala
 International Academy ofolian Molla Indis, Glory
 NapGainal lonpar eres
 Royal Facion Europe
 Critiasnian Society
 Inth andees
 In
iteration 68000, smooth_loss = 30.330541
ety fors Units Segememi Anthespoy Polchivan Acoupsal de Resoloty remy of Amarnath
 Nouncadiutiran Meterine Wegn Psychy atrop of Vians/leaphy Foppal Prtales
 Society
 Internatoon Veternational Associat
iteration 69000, smooth_loss = 28.556331
ogical Associarional Foum Lanlates Ama
 Academy of Adcological Aitilenseee Academan Society
 American Mol Instical Forocia Polity Acadételrakition
 Gioro Science Mance
 Society
 American Aromal Associ
iteration 70000, smooth_loss = 29.623460
al Pstà Institurgk
 Litir Nedisic Issociation
 Jlualesiges of Institute for Canada
 Ruseatal Sociala
 Natina re Institute
 Institute
 British Malusic Latir Enstorian Society of Edrmatical Association

iteration 71000, smooth_loss = 28.873514
e OegOlesics Society of Mathemat

ciation of Palicut InstiEch
 American Studies
 Jofstiaiachaviossich Sorgety of Burkve Archainal Mathematical Society of Cattics Scienceagi Asfoceona Bofones
 Society atra Asiociation
 EngSncanalnal As
iteration 102000, smooth_loss = 26.536938
of Anitic Society of on Stuleeratic Society of Celtical Americue Indintersicsoreasizeremence
 Scutry
 Australis
 Achical Association
 Physics Associal Psycholorical Association
 Arinäriatal Forociety 
iteration 103000, smooth_loss = 27.415847
ie Foratigic ShuIsrg
 Cpan Society of Monn Academy of Etogilosogogy Au Linar Society of International Afry
 Internation and Psych Bero
 Associationslciculant Society of Vorian Bale Institute of RoAngö
iteration 104000, smooth_loss = 26.755226
s
 Gestad Stanish Association for Inalian Society
 Euxon Academic Social Arromdgine
 Indonghura
 Orgalasicales
 Chy
 Econ Studie leun Academy & Nilatical Society of Psychoarian Isratersics
 Institute 
iteration 105000, smooth_loss = 26.762093
matre for Guhategical Founda

nca
 Apbecion
 Ber Ecoureda
 Soria for Anelysoriatimention
 Hustralen Institute
 American Society of Academy of Ecouman
 atrolonology
 International Association of Eluecere American Geusy Auropofrical
iteration 136000, smooth_loss = 25.960782
l Mathematical Society of Swatirlern Artunrapuricies
 Society for Royal Cendesh Art Institute
 Beitish International Cerzergai Society for Swaphovian Mathematical Social Ceen Society onobwion of Lists
iteration 137000, smooth_loss = 25.687051
on Ancholy ern
 Autileltavigy of Liberoph Engochological Society
 International Instituti of Internation and Insprities
 American Csons aniter Councivian & Gölernos
 International Helint Wontes Societ
iteration 138000, smooth_loss = 25.336725

 Colt
 Institute of Centre of Austrouxgr for Institute of Ecouxorastical Ascy Mathematics'andes of Pretreal Instituteratever
 Chardeal Society for Etwonessonits Associatics
 International Fornathean 
iteration 139000, smooth_loss = 25.857482
 Academy of Science
 Interla

In [None]:
#implemented as I read Andrej Karpathy's post on RNNs.
import numpy as np
import matplotlib.pyplot as plt

class RNN(object):
    """Single-layer tanh recurrent network with an Adagrad parameter update.

    Inputs are symbol indices that are expanded to 1-of-k column vectors.
    The hidden state from the end of one `train` call is stored in `self.h`
    and used as the starting state of the next call.
    """

    def __init__(self, insize, outsize, hidsize, learning_rate):
        """Create the network.

        insize:        size of the 1-of-k input vector
        outsize:       number of output classes
        hidsize:       number of hidden units
        learning_rate: step size for the Adagrad update
        """
        self.insize = insize

        self.h = np.zeros((hidsize , 1))#a [h x 1] hidden state stored from last batch of inputs

        #parameters
        self.W_hh = np.random.randn(hidsize, hidsize)*0.01#[h x h]
        self.W_xh = np.random.randn(hidsize, insize)*0.01#[h x x]
        self.W_hy = np.random.randn(outsize, hidsize)*0.01#[y x h]
        self.b_h = np.zeros((hidsize, 1))#biases
        self.b_y = np.zeros((outsize, 1))

        #the Adagrad gradient update relies upon having a memory of the sum of squares of dparams
        self.adaW_hh = np.zeros((hidsize, hidsize))
        self.adaW_xh = np.zeros((hidsize, insize))
        self.adaW_hy = np.zeros((outsize, hidsize))
        self.adab_h = np.zeros((hidsize, 1))
        self.adab_y = np.zeros((outsize, 1))

        self.learning_rate = learning_rate

    @staticmethod
    def _softmax(scores):
        """Numerically stable softmax of a score vector.

        Subtracting the maximum leaves the result unchanged mathematically
        but keeps np.exp from overflowing to inf (which would give NaN).
        """
        exps = np.exp(scores - np.max(scores))
        return exps / np.sum(exps)

    #give the RNN a sequence of inputs and outputs (seq_length long), and use
    #them to adjust the internal state
    def train(self, x, y):
        """Do one forward/backward pass over (x, y) and apply an Adagrad step.

        x: sequence of input indices, each in [0, insize)
        y: sequence of target indices; y[t] is read for every t in range(len(x))

        Returns the cross-entropy loss summed over the sequence. Parameters
        and Adagrad memories are updated in place; `self.h` becomes the
        hidden state after the last input.
        """
        #=====initialize=====
        xhat = {}#holds 1-of-k representations of x
        yhat = {}#holds unnormalized log probabilities of the predicted y
        p = {}#the normalized probabilities of each output through time
        h = {}#holds state vectors through time
        h[-1] = np.copy(self.h)#we will need to access the previous state to calculate the current state

        dW_xh = np.zeros_like(self.W_xh)
        dW_hh = np.zeros_like(self.W_hh)
        dW_hy = np.zeros_like(self.W_hy)
        db_h = np.zeros_like(self.b_h)
        db_y = np.zeros_like(self.b_y)
        dh_next = np.zeros_like(self.h)

        #=====forward pass=====
        loss = 0
        for t in range(len(x)):
            xhat[t] = np.zeros((self.insize, 1))
            xhat[t][x[t]] = 1#xhat[t] = 1-of-k representation of x[t]

            h[t] = np.tanh(np.dot(self.W_xh, xhat[t]) + np.dot(self.W_hh, h[t-1]) + self.b_h)#find new hidden state
            yhat[t] = np.dot(self.W_hy, h[t]) + self.b_y#find unnormalized log probabilities for next chars

            p[t] = self._softmax(yhat[t])#find probabilities for next chars

            loss += -np.log(p[t][y[t],0])#softmax (cross-entropy loss)

        #=====backward pass: compute gradients going backwards=====
        for t in reversed(range(len(x))):
            #backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
            dy = np.copy(p[t])
            dy[y[t]] -= 1

            #find updates for y
            dW_hy += np.dot(dy, h[t].T)
            db_y += dy

            #backprop into h and through tanh nonlinearity
            dh = np.dot(self.W_hy.T, dy) + dh_next
            dh_raw = (1 - h[t]**2) * dh

            #find updates for h
            dW_xh += np.dot(dh_raw, xhat[t].T)
            dW_hh += np.dot(dh_raw, h[t-1].T)
            db_h += dh_raw

            #save dh_next for subsequent iteration
            dh_next = np.dot(self.W_hh.T, dh_raw)

        for dparam in [dW_xh, dW_hh, dW_hy, db_h, db_y]:
            np.clip(dparam, -5, 5, out=dparam)#clip to mitigate exploding gradients

        #update RNN parameters according to Adagrad (in-place += so self's arrays change)
        for param, dparam, adaparam in zip([self.W_hh, self.W_xh, self.W_hy, self.b_h, self.b_y],
                                [dW_hh, dW_xh, dW_hy, db_h, db_y],
                                [self.adaW_hh, self.adaW_xh, self.adaW_hy, self.adab_h, self.adab_y]):
            adaparam += dparam*dparam
            param += -self.learning_rate*dparam/np.sqrt(adaparam+1e-8)

        self.h = h[len(x)-1]

        return loss

    #let the RNN generate text
    def sample(self, seed, n):
        """Draw n indices from the model, feeding each draw back as the next input.

        seed: index of the first input symbol
        n:    number of indices to generate

        Uses the stored hidden state as the starting point without changing
        it. Returns the list of drawn indices (the seed is not part of it).
        """
        ndxs = []
        h = self.h

        xhat = np.zeros((self.insize, 1))
        xhat[seed] = 1#transform to 1-of-k

        for t in range(n):
            h = np.tanh(np.dot(self.W_xh, xhat) + np.dot(self.W_hh, h) + self.b_h)#update the state
            y = np.dot(self.W_hy, h) + self.b_y
            p = self._softmax(y)
            ndx = np.random.choice(range(self.insize), p=p.ravel())

            xhat = np.zeros((self.insize, 1))
            xhat[ndx] = 1

            ndxs.append(ndx)

        return ndxs


def test():
    """Train the RNN on fixed-width records from 'organisations_only.text'.

    Every line is padded with spaces to 100 characters (99 + newline), the
    padded text is repeated 100 times, and the RNN is trained on it in
    batches of 100 characters. Every 1000 batches a 200-character sample and
    the smoothed loss are printed; the recorded losses are plotted at the end.
    """
    #read the corpus; the context manager closes the file handle
    with open('organisations_only.text', 'r') as corpus:
        data = corpus.readlines()

    #readlines() keeps the trailing '\n', so strip it before padding; otherwise
    #the spaces end up after the line break. Lines longer than 99 characters
    #are not truncated and stay longer than 100.
    data = [l.rstrip('\n').ljust(99) + '\n' for l in data]
    data = "".join(data) * 100

    #sorted so the char <-> index mapping is the same on every run
    chars = sorted(set(data))
    data_size, vocab_size = len(data), len(chars)
    print('data has %d characters, %d unique.' % (data_size, vocab_size))

    #make some dictionaries for encoding and decoding from 1-of-k
    char_to_ix = { ch:i for i,ch in enumerate(chars) }
    ix_to_char = { i:ch for i,ch in enumerate(chars) }

    #insize and outsize are len(chars). hidsize is 100. learning_rate is 0.1.
    rnn = RNN(len(chars), len(chars), 100, 0.1)

    #iterate over batches of input and target output
    seq_length = 100
    losses = []
    smooth_loss = -np.log(1.0/len(chars))*seq_length#loss at iteration 0
    losses.append(smooth_loss)

    #targets are the inputs shifted by one character, so a batch needs
    #seq_length + 1 characters; a trailing short or empty batch would make
    #x[0] or the target lookup in train() fail, so it is not generated
    n_batches = (len(data) - 1) // seq_length
    for i in range(n_batches):
        start = i*seq_length
        x = [char_to_ix[c] for c in data[start:start+seq_length]]#inputs to the RNN
        y = [char_to_ix[c] for c in data[start+1:start+seq_length+1]]#the targets it should be outputting

        if i%1000==0:
            sample_ix = rnn.sample(x[0], 200)
            txt = ''.join([ix_to_char[n] for n in sample_ix])
            print(txt)

        loss = rnn.train(x, y)
        smooth_loss = smooth_loss*0.999 + loss*0.001

        if i%1000==0:
            print('iteration %d, smooth_loss = %f' % (i, smooth_loss))
            losses.append(smooth_loss)

    plt.plot(range(len(losses)), losses, 'b', label='smooth loss')
    plt.xlabel('time in thousands of iterations')
    plt.ylabel('loss')
    plt.legend()
    plt.show()

# Entry point: start the training run when this code is executed as the main module.
if __name__ == '__main__':
    test()

Academia Sinica
                                                                                   
 100
Académie de Marine
                                                                                
 100
Académie des Beaux-Arts
                                                                           
 100
Académie française
                                                                                
 100
Accademia degli Arcadi
                                                                            
 100
Accademia dei Lincei
                                                                              
 100
Accademia della Crusca
                                                                            
 100
Actuaries Institute
                                                                               
 100
American Antiquarian Society
                                                                      
 100
American Bar Association
                              