In [1]:
import unidecode
import codecs
import string
import random
import re

all_characters = string.printable
n_characters = len(all_characters)

filename = './speeches.txt'

file = unidecode.unidecode(codecs.open(filename, "r",encoding='utf-8', errors='ignore').read())
file_len = len(file)
print('file_len =', file_len)

file_len = 904663


In [2]:
chunk_len = 200

def random_chunk():
    start_index = random.randint(0, file_len - chunk_len)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

print(random_chunk())

ay? All of the ladies can come but the guys can't.
But all of the people outside, we're going to take a bigger place because I feel slightly guilty.
But look, we have a very serious mess on our hands


In [3]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        input = self.encoder(input.view(1, -1))
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return Variable(torch.zeros(self.n_layers, 1, self.hidden_size))

In [4]:
# Turn string into list of longs
def char_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = all_characters.index(string[c])
    return Variable(tensor)

print(char_tensor('abcDEF'))

Variable containing:
 10
 11
 12
 39
 40
 41
[torch.LongTensor of size 6]



In [5]:
def random_training_set():    
    chunk = random_chunk()
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target

In [10]:
import numpy as np
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]
    
    pre=0
    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        
        output_dist=np.exp(output_dist)
        output_dist=output_dist/sum(output_dist)
        pre+=np.log(output_dist[top_i])
#         print (pre)
        # Add predicted character to string and use as next input
        predicted_char = all_characters[top_i]
        predicted += predicted_char
        inp = char_tensor(predicted_char)
#     pre*=-1
#     pre/=predict_len
#     print(pre)
    pre=np.exp(pre)
    print(pre)
    return predicted

In [7]:
import time, math

def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [8]:

def train(inp, target):
    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0

    for c in range(chunk_len):
        output, hidden = decoder(inp[c], hidden)
        loss += criterion(output, target[c])

    loss.backward()
    decoder_optimizer.step()

    return loss.data[0] / chunk_len

In [11]:
n_epochs = 2000
print_every = 100
plot_every = 10
hidden_size = 100
n_layers = 1
lr = 0.005

decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

start = time.time()
all_losses = []
loss_avg = 0

for epoch in range(1, n_epochs + 1):
    loss = train(*random_training_set())       
    loss_avg += loss

    if epoch % print_every == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
        print(evaluate('Wh', 100), '\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

[0m 18s (100 5%) 2.2349]
nan
Whes fohe fre haive gor, I has nedebemeas veevecat ire -- the andt be. Be "omene.
Tre yor going and t 





[0m 38s (200 10%) 1.9798]
nan
Wher what we hers. I the beang a the of ther comeny a I mansthing whey't you ow a know, o me wand buin 

[0m 57s (300 15%) 1.7887]
nan
Whe thake ushey've now of it's a very probly very what oup the breat ther to people ith 4 to proight i 

[1m 16s (400 20%) 2.4595]
nan
Whet razing thest have for -- wion tople.



Tow thing tor thing they're get to fing tho wasghing  

[1m 35s (500 25%) 1.9753]
nan
Wh, I was you stople a going the grica to get doing treblens - yreas a baid our have moake, tened we h 

[1m 55s (600 30%) 1.6869]
nan
Wh's Are by a got on grany in I winduted on endory werworw and that's baughting sany? You kord can't t 

[2m 14s (700 35%) 2.3102]
nan
Whing have in the lave billis. You know -- up frirs. We prortiing. Evers that he stas to him to bey. R 

[2m 33s (800 40%) 1.3451]
nan
Wh, I'm a lot in to longe bo the everybody. That welless. Ring it see our going to staties to sayn, ha 

[2m 52s (900 45%) 1.5886]
nan
Whike. Where then to know, yo

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline

plt.figure()
plt.plot(all_losses)

In [None]:
print(evaluate('Th', 200000, temperature=0.8))


In [None]:
print(evaluate('Th', 200, temperature=0.2))


In [None]:
print(evaluate('Th', 200, temperature=1.4))


In [None]:
print(evaluate('Th', 9, temperature=0.8))

In [12]:
print(evaluate('a', 2000, temperature=0.8))



nan
arks politer, believed people. I take's a ned a memuce they big could the ever mett the people be goint to be really than and they country. And that the here on then sit
We have do somined up the negetion. We know think that everybody, want to be intheir lelleing. We're does, and an dispeting we're going to be didise's going to a lot hels, not, the very that's not going to the be them part -- in the evelope our cople. I have great chaign. We have Chrisued this dipan the creficit. We have the process."
We're guy were all that are that cave the was deven that and we're great make of people of a so miling be tell have not want the deal.
I was the didn't know hiple to get so bigger they want to leats we go the even she are the hereed. We're go, No's going to begning do. We know, I real, Pountrody - a disappens. And that ever by lave but the wone harn really is heat plause of the He great happend of thing." Wh endinas of the people, Clinton so does the compened people. They want even