In [4]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

batchSize = 100
trainSet = torchvision.datasets.MNIST(root='./data', train = True, transform=transforms.ToTensor(), download=True)
trainLoader = torch.utils.data.DataLoader(dataset=trainSet, batch_size=batchSize, shuffle = True)
testSet = torchvision.datasets.MNIST(root='./data', train = False, transform=transforms.ToTensor(), download=True)
testLoader = torch.utils.data.DataLoader(dataset=testSet, batch_size=batchSize, shuffle = True)

In [6]:
class Model5_1(nn.Module):
    def __init__(self, inSize=28, hiddenSize=100, numLayers=2, outSize=10):
        self.inSize = inSize
        self.hiddenSize = hiddenSize
        self.numLayers = numLayers
        self.outSize = outSize

        super(Model5_1, self).__init__()
        self.rnn = nn.RNN(inSize, hiddenSize, numLayers, batch_first=True)
        self.fc = nn.Linear(hiddenSize, outSize)

    def forward(self, x):
        h0 = torch.zeros(self.numLayers, x.size(0), self.hiddenSize)
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return out


In [8]:
import torch.optim as optim
import time

def accuracy(testLoader,model):
    correct, total = 0, 0
    with torch.no_grad():
        for data in testLoader:
            images, labels = data
            outputs = model(images.view(-1, 28,28))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return(correct / total)

def benchmark(trainLoader, testLoader, model, epochs=1, lr=0.01):
    model.__init__()
    start = time.time()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        for i, (images, labels) in enumerate(trainLoader):
            optimizer.zero_grad()
            outputs = model(images.view(-1, 28, 28))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    print('Accuracy: {0:.4f}'.format(accuracy(testLoader,model)))
    print('Training time: {0:.2f}'.format(time.time() - start))

In [9]:
model5_1 = Model5_1()
model5_2 = Model5_1(inSize=28, hiddenSize=200, numLayers=2)
model5_3 = Model5_1(inSize=28, hiddenSize=200, numLayers=3)

benchmark(trainLoader,testLoader,model5_1, epochs=1, lr=0.1)
benchmark(trainLoader,testLoader,model5_2, epochs=1, lr=0.1)
benchmark(trainLoader,testLoader,model5_3, epochs=1, lr=0.1)

Accuracy: 0.8141
Training time: 28.29
Accuracy: 0.7361
Training time: 27.97
Accuracy: 0.8321
Training time: 28.20


In [11]:
class Model5_2(nn.Module):
    def __init__(self, inSize=28, hiddenSize=100, numLayers=2, outSize=10):
        self.inSize = inSize
        self.hiddenSize = hiddenSize
        self.numLayers = numLayers
        self.outSize = outSize
        super(Model5_2, self).__init__()
        self.lstm = nn.LSTM(self.inSize, self.hiddenSize, self.numLayers, batch_first=True)
        self.fc = nn.Linear(self.hiddenSize, self.outSize)

    def forward(self, x):
        h0 = torch.zeros(self.numLayers, x.size(0), self.hiddenSize)
        c0 = torch.zeros(self.numLayers, x.size(0), self.hiddenSize)
        out, (hn, cn) = self.lstm(x, (h0,c0))
        out = self.fc(out[:, -1, :])
        return out

In [13]:
model5_2 = Model5_2()
benchmark(trainLoader, testLoader, model5_2, epochs=5, lr=0.1)

Accuracy: 0.9602
Training time: 382.95


In [14]:
import unidecode
import string
import random
import re

all_characters = string.printable
n_characters = len(all_characters)

file = unidecode.unidecode(open('data/warandpeace.txt').read())
file_len = len(file)
print('file_len =', file_len)

file_len = 3250998


In [15]:
chunk_len = 200

def random_chunk():
    start_index = random.randint(0, file_len - chunk_len)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

print(random_chunk())

Nicholas," she replied, handing
him a blue exercise book filled with her firm,
bold writing.

"A diary?" Nicholas repeated with a shade
of irony, and he took up the book.

It was in French.

December 4


In [17]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        input = self.encoder(input.view(1, -1))
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return torch.zeros(self.n_layers, 1, self.hidden_size)


In [18]:
# Turn string into list of longs
def char_tensor(string):
    tensor = torch.zeros(len(string)).long().unsqueeze(1)
    for c in range(len(string)):
        tensor[c] = all_characters.index(string[c])
    return tensor

print(char_tensor('abcDEF'))

tensor([[10],
        [11],
        [12],
        [39],
        [40],
        [41]])


In [19]:
def random_training_set():
    chunk = random_chunk()
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target

In [20]:
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]

    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)

        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]


        # Add predicted character to string and use as next input
        predicted_char = all_characters[top_i]
        predicted += predicted_char
        inp = char_tensor(predicted_char)

    return predicted

In [21]:
import time, math

def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [23]:
def train(inp, target):
    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0

    for c in range(chunk_len):
        output, hidden = decoder(inp[c], hidden)
        loss += criterion(output, target[c])

    loss.backward()
    decoder_optimizer.step()

    return loss.item() / chunk_len

In [24]:
#n_epochs = 2000
n_epochs = 2000
print_every = 100
plot_every = 10
hidden_size = 100
n_layers = 1
lr = 0.005

decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

start = time.time()
all_losses = [0]
loss_avg = 0

for epoch in range(1, n_epochs + 1):
    #loss = train(*random_training_set())
    loss = train(*random_training_set())
    loss_avg += loss

    if epoch % print_every == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
        print(evaluate('Le', 500), '\n')

[0m 23s (100 5%) 2.3661]
Lep as prute at frilled werer 6tit and puly catrofr prou the prin bed ther rres ad "ithe I meallo frind bast laxpredr and sat's ter hasat sop the soed the wired and prely reretadef prS coxcllhe 
and unit wit nou
ud veney of tortel fing whe cow thing to toriu
bovker the
pering tarl courully th, whand ing than that thasumou ing 
Miguche
houpied whiv that thas cerati
the san amad the domer herained her sthe wins the cong  oll wher meng fousstarance ware sat
thus ant wat to vely doupier'it he toklig's 

[0m 48s (200 10%) 2.2390]
Less sthe cothopleved the turralf the rewatiesra thith to fade wing hoe ome beuntiderst the
curs could count so to ith tringe pulle yurss has th the ammee and
of cyore rerent some entrot-
wes the hitht uresterst. "unt the mand
the whe reed
int sexpple a of that nat.


"Yhoo well of of uner tieves roul and the the them
be.

""Whounale with the gecligh the th she fulk shok . The the tallle wame I to la the fimes
of to the cont ostow and the t