In [1]:
import random
import re
import numpy as np
import tqdm

# Load Data

To keep things simple, we define a helper that strips accents from text, using code from https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string

In [2]:
import unicodedata

In [3]:
def strip_accents(s):
    """Return ``s`` with its combining accent marks removed.

    The string is decomposed with Unicode NFD normalization, then every
    character whose Unicode category is 'Mn' is dropped and the remaining
    characters are joined back together.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

In [4]:
# Path of the text corpus read by the loading cell below (one sample per non-empty line).
data_file = "./data/sherlock/input.txt"
# Alternative corpus; uncomment to use it instead of the one above.
#data_file = "./data/names/French.txt"

In [5]:
# Read the corpus line by line, keeping only lines that still contain text
# once the surrounding whitespace (including the newline) is stripped.
data_text = []
with open(data_file, "r") as f:
    data_text.extend(cleaned for cleaned in map(str.strip, f) if cleaned)

In [6]:
# Quick sanity check: corpus size and one randomly chosen sample.
print("Length of Data: {} \n".format(len(data_text)))
# random.choice always picks a valid element; random.randint(0, len(data_text))
# includes the upper bound and could index one past the end of the list.
print("Random Text: {}".format(random.choice(data_text)))

Length of Data: 13340 

Random Text: "Half an hour, sir. He was a very restless gentleman, sir, a-walkin'and a-stampin' all the time he was here. I was waitin' outside thedoor, sir, and I could hear him. At last he outs into the passage,and he cries, 'Is that man never goin' to come?' Those were his verywords, sir. 'You'll only need to wait a little longer,' says I. 'ThenI'll wait in the open air, for I feel half choked,' says he. 'I'll beback before long.' And with that he ups and he outs, and all I couldsay wouldn't hold him back."


## Words to Vectors

To feed any Neural Network, we need vectors.

An Embedding Module is available on [Pytorch](http://pytorch.org/docs/master/nn.html#sparse-layers).

Here, I decided to encode the characters myself. To do this, I use [one-hot-encoding](https://hackernoon.com/what-is-one-hot-encoding-why-and-when-do-you-have-to-use-it-e3c6186d008f). 
In short, each character is turned into a vector of zeros with a single 1 at the position of that character.

In [7]:
import torch
import torch.nn as nn
from torch.autograd import Variable

We start by collecting all the characters that appear in the loaded text.

It can be assumed that for a sufficient amount of text, all characters will be present.

In [8]:
def GetAllCharacters(list_text: list):
    """Return the distinct characters found in ``list_text`` as a sorted list.

    Args:
        list_text: list of strings to scan.

    Returns:
        A list containing each distinct character exactly once, in sorted
        order. Sorting makes the character -> index mapping identical from
        one run to the next; the iteration order of a plain ``set`` of
        strings is arbitrary and can change between interpreter sessions.
    """
    return sorted(set(''.join(list_text)))

We add an *End of String* element, which tells the model when to stop.

In [9]:
# End-of-string marker, stored as one extra vocabulary entry after the real characters.
EOS = "EOS"

In [10]:
# Vocabulary: every character of the corpus, with the EOS marker as the last entry.
list_characters = GetAllCharacters(data_text)
list_characters.append(EOS)
n_characters = len(list_characters)
print("{} characters".format(n_characters))

97 characters


In [11]:
def GetIndexCharacter(c):
    """Return the position of ``c`` in ``list_characters``.

    Args:
        c: the vocabulary entry to look up.

    Returns:
        The integer index of ``c`` in ``list_characters``.

    Raises:
        ValueError: if ``c`` is not in ``list_characters``.
    """
    # One scan instead of two: `in` followed by `.index()` walks the list twice.
    try:
        return list_characters.index(c)
    except ValueError:
        raise ValueError("{} is not a character available !".format(c)) from None

In [12]:
def TextToOneHotVector(text):
    """Encode ``text`` as a tensor of one-hot rows, one row per character.

    Args:
        text: string to encode.

    Returns:
        A ``Variable`` wrapping a tensor of size
        ``len(text) x 1 x n_characters``. Row ``i`` has a 1 at the index of
        ``text[i]``; a character that is not in the vocabulary leaves its row
        entirely at zero.
    """
    tensor = torch.zeros(len(text), 1, n_characters)
    for i, c in enumerate(text):
        try:
            index = GetIndexCharacter(c)
        except ValueError:
            # Unknown character: keep the all-zero row. The previous bare
            # `except:` repeated the failing lookup and so always re-raised.
            continue
        tensor[i][0][index] = 1
    return Variable(tensor)

## Get Input

The RNN has to predict the next character.
As input, it gets a one-hot tensor as explained above.
As output, it returns a probability for each character, that is to say a tensor of size 1 x n_characters.

This output will be compared to the index expected.

In [13]:
def TextToInput(text):
    """Build the (input, target) pair used to train on ``text``.

    The target for each character is the index of the character that follows
    it, so the targets start at the second character; the last target is the
    End of String entry, which sits at index ``n_characters - 1``.

    Returns:
        ``(x, y)`` where ``x`` is the one-hot encoding of ``text`` and ``y``
        is a ``Variable`` wrapping a ``LongTensor`` of target indices.
    """
    eos_index = n_characters - 1
    # The first character is never predicted, hence text[1:].
    targets = [GetIndexCharacter(ch) for ch in text[1:]]
    targets.append(eos_index)
    targets = Variable(torch.LongTensor(targets))
    return TextToOneHotVector(text), targets

In [14]:
# Pre-compute the (input, target) tensors of every text once, remembering
# which line of data_text each pair came from.
data_vectors = []
for text_index, sample_text in enumerate(data_text):
    inputs, targets = TextToInput(sample_text)
    data_vectors.append(dict(index_text=text_index, x=inputs, y=targets))

## Our Class Model

In [40]:
class RNN(nn.Module):
    """Recurrent core (``nn.RNN``, ``nn.LSTM`` or ``nn.GRU``) followed by a linear decoder.

    Args:
        input_size: size of each input vector.
        hidden_size: size of the recurrent hidden state.
        output_size: number of values produced per step.
        num_layers: number of stacked recurrent layers.
        bidirectional: whether the recurrent core is bidirectional.
        model_type: name of the recurrent module, one of "RNN", "LSTM", "GRU".
        dropout: dropout between recurrent layers; only forwarded to the
            recurrent module when ``num_layers > 1``, because it has no
            effect (and triggers a warning) with a single layer.

    Raises:
        ValueError: if ``model_type`` is not one of the supported names.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 num_layers=1,
                 bidirectional=False,
                 model_type="RNN",
                 dropout=0.2):

        super(RNN, self).__init__()
        if model_type not in ("RNN", "LSTM", "GRU"):
            raise ValueError(
                "model_type must be 'RNN', 'LSTM' or 'GRU', got {!r}".format(model_type))

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.model_type = model_type
        self.dropout = dropout
        self.num_directions = 2 if bidirectional else 1

        self.rnn = getattr(nn, model_type)(input_size,
                                           hidden_size,
                                           num_layers=num_layers,
                                           dropout=dropout if num_layers > 1 else 0,
                                           bidirectional=bidirectional)
        # A bidirectional core concatenates both directions, doubling the
        # width of what the decoder receives.
        self.decoder = nn.Linear(hidden_size * self.num_directions, output_size)

    def forward(self, x, hidden):
        """Process one input vector and return ``(probabilities, new_hidden)``.

        ``x`` is flattened to a single row before being given to the recurrent
        core; the decoder output is normalised with a softmax over its last
        dimension, so the returned probabilities have size ``1 x output_size``.
        """
        output, hidden = self.rnn(x.view(1, -1), hidden)
        output = self.decoder(output)
        # Explicit dim: the implicit choice is deprecated and emits a warning.
        output = torch.nn.functional.softmax(output, dim=-1)
        return output, hidden

    def init_hidden(self):
        """Return a zero initial state matching the configured recurrent core.

        The state has ``num_layers * num_directions`` rows of ``hidden_size``
        values. For an LSTM a ``(h_0, c_0)`` tuple is returned, since that
        module expects both a hidden and a cell state.
        """
        rows = self.num_layers * self.num_directions
        h0 = Variable(torch.zeros(rows, self.hidden_size))
        if self.model_type == "LSTM":
            return h0, Variable(torch.zeros(rows, self.hidden_size))
        return h0

## Functions Training

In [41]:
def int_to_one_hot_vectors(i, length):
    """Return a ``1 x length`` one-hot ``Variable`` with a 1 at position ``i``."""
    one_hot = torch.zeros(1, length)
    one_hot[0][i] = 1
    return Variable(one_hot)

In [42]:
def train(x, y):
    """Run one optimisation step on a single text and return its mean loss.

    Args:
        x: one-hot encoded text; iterated one character at a time.
        y: tensor holding, for each character of ``x``, the index of the
            character expected next.

    Returns:
        The summed loss over the sequence divided by its length, as a float.

    Raises:
        ValueError: if ``x`` is empty (there is nothing to back-propagate).
    """
    sequence_length = x.size()[0]
    if sequence_length == 0:
        raise ValueError("cannot train on an empty sequence")

    hidden = hidden_init
    rnn.zero_grad()
    loss = 0
    # Convert the targets to plain Python ints once. Indexing the tensor and
    # calling `.tolist()[0]` on the result fails when `y[i]` is 0-dimensional.
    targets = y.data.tolist()

    for i, elem in enumerate(x):
        output, hidden = rnn(elem, hidden)
        y_tensor = int_to_one_hot_vectors(targets[i], n_characters)
        loss += criterion(output, y_tensor)

    loss.backward()
    optimizer.step()
    # `.item()` extracts the scalar; `loss.data[0]` raises on a 0-dim tensor.
    return loss.item() / sequence_length

## Generate Words

In [43]:
def generate(text_start, predict_len=100):
    """Extend ``text_start`` greedily with the model's most probable characters.

    Args:
        text_start: prompt used to prime the hidden state; its last character
            is the first input of the prediction loop.
        predict_len: maximum number of characters to generate.

    Returns:
        ``text_start`` followed by the generated characters. Generation stops
        early when the model predicts the End of String entry, which is not
        appended to the result.
    """
    hidden = hidden_init
    start_input, _ = TextToInput(text_start)

    # Inference only: no need to record operations for back-propagation.
    with torch.no_grad():
        # Prime the hidden state with every prompt character except the last.
        # The slice is empty for a one-character prompt, so no length guard is
        # needed; the former `len(text_start) > 2` test skipped priming for
        # two-character prompts and dropped their first character.
        for inp in start_input[:-1]:
            _, hidden = rnn(inp, hidden)
        new_inp = start_input[-1]

        predicted = text_start
        for _ in range(predict_len):
            output, hidden = rnn(new_inp, hidden)

            top_i = torch.topk(output, 1)[1].data.tolist()[0][0]
            predicted_char = list_characters[top_i]
            if predicted_char == EOS:
                break

            # Add predicted character to string and use as next input
            predicted += predicted_char
            new_inp = TextToOneHotVector(predicted_char)[0]

    return predicted

## Start training

In [44]:
# Single-layer, unidirectional vanilla RNN that reads and emits vectors of
# vocabulary size.
rnn = RNN(
    model_type="RNN",
    input_size=n_characters,
    output_size=n_characters,
    hidden_size=100,
    num_layers=1,
    bidirectional=False,
    dropout=0,
)

In [45]:
# Initial hidden state; train() and generate() both start every sequence from it.
hidden_init = rnn.init_hidden()

In [46]:
n_epochs = 10
n_samples_per_epoch = 200  # number of texts drawn at random for each epoch

all_losses = []

optimizer = torch.optim.Adam(rnn.parameters(), lr=0.05)
# NOTE(review): the saved output of this notebook shows generation collapsing
# to one repeated character. MSE on softmax probabilities is a likely cause;
# a cross-entropy style loss may train better, but it requires changing
# RNN.forward and train() together - confirm before switching.
criterion = nn.MSELoss()

for epoch in range(n_epochs):
    print("epochs {}".format(epoch))
    # Draw this epoch's texts without reordering data_vectors itself, so that
    # re-running the cell does not depend on a previously shuffled list.
    epoch_samples = random.sample(data_vectors,
                                  min(n_samples_per_epoch, len(data_vectors)))

    loss_avg = 0
    for elem in tqdm.tqdm(epoch_samples):
        loss_avg += train(elem["x"], elem["y"])

    print('Epochs: {}'.format(epoch))
    for prompt in ('Hi', 'Gr', 'Ju'):
        print(generate(prompt, 200), '\n')

    # Average over the texts actually trained on this epoch, not over the
    # whole dataset (which made the recorded loss far too small).
    all_losses.append(loss_avg / max(len(epoch_samples), 1))


  0%|          | 0/200 [00:00<?, ?it/s]

epochs 0


[A
  2%|▏         | 3/200 [00:00<00:19, 10.27it/s][A
  2%|▏         | 4/200 [00:00<00:51,  3.82it/s][A
  3%|▎         | 6/200 [00:01<00:40,  4.79it/s][A
  4%|▍         | 9/200 [00:01<00:42,  4.52it/s][A
  5%|▌         | 10/200 [00:02<00:39,  4.80it/s][A
  6%|▌         | 11/200 [00:02<00:58,  3.22it/s][A
  6%|▋         | 13/200 [00:03<00:54,  3.44it/s][A
  8%|▊         | 16/200 [00:03<00:48,  3.80it/s][A
  9%|▉         | 18/200 [00:04<00:56,  3.19it/s][A
 10%|▉         | 19/200 [00:04<00:45,  4.01it/s][A
 10%|█         | 20/200 [00:04<00:36,  4.88it/s][A
 10%|█         | 21/200 [00:04<00:38,  4.62it/s][A
 11%|█         | 22/200 [00:05<01:04,  2.74it/s][A
 12%|█▎        | 25/200 [00:05<00:48,  3.60it/s][A
 13%|█▎        | 26/200 [00:06<00:40,  4.33it/s][A
 14%|█▎        | 27/200 [00:06<00:38,  4.45it/s][A
 14%|█▍        | 28/200 [00:06<00:38,  4.52it/s][A
 14%|█▍        | 29/200 [00:08<02:27,  1.16it/s][A
 16%|█▌        | 31/200 [00:09<02:03,  1.37it/s][A
100%|███████

Epochs: 0
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 1/200 [00:00<00:21,  9.46it/s]

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 1


100%|██████████| 200/200 [00:52<00:00,  4.31it/s]


Epochs: 1
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 0/200 [00:00<?, ?it/s]

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 2


100%|██████████| 200/200 [00:54<00:00,  3.66it/s]


Epochs: 2
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 0/200 [00:00<?, ?it/s]

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 3


100%|██████████| 200/200 [00:55<00:00,  3.60it/s]


Epochs: 3
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 0/200 [00:00<?, ?it/s]

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 4


100%|██████████| 200/200 [00:55<00:00,  1.67it/s]


Epochs: 4
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 0/200 [00:00<?, ?it/s]

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 5


100%|██████████| 200/200 [01:00<00:00,  3.28it/s]


Epochs: 5
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 0/200 [00:00<?, ?it/s]

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 6


100%|██████████| 200/200 [01:00<00:00,  3.84it/s]


Epochs: 6
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 1/200 [00:00<00:35,  5.55it/s]

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 7


100%|██████████| 200/200 [01:08<00:00,  3.98it/s]


Epochs: 7
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 0/200 [00:00<?, ?it/s]

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 8


100%|██████████| 200/200 [01:03<00:00,  3.22it/s]


Epochs: 8
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



  0%|          | 0/200 [00:00<?, ?it/s]

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

epochs 9


100%|██████████| 200/200 [01:04<00:00,  3.99it/s]


Epochs: 9
HiPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

GrPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 

JuPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP 



# Plotting the Training Losses

Plotting the historical loss from all_losses shows the network learning:

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline

# One point per epoch: the value appended to all_losses by the training loop.
fig, ax = plt.subplots()
ax.plot(all_losses)
ax.set(title="Training loss", xlabel="Epoch", ylabel="Average loss");

In [None]:
# Stand-alone check of nn.MSELoss on random data. The names are prefixed so
# the cell neither shadows the builtin `input` nor overwrites the `loss`
# variable used by the training cell.
demo_criterion = nn.MSELoss()
demo_input = Variable(torch.randn(3, 5), requires_grad=True)
demo_target = Variable(torch.randn(3, 5))
demo_output = demo_criterion(demo_input, demo_target)
demo_output.backward()

# Exercises

* Train with your own dataset, e.g.
    * Text from another author
    * Blog posts
    * Code
* Increase number of layers and network size to get better results