**Let's start with importing the libraries as follows:**

In [1]:
!pip install unidecode
import unidecode
import string
import random
import math

import torch
import torch.nn as nn
from torch.autograd import Variable

from keras.datasets import reuters



Using TensorFlow backend.


**As input and output, we can use any character:**

In [2]:
all_characters = string.printable
input_size = len(all_characters)
output_size = input_size
print(input_size)

100


**We need to define the hyperparameters before moving on:**

In [0]:
n_steps = 2000
batch_size = 512
hidden_size = 100
n_layers = 2
learning_rate = 0.01
len_text = 200
print_every = 50

**We will be using the Reuters datasets from Keras**

In [0]:
data = reuters.load_data()

len_data = len(data)

**Let's define a function that transforms characters to tensors:**

In [0]:
def char_to_tensor(string):
  tensor = torch.zeros(len(string)).long()
  for c in range(len(string)):
    try:
      tensor[c] = all_characters.index(string[c])
    except:
      continue
  return tensor

**Next, we define a batch generator:**

In [0]:
def batch_gen(length_text, batch_size):
  X = torch.LongTensor(batch_size, length_text)
  y = torch.LongTensor(batch_size, length_text)
  for i in range(batch_size):
    start_index = random.randint(0, len_data - length_text)
    end_index = start_index + length_text + 1
    text = data[start_index : end_index]
    X[i] = char_to_tensor(text[:-1])
    y[i] = char_to_tensor(text[1:])
    X = Variable(X)
    y = Variable(y)
    X = X.cuda()
    y = y.cuda()
    return X, y

**We are now ready to define our network architecture:**

In [0]:
class create_model(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, n_layers = 1):
    super(create_model, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.encoder = nn.Embedding(input_size, hidden_size)
    self.rnn = nn.GRU(hidden_size, hidden_size, n_layers)
    self.decoder = nn.Linear(hidden_size, output_size)
    
  def forward(self, input, hidden):
    batch_size = input.size(0)
    encoded = self.encoder(input)
    output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden)
    output = self.decoder(output.view(batch_size, -1))
    return output, hidden
    
  def init_hidden(self, batch_size):
    return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))

**We continue by creating our model and defining the optimizer and loss function as follows:**

In [8]:
decoder_model = create_model(
    input_size,
    hidden_size,
    output_size,
    n_layers = n_layers,
)

opt = torch.optim.Adam(decoder_model.parameters(), 
                       lr = learning_rate)
loss = nn.CrossEntropyLoss()
decoder_model.cuda()

create_model(
  (encoder): Embedding(100, 100)
  (rnn): GRU(100, 100, num_layers=2)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
)

**We also create a function that we can use to generate text during training:**

In [0]:
def generate_text(decoder, start = 'The', predict_len = 100):
  hidden = decoder.init_hidden(1).cuda()
  prime_input = Variable(char_to_tensor(start).unsqueeze(0)).cuda()
  predicted = start
  
  for p in range(len(start) - 1):
    _, hidden = decoder(prime_input[:, p], hidden)
    
  x = prime_input[:, -1]
  
  for p in range(predict_len):
    output, hidden = decoder(x, hidden)
    output_dist = output.data.view(-1).div(0.8).exp()
    
    # Add some randomness
    top_i = torch.multinomial(output_dist, 1)[0]
    predicted_char = all_characters[top_i]
    predicted += predicted_char
    x = Variable(char_to_tensor(predicted_char).unsqueeze(0)).cuda()
    
  return predicted

**Finally, let's start training:**

In [10]:
loss_avg = 0

for i in range(n_steps):
  X, y = batch_gen(len_text, batch_size)
  hidden = decoder_model.init_hidden(batch_size).cuda()
  decoder_model.zero_grad()
  loss_total = 0
  
  for c in range(len_text):
    output, hidden = decoder_model(X[:, c], hidden)
    loss_total += loss(output.view(batch_size, -1), y[:, c])
    
  loss_total.backward()
  opt.step()
  loss_value = loss_total.data[0] / len_text
  
  loss_avg += loss_value
  
  if i % print_every == 0:
    print('Epoch {}: loss {}'.format(i, loss_avg))
    print(generate_text(decoder_model, 'The', 100), '\n')

ValueError: ignored