# LAB 3.1 - CNS (Char RNN)

Import of libraries, fix of random seed and device.


In [1]:
import json
import os
import random
import numpy as np
import itertools
from typing import Callable
from tqdm.notebook import tqdm
import torch
from torchvision import datasets
import pandas as pd
import requests

seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

device = 'cuda'

# Bonus track 5 - The Unreasonable Effectiveness of Recurrent Neural Networks

Text import and basic preprocessing to extract unique chars and create the mapping char-index.

In [2]:
text = requests.get('https://www.gutenberg.org/files/1000/1000-0.txt').text[2215:505000].lower()
unique_chars = list(set(text))
char_to_index = {c: i for i, c in enumerate(unique_chars)}

len(text), len(unique_chars)

(497785, 59)

Functions able to extract X and Y tensors from text and to create training and test sets.

In [3]:
def create_xy_from_text(
    text: str, 
    max_length: int = 80, 
    skip_char: int = 3, 
    device: str = 'cpu'
  ) -> tuple[torch.tensor, torch.tensor]:
  """
  Function able to extract X and Y tensors from text.

  text: Text used for extraction.
  max_length: Max langth of each sentence that is one example of X.
  skip_char: Characters to skip from a sentence to another.
  device: Device used to allocate X and Y.

  returns:
    tuple[torch.tensor, torch.tensor]: X and Y tensors.
  """
  sentences, next_chars = [], []
  for i in range(0, len(text) - max_length, skip_char):
    sentences.append(text[i:i+max_length])
    next_chars.append(text[i+max_length])
  X = torch.zeros(max_length, len(sentences), len(unique_chars)).to(device)
  Y = torch.zeros(len(sentences), len(unique_chars)).to(device)
  for i, sentence in enumerate(tqdm(sentences, desc='dataset creation')):
    for j, char in enumerate(sentence):
      X[j, i, char_to_index[char]] = 1
    Y[i, char_to_index[next_chars[i]]] = 1
  return X, Y

def create_tr_ts(
    tr_text: str, 
    ts_text: str, 
    max_length: int = 80, 
    skip_char: int = 3, 
    device:str = 'cpu'
  ):
  """
  Function able to create training and test set from texts.

  tr_text: Text used for training extraction.
  ts_text: Text used for test extraction.
  max_length: Max langth of each sentence that is one example of X.
  skip_char: Characters to skip from a sentence to another.
  device: Device used to allocate TR and TS.

  returns:
    tuple[tuple, tuple]: Training and test sets.
  """
  TR = create_xy_from_text(tr_text, max_length=max_length, skip_char=skip_char, device=device)
  TS = create_xy_from_text(ts_text, max_length=max_length, skip_char=skip_char, device=device)
  return TR, TS

Training and test set creation.

In [4]:
TR, TS = create_tr_ts(text[:500000], text[500000:505000], max_length=60, skip_char=3, device=device)

TR[0].shape, TR[1].shape, TS[0].shape, TS[1].shape

dataset creation:   0%|          | 0/165909 [00:00<?, ?it/s]

dataset creation:   0%|          | 0/147 [00:00<?, ?it/s]

(torch.Size([60, 165909, 59]),
 torch.Size([165909, 59]),
 torch.Size([60, 147, 59]),
 torch.Size([147, 59]))

Char RNN model composed by a recurrent layer and a readout linear layer.

In [5]:
class CharRNN(torch.nn.Module):
  """
  Char RNN class.
  """

  def __init__(
      self, 
      input_size: int, 
      hidden_size: int, 
      output_size: int, 
      recurrent_layer: torch.nn.Module = torch.nn.RNN, 
      n_layers: int = 1, 
      bidirectional: bool = False, 
      device: str = 'cpu'
    ) -> None:
    """
    Char RNN constructor.

    input_size: Input size of the model.
    hidden_size: Hidden size of the model.
    output_size: Output size of the model.
    recurrent_layer: Recurrent layer constructor.
    n_layers: Number of deep layers.
    bidirectional: If true the recurrent layer is bidirectional.
    device: Device used to allocate the model.
    """
    super(CharRNN, self).__init__()
    self.recurrent_layer = recurrent_layer(
        input_size,
        hidden_size,
        num_layers=n_layers,
        bidirectional=bidirectional,
    ).to(device)
    D = 2 if bidirectional else 1
    self.readout = torch.nn.Linear(hidden_size * D, output_size).to(device)

  def forward(self, X: torch.tensor) -> torch.tensor:
    """
    Forward method of torch module.

    X: Inut tensor.

    returns:
      torch.tensor: Output tensor.
    """
    out, h = self.recurrent_layer(X)
    return self.readout(out[-1])


Training function to fit the char RNN model.

In [6]:
def train(
    model: CharRNN, 
    TR: tuple[torch.tensor], 
    TS: tuple[torch.tensor], 
    epochs: int = 10, 
    batch_size: int = 64, 
    lr: float = 0.001, 
    verbose: bool = False
  ) -> None:
  """
  Training function.

  model: Model to train.
  TR: Trining set.
  TS: Test set.
  epochs: Epochs of training.
  batch_size: Batch size.
  lr: Learning rate.
  verbose: Flag to print the more output info.
  """
  X_TR, Y_TR = TR
  X_TR_batches, Y_TR_batches = X_TR.split(batch_size, dim=1), Y_TR.split(batch_size)
  X_TS, Y_TS = TR
  X_TS_batches, Y_TS_batches = X_TS.split(batch_size, dim=1), Y_TS.split(batch_size)
  criterion = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
  for epoch in tqdm(range(epochs), desc='training'):

    model.train()
    tr_loss = 0
    for X_batch, Y_batch in zip(X_TR_batches, Y_TR_batches):
      preds = model(X_batch)
      optimizer.zero_grad()
      loss = criterion(preds, Y_batch)
      loss.backward()
      torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
      optimizer.step()
      tr_loss += loss.item()

    model.eval()
    ts_loss = 0
    for X_batch, Y_batch in zip(X_TS_batches, Y_TS_batches):
      preds = model(X_batch)
      loss = criterion(preds, Y_batch)
      ts_loss += loss.item()

    if verbose:
      tqdm.write(f'{epoch+1}/{epochs} - tr_loss: {tr_loss / len(X_TR_batches)} - ts_loss: {ts_loss / len(X_TS_batches)}')

Function able to generate the text using sampling with a certain temperature.

In [7]:
def sample(preds: torch.Tensor, temperature: float = 0.1) -> int:
  """
  Sampling function.

  preds: Predictions of model.
  temperature: Temperature that if increased gives more creativity.

  returns:
    int: Index of next char sampled.
  """
  preds_log = torch.log(preds) / temperature
  preds_exp = torch.exp(preds_log)
  softmax = preds_exp / preds_exp.sum()
  return torch.multinomial(softmax, len(softmax)).argmax().item()
  
def generate(
    model: CharRNN, 
    sentence: str, 
    max_lengh: int = 300, 
    temperature: float = 0.1, 
    device: str = 'cpu'
  ) -> str:
  """
  Function able to generate the text.

  model: Char RNN model used to generate text.
  sentence: Input sentence able to provide the starting point to the char RNN.
  max_lengh: Max number of characters to generate.
  temperature: Temperature of sampling.
  device: Device to allocate samples tensor.

  returns:
    str: Final sentence generated.
  """
  final_sencence = sentence
  for t in tqdm(range(max_lengh), desc='text generation'):
    sampled = torch.zeros(max_lengh, 1, len(unique_chars)).to(device)
    for i, char in enumerate(sentence):
      sampled[i, 0, char_to_index[char]] = 1
    preds = model(sampled)
    softmax = torch.nn.functional.softmax(preds[0], dim=-1)
    next_char = sample(softmax, temperature=temperature)
    sentence += unique_chars[next_char]
    final_sencence += unique_chars[next_char]
    sentence = sentence[1:]
  return final_sencence

Function able to create and train the char RNN model with different configurations and generate texts with different temperatures.

In [8]:
def run_configs(
    TR: tuple[torch.tensor, torch.tensor], 
    TS: tuple[torch.tensor, torch.tensor], 
    text_gen: str,
    configs: dict, 
    max_length: int = 80, 
    temperatures: list[float] = [0.05, 0.1, 0.3, 1, 3], 
    verbose: bool = False,
    device: str = 'cpu',
  ) -> None:
  """
  Function able to create aand train models with different configs and generate sentences with different temperatures.

  TR: Training set.
  TS: Test set.
  text_gen: Text used for text generation.
  configs: Configurations to try.
  max_length: Maximum length of chars in the generated text.
  temperatures: List of temperatures to try.
  verbose: Flag to have a verbose output.
  device: Device to allocate model and datasets.
  """
  if isinstance(configs, dict):
      configs = [dict(zip(configs.keys(), t)) for t in itertools.product(*configs.values())]
  for config in configs:
    model = CharRNN(
        len(unique_chars), 
        config['hidden_state'], 
        len(unique_chars), 
        n_layers=config['n_layers'], 
        bidirectional=config['bidirectional'], 
        recurrent_layer=config['recurrent_layer'],
        device=device
    )
    train(
        model, 
        TR, TS, 
        epochs=config['epochs'], 
        batch_size=config['batch_size'], 
        lr=config['lr'], 
        verbose=verbose,
    )
    for temperature in temperatures:
      print(f'-----temperature={temperature}-----')
      generated_text = generate(model, text_gen, max_lengh=max_length, device=device)
      print(generated_text)

Run some configurations to train models and see generated texts with different temperatures.

In [None]:
run_configs(
    TR=TR,
    TS=TS,
    text_gen=text[:60],
    max_length=300,
    configs=dict(
        hidden_state=[128],
        n_layers=[1],
        bidirectional=[True],
        epochs=[30],
        batch_size=[1024],
        lr=[0.01],
        recurrent_layer=[torch.nn.LSTM],
    ),
    temperatures=[0.05, 0.1, 0.3, 1, 3],
    verbose=True,
    device=device,
)

training:   0%|          | 0/30 [00:00<?, ?it/s]

1/30 - tr_loss: 2.1732798368652904 - ts_loss: 2.471022209141152
2/30 - tr_loss: 1.8341845946809265 - ts_loss: 2.106832553089762
3/30 - tr_loss: 1.7014678182777452 - ts_loss: 1.9937311809487137
4/30 - tr_loss: 1.5994787695217718 - ts_loss: 1.8133733199875048
5/30 - tr_loss: 1.5274047361561125 - ts_loss: 1.7385903542941332
6/30 - tr_loss: 1.4739932320966311 - ts_loss: 1.5870479798755763
7/30 - tr_loss: 1.4317158530094871 - ts_loss: 1.5416934288169708
8/30 - tr_loss: 1.3972406049996067 - ts_loss: 1.4815285174758888
9/30 - tr_loss: 1.3686182871003825 - ts_loss: 1.4518045613005117
10/30 - tr_loss: 1.3421542235861528 - ts_loss: 1.4666463975891746
11/30 - tr_loss: 1.3254331353983264 - ts_loss: 1.5415047692374948
12/30 - tr_loss: 1.30837952578726 - ts_loss: 1.4183564390872885
13/30 - tr_loss: 1.292881746416443 - ts_loss: 1.3643420339087766
14/30 - tr_loss: 1.275907007828812 - ts_loss: 1.3858851380417683
15/30 - tr_loss: 1.268372419238822 - ts_loss: 1.3307350551812378
16/30 - tr_loss: 1.2551667