<a href="https://colab.research.google.com/github/PhatHuynhTranSon99/Text-Generation/blob/master/Pytorch_TextGeneration_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text generation in the style of Andy Weir using a GRU network

## Library import

In [1]:
import torch
import pickle
import random
import numpy as np

## Utility functions

In [2]:
def save_as_pickle(object, file_name):
    """Serialize ``object`` into the file at ``file_name``.

    The file is opened in binary write mode (so an existing file is
    overwritten) and the data is written with ``pickle.HIGHEST_PROTOCOL``.
    """
    with open(file_name, mode="wb") as sink:
        pickle.dump(object, sink, protocol=pickle.HIGHEST_PROTOCOL)


def load_from_pickle(file_name):
    """Return the object unpickled from the file at ``file_name``.

    The file is opened in binary read mode and closed before returning.
    """
    # NOTE: unpickling can execute arbitrary code, so only load files
    # that come from a trusted source.
    with open(file_name, mode="rb") as source:
        return pickle.load(source)

## Create a model

In [3]:
class NGramsModel(torch.nn.Module):
    """Embedding -> GRU -> linear model producing vocabulary scores per position.

    Args:
        vocab_size: Number of entries in the embedding table; also the width
            of the output layer.
        embedding_size: Dimension of each token embedding.
        hidden_size: Dimension of the GRU hidden state.
    """

    def __init__(self, vocab_size, embedding_size, hidden_size):
        # Call constructor of parents
        super().__init__()

        # Cache the sizes
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        # NOTE: the attribute names below become the state-dict keys, so they
        # must stay as they are for saved checkpoints to keep loading.

        # Create the embedding layer
        self.embeddings = torch.nn.Embedding(vocab_size, embedding_size)

        # Create gru layer (batch_first: inputs are (batch, sequence, feature))
        self.gru = torch.nn.GRU(embedding_size, hidden_size, batch_first=True)

        # Create output dense layer mapping each hidden state to vocabulary scores
        self.dense = torch.nn.Linear(hidden_size, vocab_size)

    def forward(self, inputs):
        """Score the vocabulary at every position of the input sequence(s).

        Args:
            inputs: Integer tensor of token indices, either a single sequence
                of shape ``(seq_len,)`` or a batch of shape
                ``(batch, seq_len)``.

        Returns:
            Tensor of unnormalized scores with shape
            ``(batch, seq_len, vocab_size)``. A single 1-D sequence is treated
            as a batch of one, so its result has a leading dimension of 1.

        Raises:
            ValueError: If ``inputs`` is neither 1-D nor 2-D.
        """
        # Promote a single sequence to a batch of one so the GRU always
        # receives (batch, sequence, feature) input
        if inputs.dim() == 1:
            inputs = inputs.unsqueeze(0)
        elif inputs.dim() != 2:
            raise ValueError(
                f"inputs must be 1-D or 2-D, got a tensor with {inputs.dim()} dimension(s)"
            )

        # Get the embedding from inputs: (batch, seq_len, embedding_size)
        embeddings = self.embeddings(inputs)

        # Pass embedding as input into gru and get
        # hidden state for each word in the sentence
        hidden_states, _ = self.gru(embeddings)

        # Pass every hidden state into the linear layer and get the result
        return self.dense(hidden_states)

## Load data

In [5]:
# Locations of the pickled artifacts
# IMPORTANT: These files can be found in checkpoints folder of the github repo
# Put it in the colab folder and then run this cell
TOKEN_PATH = "token.pickle"
WORD2INDEX_PATH = "word2index.pickle"
NGRAMS_PATH = "ngrams.pickle"

# Read the three artifacts back, in the same order as the paths above:
# the tokens, the word -> index mapping and the n-grams
tokens, word2index, ngrams = map(
    load_from_pickle,
    (TOKEN_PATH, WORD2INDEX_PATH, NGRAMS_PATH),
)

## Training process

In [None]:
# Pick the device once: first GPU when available, otherwise fall back to CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Create model
model = NGramsModel(
    vocab_size=len(word2index),
    embedding_size=50,
    hidden_size=64
)

# Load in the saved weight; map_location remaps the stored tensors so a
# checkpoint written from a GPU session also loads on a CPU-only machine
model.load_state_dict(torch.load("model.pth", map_location=device))

# Move the model to the target device before building the optimizer, so the
# optimizer is constructed from the parameters as they will be trained
model.to(device)

# Create loss and optimizer
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1, momentum=0.1)

# Display the model architecture as the cell output
model

NGramsModel(
  (embeddings): Embedding(8136, 50)
  (gru): GRU(50, 64, batch_first=True)
  (dense): Linear(in_features=64, out_features=8136, bias=True)
)

In [None]:
# Start training
EPOCHS = 20

# Follow whichever device the model's parameters live on instead of
# hard-coding "cuda:0", so inputs and model can never end up on different devices
device = next(model.parameters()).device

for epoch in range(EPOCHS):
  # Shuffle n grams (in place) so every epoch visits them in a new order
  random.shuffle(ngrams)

  # Running sum of the per-sample losses for this epoch
  avg_loss = 0

  # Stochastic gradient descent: one (inputs, labels) pair per update
  for i, (inputs, labels) in enumerate(ngrams):
    # Display progress
    if i % 1000 == 0:
      print(f"Iteration: {i}")

    # Convert tokens to index tensors, created directly on the model's device
    inputs = torch.tensor(
        [word2index[token] for token in inputs], dtype=torch.long, device=device
    )
    labels = torch.tensor(
        [word2index[token] for token in labels], dtype=torch.long, device=device
    )

    # Reset optimizer
    optimizer.zero_grad()

    # Run through model and get the loss
    result = model(inputs)
    result = result.squeeze(0) # Remove the leading batch dimension of size 1
    current_loss = loss(result, labels)

    # Add to the running loss
    avg_loss += current_loss.item()

    # Perform backprop
    current_loss.backward()
    optimizer.step()

  # Calculate and display current loss (guard against an empty n-gram list)
  avg_loss = avg_loss / max(len(ngrams), 1)
  print(f"Epoch: {epoch + 1}, Average loss: {avg_loss}")

NameError: ignored

In [None]:
# Write the model's state dict (not the whole module object) to disk
torch.save(obj=model.state_dict(), f="model.pth")

## Generate text using trained model

In [6]:
# Pick the device once: first GPU when available, otherwise fall back to CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Re-create model from saved weights
model = NGramsModel(
    vocab_size=len(word2index),
    embedding_size=50,
    hidden_size=64
)

# Load in the saved weight
# IMPORTANT: This file can be found in the checkpoints folder of github repo
# Download it and upload on this colab folder and then run the cell
# map_location remaps the stored tensors so a checkpoint written from a GPU
# session also loads on a CPU-only machine
model.load_state_dict(torch.load("model.pth", map_location=device))

# Move model to the selected device
model.to(device)

# Switch to inference mode; eval() returns the model, so the architecture
# is still displayed as the cell output
model.eval()

NGramsModel(
  (embeddings): Embedding(8136, 50)
  (gru): GRU(50, 64, batch_first=True)
  (dense): Linear(in_features=64, out_features=8136, bias=True)
)

In [7]:
# Invert the word -> index mapping so an index can be looked up as a word
index2word = dict(zip(word2index.values(), word2index.keys()))

In [8]:
# Softmax
def softmax(x):
  """Return the softmax of ``x`` computed over all of its elements.

  The maximum is subtracted before exponentiating. This leaves the result
  mathematically unchanged but keeps ``np.exp`` from overflowing to ``inf``
  (and the output from becoming ``nan``) when the scores are large.

  Args:
    x: Array-like of scores.

  Returns:
    Array of the same shape as ``x`` whose entries are non-negative and
    sum to 1. An empty input yields an empty array.
  """
  x = np.asarray(x)
  if x.size == 0:
    # np.max has no identity for empty input; nothing to normalize anyway
    return np.exp(x)

  # Compute the exponentials once and reuse them for the normalizer
  exps = np.exp(x - np.max(x))
  return exps / np.sum(exps)

In [13]:
# Prediction phase
NUM_WORDS = 80  # Number of words to sample after the prompt
prompt = ["“", "this", "is", "wrong"]
completed = list(prompt)  # Copy, so growing the output does not alter the prompt

# Follow whichever device the model's parameters live on instead of
# hard-coding "cuda:0", so inputs and model can never end up on different devices
device = next(model.parameters()).device

# Run prompt though model
with torch.no_grad():
  for _ in range(NUM_WORDS):
    # Convert the current context window to indices on the model's device
    inputs = torch.tensor(
        [word2index[token] for token in prompt], dtype=torch.long, device=device
    )

    # Predict next words
    result = model(inputs)
    result = result.squeeze(0) # Remove the leading batch dimension of size 1

    # Scores for the word following the last context token; these are raw
    # logits, softmax turns them into a probability distribution
    logits = result[-1].cpu().numpy()
    prediction = np.random.choice(
        a=len(word2index),
        p=softmax(logits)
    )
    next_word = index2word[prediction]

    completed.append(next_word)

    # Slide the context window: drop the oldest word, add the sampled one
    prompt = prompt[1:] + [next_word]

# Print the complete sentence
print(" ".join(completed))

“ this is wrong . this is that i think where it launches 40 sols before mars . i 'll have a word at different rates . the side is : we do n't the pop tent . lightweight string woven loosely into he ’ ll do n't see what speak : staring at him , blah , blah , ” johanssen smiled . “ actually , ” lewis said . “ watney ? ” lewis asked from the wall . and i told
