In [None]:
from google.colab import files
import collections
import numpy as np
import pandas as pd
from sklearn import preprocessing
import torch.nn.functional as F
import torch

In [None]:
# Store the upload result under its own name: rebinding `files` would replace
# the imported google.colab module, so re-running this cell would fail.
uploaded = files.upload()

In [None]:
def get_lines(path='ptb.train.txt'):
  """Read a text file and return its lines.

  Args:
    path: file to read; defaults to 'ptb.train.txt' in the working directory.

  Returns:
    A list with one string per line, trailing newlines included.
  """
  with open(path, 'r') as f:
    lines = f.readlines()
  return lines

# Load the corpus once; the cells below read the module-level `lines` list.
lines = get_lines()
# NOTE(review): this prints up to 1000 raw lines in one go; a smaller slice
# would keep the cell output readable.
print(lines[0:1000])
print(len(lines))

In [None]:
def tokenize(lines, token):
  """Split every line into word or character tokens.

  Args:
    lines: iterable of strings.
    token: 'word' splits each line on whitespace; 'char' turns each line into
      a list of its characters (newline included when the line has one).

  Returns:
    A list with one token list per input line.

  Raises:
    ValueError: if `token` is neither 'word' nor 'char'.
  """
  if token == 'word':
    return [line.split() for line in lines]
  if token == 'char':
    return [list(line) for line in lines]
  # Fail with a clear message instead of hitting an unbound local variable.
  raise ValueError(f"unknown token type {token!r}; expected 'word' or 'char'")

# Character-level tokenization: each line becomes a list of its characters.
Tokens = tokenize(lines, token='char')
print(Tokens[0:100])


In [None]:
def flatten(Tokens):
  """Concatenate the nested token lists into one flat list, preserving order."""
  flat = []
  for group in Tokens:
    flat.extend(group)
  return flat

# Collapse the per-line token lists into a single stream of tokens.
tokens = flatten(Tokens)
print(tokens[0:100])
print(len(tokens))

In [None]:
def unique_tokens(tokens):
  """Return the distinct tokens in order of first appearance.

  Args:
    tokens: iterable of hashable tokens (e.g. characters or words).

  Returns:
    A list holding each distinct token once, ordered by first occurrence.
  """
  # dict keys keep insertion order and give constant-time membership, which
  # avoids rescanning a growing list for every token of the corpus.
  return list(dict.fromkeys(tokens))

# The vocabulary: position in this list is the token's integer id below.
uniq_tokens = unique_tokens(tokens)
print(uniq_tokens[0:100])
print(len(uniq_tokens))

In [None]:
# Map every distinct token to its position in uniq_tokens.
character_dict = {char: e for e, char in enumerate(uniq_tokens)}
print(character_dict)

In [None]:
# Encode the whole token stream as integer ids via character_dict.
ptb_numerical = [character_dict[char] for char in tokens]
print(len(ptb_numerical))

In [None]:
# Round-trip check: decode the first 100 ids back into text.
print("".join(uniq_tokens[idx] for idx in ptb_numerical[:100]))

In [None]:
def one_hot_data(numerical_list, vocab_size=50):
    """One-hot encode a sequence of integer ids.

    Args:
        numerical_list: sequence of integer ids, each a valid column index
            for a row of width `vocab_size`.
        vocab_size: width of every one-hot row.

    Returns:
        A float tensor of shape (len(numerical_list), vocab_size) with a
        single 1.0 per row at the column given by the corresponding id.
    """
    num_rows = len(numerical_list)
    result = torch.zeros((num_rows, vocab_size))
    indices = torch.as_tensor(numerical_list, dtype=torch.long)
    # One vectorized write instead of a Python-level loop over every element.
    result[torch.arange(num_rows), indices] = 1.0
    return result

In [None]:
# Show the one-hot rows for the first two ids of the corpus.
print(one_hot_data(ptb_numerical[:2]))

In [None]:
def textify(embedding):
    """Decode a 2-D tensor of per-position scores into a string.

    The arg-max along dimension 1 selects one vocabulary index per row; each
    index is looked up in the module-level `uniq_tokens` list.
    """
    indices = torch.argmax(embedding, dim=1)
    return "".join(uniq_tokens[int(idx)] for idx in indices)

In [None]:
seq_length = 64
# The "- 1" keeps one extra character available so that the labels (the same
# stream shifted by one position) exist for the last sequence.
num_samples = (len(ptb_numerical) - 1) // seq_length
# Use the real vocabulary size for the one-hot width so it always agrees with
# the reshape below, rather than relying on one_hot_data's default width.
dataset = one_hot_data(
    ptb_numerical[:num_samples * seq_length],
    vocab_size=len(uniq_tokens),
).reshape(num_samples, seq_length, len(uniq_tokens))
dataset.shape

In [None]:
batch_size = 32
num_batches = len(dataset) // batch_size
# Keep only whole batches, lay them out as
# (batch_size, num_batches, seq_length, vocab), then reorder the axes to
# (num_batches, seq_length, batch_size, vocab) in a single permute.
train_iter = (
    dataset[:num_batches * batch_size]
    .reshape((batch_size, num_batches, seq_length, len(uniq_tokens)))
    .permute(1, 2, 0, 3)
)
train_iter.shape

In [None]:
# Targets are the input stream shifted one position ahead. Only the characters
# that fill whole batches are encoded (the same truncation applied to
# train_iter); otherwise the reshape fails whenever num_samples is not a
# multiple of batch_size.
num_label_chars = num_batches * batch_size * seq_length
labels = one_hot_data(
    ptb_numerical[1:num_label_chars + 1],
    vocab_size=len(uniq_tokens),
).reshape(batch_size, num_batches, seq_length, len(uniq_tokens))
# Reorder to (num_batches, seq_length, batch_size, vocab), matching train_iter.
labels = labels.swapaxes(0, 1)
labels = labels.swapaxes(1, 2)
labels.shape

In [None]:
# Decode the same slice of inputs and labels (first-axis index 10, third-axis
# index 3, all positions along the second axis). Labels are built from the
# stream shifted by one position, so the second line should read one
# character ahead of the first.
print(textify(train_iter[10, :, 3]))
print(textify(labels[10, :, 3]))

In [None]:
class RNN(torch.nn.Module):
  """Single-layer tanh RNN with a linear read-out, built from explicit weights.

  Args:
    vocab_size: width of the one-hot inputs and of the output scores.
    num_hiddens: size of the hidden state.

  Attributes:
    params: list of the five weight/bias Parameters, in the order
      [W_xh, W_hh, b_h, W_hq, b_q].
  """

  def __init__(self, vocab_size=50, num_hiddens=256):
    # Module bookkeeping has to exist before Parameters can be assigned.
    super().__init__()
    self.vocab_size = vocab_size
    self.num_hiddens = num_hiddens

    # Hidden layer: input-to-hidden, hidden-to-hidden weights and bias.
    # Wrapping in Parameter registers them, so .parameters() can see them.
    self.W_xh = torch.nn.Parameter(torch.normal(0, 0.01, (vocab_size, num_hiddens)))
    self.W_hh = torch.nn.Parameter(torch.normal(0, 0.01, (num_hiddens, num_hiddens)))
    self.b_h = torch.nn.Parameter(torch.zeros(1, num_hiddens))

    # Output layer: hidden-to-output weights and bias.
    self.W_hq = torch.nn.Parameter(torch.normal(0, 0.01, (num_hiddens, vocab_size)))
    self.b_q = torch.nn.Parameter(torch.zeros(1, vocab_size))
    self.params = [self.W_xh, self.W_hh, self.b_h, self.W_hq, self.b_q]

  def net(self, input, state):
    """Run the RNN over a sequence.

    Args:
      input: iterable over time steps; each step is multiplied by W_xh, so
        its last dimension must equal vocab_size.
      state: initial hidden state, broadcastable against (batch, num_hiddens).

    Returns:
      (outputs, hidden_act): a list with one tensor of raw output scores per
      time step (no softmax applied), and the hidden state after the last step.
    """
    hidden_act = state
    outputs = []
    for X in input:
      hidden = torch.matmul(X, self.W_xh) + torch.matmul(hidden_act, self.W_hh) + self.b_h
      # torch.tanh applies the activation; torch.nn.Tanh(...) would only
      # construct a module object.
      hidden_act = torch.tanh(hidden)
      outputs.append(torch.matmul(hidden_act, self.W_hq) + self.b_q)

    # Return only after the whole sequence has been consumed.
    return outputs, hidden_act

  def forward(self, input, state):
    """Make the module callable; delegates to `net`."""
    return self.net(input, state)

  def init_rnn_hidden(self):
    """Return an all-zero initial hidden state of shape (1, num_hiddens)."""
    return torch.zeros((1, self.num_hiddens))

In [None]:

# Hidden-layer parameters: weights drawn from a normal with mean 0 and
# std 0.01, bias initialised to zero. All tensors track gradients.
# NOTE(review): 50 is presumably the vocabulary size and 256 the hidden
# size -- confirm 50 == len(uniq_tokens).
W_xh = torch.normal(0, 0.01, (50, 256), requires_grad=True)
W_hh = torch.normal(0, 0.01, (256, 256), requires_grad=True)
b_h = torch.zeros(1, 256, requires_grad=True)

# Output-layer parameters.
W_hq = torch.normal(0, 0.01, (256, 50), requires_grad=True)
b_q = torch.zeros(1, 50, requires_grad=True)
# Order matters: net() unpacks this list positionally.
params = [W_xh, W_hh, b_h, W_hq, b_q]

In [None]:
def net(input, state):
  """Run the tanh RNN defined by the module-level `params` over a sequence.

  Args:
    input: iterable over time steps; each step is multiplied by W_xh.
    state: initial hidden state.

  Returns:
    A tuple (outputs, hidden_act): a list with one softmax-normalised output
    per time step, and the hidden state after the last step.
  """
  W_xh, W_hh, b_h, W_hq, b_q = params
  outputs = []
  h = state
  for x_t in input:
    pre_activation = x_t @ W_xh + h @ W_hh + b_h
    h = torch.tanh(pre_activation)
    scores = h @ W_hq + b_q
    outputs.append(softmax(scores))

  return (outputs, h)

In [None]:
def init_rnn_hidden():
  """Return a fresh all-zero hidden state of shape (1, 256)."""
  return torch.zeros(1, 256)

In [None]:
def cross_entropy(y_hat, y):
  """Cross-entropy between predicted probabilities and one-hot targets.

  Args:
    y_hat: predicted probabilities; the last dimension indexes classes.
    y: one-hot targets with the same shape as `y_hat`.

  Returns:
    A scalar tensor: the per-example cross-entropy (summed over the class
    dimension) averaged over all remaining dimensions. The value is therefore
    independent of the batch size.
  """
  # Clamp before the log: a probability that underflowed to exactly 0 would
  # otherwise give 0 * log(0) = 0 * -inf = NaN.
  log_probs = torch.log(torch.clamp(y_hat, min=1e-12))
  # Sum over classes only, so that the mean really averages over examples
  # (summing everything first would make the mean a no-op).
  per_example = -torch.sum(y * log_probs, dim=-1)
  return torch.mean(per_example)

In [None]:
def average_ce_loss(outputs, labels):
    """Average `cross_entropy` over paired per-step outputs and labels.

    Both sequences must have the same length; the per-step losses are added
    up and divided by the number of steps.
    """
    assert(len(outputs) == len(labels))
    step_losses = (
        cross_entropy(output, label) for output, label in zip(outputs, labels)
    )
    return sum(step_losses, 0.) / len(outputs)

In [None]:
def softmax(X):
  """Row-wise softmax of a 2-D tensor.

  Args:
    X: tensor of shape (rows, classes).

  Returns:
    A tensor of the same shape whose rows are non-negative and sum to one.
  """
  # Shift every row by its OWN maximum. Shifting by the global maximum is
  # mathematically equivalent, but rows far below it underflow to all zeros
  # and then divide 0 by 0.
  row_max = torch.max(X, dim=1, keepdim=True).values
  X_exp = torch.exp(X - row_max)
  partition = X_exp.sum(1, keepdim=True)
  return X_exp / partition

In [None]:
def grad_clipping(net, theta):
    """Clip gradients in place so that their global L2 norm is at most `theta`.

    Args:
        net: either a `torch.nn.Module` (its parameters are used) or an
            iterable of tensors whose `.grad` should be clipped.
        theta: maximum allowed L2 norm over all gradients together.

    Parameters that have no gradient yet (`.grad is None`) are skipped.
    """
    if isinstance(net, torch.nn.Module):
        params = list(net.parameters())
    else:
        params = list(net)
    grads = [p.grad for p in params if p.grad is not None]
    if not grads:
        return
    norm = torch.sqrt(sum(torch.sum(g ** 2) for g in grads))
    if norm > theta:
        # Rescale every gradient by the same factor to preserve the direction.
        with torch.no_grad():
            for g in grads:
                g.mul_(theta / norm)

In [None]:
rnn = net
criterion = average_ce_loss
params = params
lr = 0.005
optimizer = torch.optim.SGD(params, lr)
num_epochs = 75


for epoch in range(num_epochs):
  state = init_rnn_hidden()
  for i in range(num_batches):
    input = train_iter[i]
    train_labels = labels[i]
    state = state.detach()
    optimizer.zero_grad()
    y_hat, state = rnn(input, state)
    l = criterion(y_hat, train_labels)
    l.sum().backward()
    grad_clipping(params, 1)

    optimizer.step()

  with torch.no_grad():
    l_loss = criterion(y_hat, train_labels)
    print(f'loss on epoch {epoch} was {l_loss}')
    print(predict('on the other hand', 512))

In [None]:
def predict(prefix, num_chars):
  string = prefix
  sample_state = init_rnn_hidden()
  string_numerical = [character_dict[char] for char in prefix]
  input = one_hot_data(string_numerical)
  
  for i in range(num_chars):
    outputs, sample_state = rnn(input, sample_state)
    choice = np.random.choice(50, p=fix_p(np.asarray(outputs[-1][0])))
    string += uniq_tokens[choice]
    input = one_hot_data([choice])
  return string

In [None]:
def fix_p(p):
    if p.sum() != 1.0:
        p = p*(1./p.sum())
    return p

In [None]:
# Unseeded NumPy random generator.
# NOTE(review): not referenced by any other visible cell; sampling below uses
# np.random.choice instead.
my_generator = np.random.default_rng()

In [None]:
# Manual, step-by-step version of the sampling loop, kept at module level so
# that `outputs` and `string` can be inspected afterwards.
prefix = 'my name is'
string = prefix
sample_state = init_rnn_hidden()
string_numerical = [character_dict[char] for char in prefix]
input = one_hot_data(string_numerical)

for i in range(20):
  # Gradients are not needed for sampling.
  with torch.no_grad():
    outputs, sample_state = rnn(input, sample_state)
    # Sample the next id from the distribution of the last time step.
    # NOTE(review): 50 is hard-coded here; presumably the vocabulary size.
    choice = np.random.choice(50, p=fix_p(np.asarray(outputs[-1][0])))
    string += uniq_tokens[choice]
    # Feed only the sampled character back in on the next iteration.
    input = one_hot_data([choice])

In [None]:
# Sanity check on the last sampled distribution.
# NOTE(review): fix_p receives the scalar sum here, not the probability
# vector itself, so the result is that sum rescaled to 1.
a = fix_p(outputs[-1][0].sum())
a

tensor(1.)