Logs   
- [2023/03/08]   
  Restart this notebook if you change the scratch library

In [1]:
import requests
import numpy as np
import tqdm

from bs4 import BeautifulSoup
from typing import Iterable, List, Dict
from scratch.linear_algebra import LinearAlgebra as la
from scratch.deep_learning import DeepLearning as dl
from scratch.deep_learning import \
  Layer, Tensor, Linear, Sequential, SoftmaxCrossEntropy, Momentum

## Previous classes and functions

In [2]:
# A function to randomly choose an index based on an arbitrary set of weights.
# If we give the function the weights [1, 1, 3], then one-fifth of the time
# it will return 0, one-fifth of the time it will return 1, and three-fifths
# of the time it will return 2. The five comes from 1 + 1 + 3 = 5.
def sample_from(weights: List[float], rng) -> int:
  """Draw a random index, choosing i with probability weights[i] / sum(weights).

  Args:
    weights: one non-negative weight per index; they need not sum to 1.
    rng: any object whose ``random()`` method returns a float in [0, 1),
      for example a ``numpy.random.Generator``.

  Returns:
    The sampled index into ``weights``.

  Raises:
    ValueError: if ``weights`` is empty, because there is no index to return.
  """
  if len(weights) == 0:
    raise ValueError("weights must not be empty")

  total = sum(weights)
  rnd = total * rng.random()        # uniform between 0 and total
  for i, w in enumerate(weights):
    rnd -= w                        # return the smallest i such that
    if rnd <= 0:                    # weights[0] + ... + weights[i] >= rnd
      return i

  # Subtracting the weights one at a time can round differently from sum(),
  # so a draw very close to `total` may leave a tiny positive remainder here.
  # Return the last index instead of silently falling through to None.
  return len(weights) - 1

In [3]:
class Vocabulary(object):
  """Two-way lookup between words and the integer ids assigned to them.

  Ids are handed out in insertion order, starting at 0.
  """

  def __init__(self, words: List[str] = None) -> None:
    self.w2i: Dict[str, int] = {}   # word -> word_id
    self.i2w: Dict[int, str] = {}   # word_id -> word

    # Register the initial words, if any were given.
    if words:
      for word in words:
        self.add(word)


  @property
  def size(self) -> int:
    """Number of distinct words currently stored."""
    return len(self.w2i)

  def add(self, word: str) -> None:
    """Give `word` the next free id; a word that is already known is ignored."""
    if word in self.w2i:
      return

    next_id = len(self.w2i)
    self.w2i[word] = next_id
    self.i2w[next_id] = word

  def get_id(self, word: str) -> int:
    """Look up the id of `word`; returns None when the word is unknown."""
    return self.w2i.get(word)

  def get_word(self, word_id: int) -> str:
    """Look up the word stored under `word_id`; returns None when there is none."""
    return self.i2w.get(word_id)

  def one_hot_encode(self, word: str) -> Tensor:
    """Return a list of `size` floats: 1.0 at the word's id and 0.0 elsewhere.

    Fails an assertion when `word` is not in the vocabulary.
    """
    word_id = self.get_id(word)
    assert word_id is not None, f"unknown word {word}"

    encoding = [0.0] * self.size
    encoding[word_id] = 1.0
    return encoding

## Recurrent Neural Networks

This kind of neural network captures the order of the elements in 
the input sequence. It does so by introducing a *hidden state* that
is maintained between inputs.

**linear layer**
```
output[o] = dot(w[o], input) + b[o]
```

**recurrent layer**
```
output[o] = dot(w[o], input) + dot(u[o], hidden) + b[o]
```

 The following layer is the simplest possible RNN; you should not use it in practice.
 Its purpose is to help in understanding the concept behind
 recurrent neural networks. More standard RNNs are the GRU and the LSTM, which can
 be built using the TensorFlow or PyTorch libraries.

In [4]:
class SimpleRnn(Layer):
  """A minimal recurrent layer: hidden = tanh(w @ input + u @ hidden + b).

  The hidden state produced by one `forward` call is fed into the next call
  until `reset_hidden_state` clears it.
  """
  def __init__(self, input_dim: int, hidden_dim: int, rng) -> None:
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim

    # Parameters: input weights, hidden-to-hidden weights, and bias.
    self.w = dl.random_tensor(hidden_dim, input_dim, init="xavier", rng=rng)
    self.u = dl.random_tensor(hidden_dim, hidden_dim, init="xavier", rng=rng)
    self.b = dl.random_normal(hidden_dim, rng=rng)

    self.reset_hidden_state()


  def reset_hidden_state(self) -> None:
    """Set the hidden state back to all zeros."""
    self.hidden = [0] * self.hidden_dim


  def forward(self, input_: Tensor) -> Tensor:
    """Combine `input_` with the current hidden state and return the new state."""
    # Keep the input and the incoming hidden state; backward() needs both.
    self.input = input_
    self.prev_hidden = self.hidden

    pre_activation = []
    for row in range(self.hidden_dim):
      from_input = la.dot(self.w[row], input_)          # weights @ input
      from_hidden = la.dot(self.u[row], self.hidden)    # weights @ hidden
      pre_activation.append(from_input + from_hidden + self.b[row])

    # The activated values are both the output and the next hidden state.
    self.hidden = dl.tensor_apply(dl.tanh, pre_activation)
    return self.hidden


  def backward(self, gradient: Tensor) -> None:
    """Store the parameter gradients and return the gradient w.r.t. the input."""
    rows = range(self.hidden_dim)
    cols = range(self.input_dim)

    # Through the tanh: d tanh(a)/da = 1 - tanh(a)^2, and self.hidden
    # already holds tanh(a) from the forward pass.
    a_grad = [gradient[r] * (1 - self.hidden[r] ** 2) for r in rows]

    # The bias is added straight into a, so it shares a's gradient.
    self.b_grad = a_grad

    # w[r][c] reaches a[r] only through input[c].
    self.w_grad = [[a_grad[r] * self.input[c] for c in cols]
                   for r in rows]

    # u[r][r2] reaches a[r] only through the previous hidden[r2].
    self.u_grad = [[a_grad[r] * self.prev_hidden[r2] for r2 in rows]
                   for r in rows]

    # input[c] feeds every a[r] through w[r][c], so sum over the rows.
    input_grad = []
    for c in cols:
      input_grad.append(sum(a_grad[r] * self.w[r][c] for r in rows))
    return input_grad

  def params(self) -> Iterable[Tensor]:
    """Parameters, in the same order as `grads`."""
    return [self.w, self.u, self.b]

  def grads(self) -> Iterable[Tensor]:
    """Gradients stored by the latest `backward`, in the same order as `params`."""
    return [self.w_grad, self.u_grad, self.b_grad]

## Example: Using a Character-Level RNN

In this example, we want to generate an alternative brand name
from the list of top brand names of successful startups
(the scrape below returned 586 names rather than 100).

In [5]:
url = "https://www.ycombinator.com/topcompanies/"

# Without a timeout the request can block the kernel indefinitely, and without
# a status check an error page would be parsed as if it were the company list.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html5lib')

In [6]:
# Company names are the text of the <a> tags carrying the "company-name" class.
companies = [
  anchor.text
  for anchor in soup.find_all("a", class_="company-name")
]
len(companies)

586

Build a `Vocabulary` from the characters in the company names

In [7]:
# Every character occurring in any company name becomes a vocabulary entry,
# with ids assigned in order of first appearance.
vocab = Vocabulary(list("".join(companies)))

# Show the characters in id order.
list(map(vocab.get_word, range(vocab.size)))

['S',
 't',
 'r',
 'i',
 'p',
 'e',
 'I',
 'n',
 's',
 'a',
 'c',
 'O',
 'F',
 'B',
 'x',
 'D',
 'l',
 'R',
 'g',
 'G',
 'u',
 'o',
 'z',
 'y',
 ' ',
 'A',
 'h',
 'v',
 'C',
 'k',
 'Z',
 'M',
 'W',
 'b',
 'f',
 'w',
 'd',
 'E',
 'q',
 'm',
 'T',
 'P',
 'X',
 'L',
 'H',
 'N',
 '1',
 'J',
 'V',
 '.',
 'Q',
 'j',
 'K',
 '9',
 'Y',
 "'",
 ',',
 'U',
 '-',
 '6',
 '4',
 '5',
 '3',
 '0']

We need a token to signify the start and end of the company names.
Luckily these start and stop characters do not appear in the company names

In [8]:
START = '^'
STOP = "$"

# The markers only work as delimiters if no scraped name contains them, so
# check that instead of relying on luck. Checking `companies` rather than
# `vocab` keeps this cell safe to re-run after the markers have been added.
assert not any(START in name or STOP in name for name in companies), \
  "START/STOP marker appears in a company name; pick different markers"

vocab.add(START)
vocab.add(STOP)

### Create the model

We'll one-hot-encode each character, pass it through two `SimpleRnn`s,
and then use a `Linear` layer to generate the scores for each possible next
character

In [9]:
# One seeded generator drives weight initialisation, shuffling and sampling.
seed = 2023_05_04
rng = np.random.default_rng(seed)

HIDDEN_DIM = 32   # You should experiment with different sizes!

# The layers are built in this order on purpose: each constructor receives the
# shared `rng`, so reordering them would change the initial weights.
rnn1 = SimpleRnn(
  input_dim=vocab.size,
  hidden_dim=HIDDEN_DIM,
  rng=rng)
rnn2 = SimpleRnn(
  input_dim=HIDDEN_DIM,
  hidden_dim=HIDDEN_DIM,
  rng=rng)
linear = Linear(
  input_dim=HIDDEN_DIM,
  output_dim=vocab.size,
  rng=rng)

# one-hot character -> rnn1 -> rnn2 -> one score per vocabulary entry
model = Sequential([rnn1, rnn2, linear])

### Train the model

In [12]:
# Function to generate the company name
def generate(seed: str = START, max_len: int = 50, rng=None) -> str:
  """Sample a company name from the model, one character at a time.

  Args:
    seed: the first entry fed to the model; it must be known to `vocab`.
    max_len: upper bound on the length of the working sequence, counting the
      seed and, if it is drawn, the STOP marker.
    rng: source of randomness passed to `sample_from`; when omitted, a fresh
      unseeded ``np.random.default_rng()`` is used.

  Returns:
    The generated characters joined into one string. A leading START marker
    and a trailing STOP marker are removed when present; a seed other than
    START is kept as the first character of the result.
  """
  # The default used to reach sample_from as None and fail on rng.random().
  if rng is None:
    rng = np.random.default_rng()

  rnn1.reset_hidden_state()     # Reset both hidden states
  rnn2.reset_hidden_state()
  output = [seed]               # Start the output with the specified seed

  # Keep going until we produce the STOP character or reach the max length
  while output[-1] != STOP and len(output) < max_len:
    # Use the last character as the input
    input_ = vocab.one_hot_encode(output[-1])

    # Generate scores using the model
    predicted = model.forward(input_)

    # Convert them to probabilities and draw a random char_id
    probabilities = dl.softmax(predicted)
    next_char_id = sample_from(probabilities, rng)

    # Add the corresponding char to our output
    output.append(vocab.get_word(next_char_id))

  # Strip the markers only where they actually are. An unconditional [1:-1]
  # would drop the last real character whenever max_len is reached before
  # STOP is drawn, and would drop a seed that is not the START marker.
  if output[0] == START:
    output = output[1:]
  if output and output[-1] == STOP:
    output = output[:-1]
  return ''.join(output)

In [13]:
loss = SoftmaxCrossEntropy()
optimizer = Momentum(learning_rate=0.01, momentum=0.9)

# In the recorded run one epoch takes about 55 seconds, so the full
# schedule is close to 300 minutes.
N_EPOCHS = 300
LR_DECAY_EPOCH = 200    # First epoch trained with the reduced learning rate

for epoch in range(N_EPOCHS):
  # Turn down the learning rate for the last 100 epochs.
  # There's no principled reason for this, but it seems to work.
  # This runs before the epoch's updates so that epochs 200..299 all use the
  # reduced rate; doing it after epoch 200 would cover only 99 epochs.
  if epoch == LR_DECAY_EPOCH:
    optimizer.lr *= 0.1

  rng.shuffle(companies)      # Train in a different order each epoch (in place).
  epoch_loss = 0              # Track the loss.

  for company in tqdm.tqdm(companies):
    rnn1.reset_hidden_state()     # Reset both hidden states.
    rnn2.reset_hidden_state()
    text = START + company + STOP     # Add START and STOP characters

    # The rest is just our usual training loop, except that the inputs
    # and target are the one-hot-encoded previous and next characters.
    for prev, next_ in zip(text, text[1:]):
      input_ = vocab.one_hot_encode(prev)
      target = vocab.one_hot_encode(next_)
      predicted = model.forward(input_)
      epoch_loss += loss.loss(predicted, target)
      gradient = loss.gradient(predicted, target)
      model.backward(gradient)
      optimizer.step(model)

  # Each epoch, print the loss and also generate a name.
  print(epoch, epoch_loss, generate(rng=rng))

100%|██████████| 586/586 [00:55<00:00, 10.55it/s]


0 17749.315081392997 CrsSdinnctii


100%|██████████| 586/586 [00:55<00:00, 10.63it/s]


1 17205.134826262944 Hayadnat S


100%|██████████| 586/586 [00:54<00:00, 10.79it/s]


2 16976.42295470643 AeraakTie


100%|██████████| 586/586 [00:54<00:00, 10.82it/s]


3 16761.137104912366 KNbir dIreEynhc


100%|██████████| 586/586 [00:54<00:00, 10.79it/s]


4 16530.228342236547 Luaee


100%|██████████| 586/586 [00:53<00:00, 10.86it/s]


5 16313.810636737939 Iabir


100%|██████████| 586/586 [00:54<00:00, 10.72it/s]


6 16048.610374360136 MagyZhilt


100%|██████████| 586/586 [00:54<00:00, 10.76it/s]


7 15878.46506469731 TibNonb Pererh calefarmacigy


100%|██████████| 586/586 [00:52<00:00, 11.06it/s]


8 15707.896646430572 SecRiid Lcamt


100%|██████████| 586/586 [00:52<00:00, 11.14it/s]


9 15543.65237800709 Nponan


100%|██████████| 586/586 [00:52<00:00, 11.10it/s]


10 15410.46875921924 ZliltoRine


100%|██████████| 586/586 [00:52<00:00, 11.15it/s]


11 15278.191902133607 Rarbirey


100%|██████████| 586/586 [00:52<00:00, 11.19it/s]


12 15090.015538184232 Grerlardens


100%|██████████| 586/586 [00:52<00:00, 11.21it/s]


13 14944.983467231837 Senviik


100%|██████████| 586/586 [00:52<00:00, 11.12it/s]


14 14750.24727519621 Tabt


100%|██████████| 586/586 [00:52<00:00, 11.11it/s]


15 14594.525716175089 Daivinelimidancire


100%|██████████| 586/586 [00:52<00:00, 11.14it/s]


16 14450.734230731441 KeduR


100%|██████████| 586/586 [00:52<00:00, 11.17it/s]


17 14290.176549952672 Fala


100%|██████████| 586/586 [00:52<00:00, 11.14it/s]


18 14105.790121997261 Rasalash


100%|██████████| 586/586 [00:52<00:00, 11.12it/s]


19 13958.159612211182 Stfnty


100%|██████████| 586/586 [00:52<00:00, 11.13it/s]


20 13841.315178819561 Abbe


100%|██████████| 586/586 [00:52<00:00, 11.11it/s]


21 13728.607953012666 Bbo


100%|██████████| 586/586 [00:52<00:00, 11.06it/s]


22 13613.747900359842 Axsre


100%|██████████| 586/586 [00:52<00:00, 11.07it/s]


23 13513.038698170905 Henogren


100%|██████████| 586/586 [00:52<00:00, 11.06it/s]


24 13387.603740066437 Dinely


100%|██████████| 586/586 [00:52<00:00, 11.06it/s]


25 13284.73520279218 Atass


100%|██████████| 586/586 [00:52<00:00, 11.09it/s]


26 13124.253405500338 Bundog


100%|██████████| 586/586 [00:52<00:00, 11.15it/s]


27 13095.517439819552 Curmiwe


100%|██████████| 586/586 [00:52<00:00, 11.09it/s]


28 13059.838076110396 Srummc.ing


100%|██████████| 586/586 [00:52<00:00, 11.09it/s]


29 12898.507629397722 Assgivls


100%|██████████| 586/586 [00:52<00:00, 11.14it/s]


30 12751.94028237859 Ippel


100%|██████████| 586/586 [00:53<00:00, 11.01it/s]


31 12625.980017250073 Herdo Bwomis


100%|██████████| 586/586 [00:53<00:00, 11.03it/s]


32 12543.273510088175 Eaghtremderees


100%|██████████| 586/586 [00:56<00:00, 10.46it/s]


33 12517.958766865482 6elmom


100%|██████████| 586/586 [00:53<00:00, 10.89it/s]


34 12432.23234620826 Rolly,al


100%|██████████| 586/586 [00:54<00:00, 10.79it/s]


35 12392.806565012545 6kume Cliat


100%|██████████| 586/586 [00:55<00:00, 10.63it/s]


36 12282.6275524506 Softvoma Haii Hex


100%|██████████| 586/586 [00:53<00:00, 10.85it/s]


37 12244.064330844303 Debofut


 21%|██▏       | 125/586 [00:11<00:39, 11.67it/s]