# Implementing WaveNet

In [3]:
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
import pandas as pd
%matplotlib inline

In [5]:
# Load the dataset: one name per line. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [6]:
# Collect the distinct characters that occur in the names, in sorted order.
chars = sorted(set(''.join(words)))
len(chars)

26

In [7]:
# Character <-> integer lookup tables. Ids start at 1 for the sorted characters;
# id 0 is reserved for '.', the token that pads contexts and terminates names.
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0
itos = dict(zip(stoi.values(), stoi.keys()))
print(stoi)
print(itos)

{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26, '.': 0}
{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [8]:
# Context length: how many previous character indices build_dataset feeds in
# to predict the next character.
block_size = 8

In [9]:
# Declare the function to build the dataset
def build_dataset(names):
    """Turn a list of names into (context, next-character) training pairs.

    Each name is terminated with '.', and a sliding window of the last
    ``block_size`` character indices (initially all 0) is paired with the
    index of the character that follows it. Relies on the module-level
    ``stoi`` mapping and ``block_size`` constant.

    Args:
        names: iterable of strings made of characters present in ``stoi``.

    Returns:
        Tuple ``(X, Y)`` of tensors: ``X`` holds one context window per row,
        ``Y`` holds the matching target indices. Their shapes are printed as
        a side effect.
    """
    inputs, targets = [], []
    for name in names:
        # Every name starts from an all-zero window.
        window = [0] * block_size
        for ch in name + '.':
            target = stoi[ch]
            inputs.append(window)
            targets.append(target)
            # Slide the window: drop the oldest index, append the newest.
            window = window[1:] + [target]

    X = torch.tensor(inputs)
    Y = torch.tensor(targets)
    print(X.shape, Y.shape)
    return X, Y

In [10]:
# Build the train, dev (validation) and test sets with an 80/10/10 split over names.
import random
random.seed(42)  # fixed seed so the shuffle below is repeatable from a fresh kernel

# NOTE: shuffle() reorders `words` in place, so re-running this cell shuffles the
# already-shuffled list and produces a different split than the first run.
random.shuffle(words)

n1 = int(0.8 * len(words))  # end of the training slice
n2 = int(0.9 * len(words))  # end of the dev slice; the remainder is the test slice

Xtr, Ytr = build_dataset(words[:n1])
Xdev, Ydev = build_dataset(words[n1:n2])
Xte, Yte = build_dataset(words[n2:])

torch.Size([182625, 8]) torch.Size([182625])
torch.Size([22655, 8]) torch.Size([22655])
torch.Size([22866, 8]) torch.Size([22866])


In [18]:
# Decode training rows 7..14 back into characters to eyeball context -> target pairs.
for i in range(7, 15):
    print(f"{''.join(itos[ix] for ix in Xtr[i].tolist())} -> {itos[int(Ytr[i])]}")

........ -> d
.......d -> i
......di -> o
.....dio -> n
....dion -> d
...diond -> r
..diondr -> e
.diondre -> .


In [20]:
class Linear:
    """Fully connected layer computing ``x @ weight`` plus an optional bias.

    Args:
        fan_in: size of the last dimension of the input.
        fan_out: size of the last dimension of the output.
        bias: when truthy, a bias vector of length ``fan_out`` is added.

    Attributes:
        weight: ``(fan_in, fan_out)`` tensor drawn from a standard normal and
            scaled by ``1 / sqrt(fan_in)``.
        bias: ``(fan_out,)`` tensor drawn from a standard normal, or ``None``.
        out: result of the most recent forward call.
    """

    def __init__(self, fan_in, fan_out, bias=True):
        # Dividing by sqrt(fan_in) keeps the output scale independent of fan_in.
        self.weight = torch.randn((fan_in, fan_out)) / (fan_in ** 0.5)
        self.bias = torch.randn(fan_out) if bias else None

    def __call__(self, x):
        """Apply the layer to ``x`` and return the result (also kept in ``self.out``)."""
        out = x @ self.weight
        if self.bias is not None:
            out = out + self.bias
        self.out = out
        # Return the activation; returning the bias here would hand the next
        # layer the wrong tensor (or None for bias-free layers).
        return self.out

    def parameters(self):
        """Return the layer's tensors as a list: weight, then bias if present."""
        return [self.weight] + ([self.bias] if self.bias is not None else [])

In [21]:
class BatchNorm1d:
    """Batch normalisation over every dimension except the last (feature) one.

    Args:
        dim: number of features, i.e. the size of the input's last dimension.
        eps: constant added to the variance before the square root.
        momentum: weight given to the current batch when updating the running
            statistics.

    Attributes:
        gamma, beta: per-feature scale and shift, returned by ``parameters()``.
        running_mean, running_var: per-feature running statistics, updated
            while ``training`` is True and used for normalisation when it is
            False.
        training: toggles between batch statistics and running statistics.
        out: result of the most recent forward call.
    """

    def __init__(self, dim, eps=1e-5, momentum=0.1):
        self.gamma = torch.ones(dim)
        self.beta = torch.zeros(dim)

        self.eps = eps
        self.momentum = momentum
        self.training = True
        self.running_mean = torch.zeros(dim)
        self.running_var = torch.ones(dim)

    def __call__(self, x):
        """Normalise ``x`` per feature and return the scaled, shifted result."""
        if self.training:
            # Reduce over the axis *indices* of all leading dimensions, so both
            # (N, C) and (N, T, C) inputs yield per-feature statistics of shape (C,).
            reduce_dims = tuple(range(x.ndim - 1))
            xmean = x.mean(reduce_dims)
            xvar = x.var(reduce_dims)
        else:
            xmean = self.running_mean
            xvar = self.running_var
        xhat = (x - xmean) / torch.sqrt(xvar + self.eps)
        self.out = self.gamma * xhat + self.beta

        if self.training:
            # The running statistics are buffers, not trained parameters, so
            # their update is kept out of the autograd graph.
            with torch.no_grad():
                self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * xmean
                self.running_var = (1 - self.momentum) * self.running_var + self.momentum * xvar
        # Returned in both modes; eval mode must produce an output too.
        return self.out

    def parameters(self):
        """Return the trainable tensors: ``[gamma, beta]``."""
        return [self.gamma, self.beta]

In [22]:
class Tanh:
    """Parameter-free layer that applies ``torch.tanh`` to its input."""

    def parameters(self):
        """This layer has nothing to train, so the list is always empty."""
        return []

    def __call__(self, x):
        """Return ``tanh(x)``, keeping a reference to it in ``self.out``."""
        activated = torch.tanh(x)
        self.out = activated
        return activated

In [None]:
class Embedding:
    """Lookup table that maps integer indices to rows of a weight matrix.

    Args:
        num_embd: number of rows in the table.
        embd_dim: length of each row (the embedding vector).

    Attributes:
        weight: ``(num_embd, embd_dim)`` tensor drawn from a standard normal.
        out: result of the most recent forward call.
    """

    def __init__(self, num_embd, embd_dim):
        self.weight = torch.randn((num_embd, embd_dim))

    def __call__(self, IX):
        """Index ``self.weight`` with ``IX`` and return the selected rows."""
        # The table is an instance attribute; a bare `weight` would be a NameError.
        self.out = self.weight[IX]
        return self.out

    def parameters(self):
        """Return the trainable tensors: ``[weight]``."""
        return [self.weight]