# Data Preparation

In [128]:
from config import Config
import utils
from utils import EnvHndler
import os
import sympy as sp
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from datahandler import MathData, create_data_loader
import random

In [129]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy and PyTorch with the same value. When CUDA
    is available, the CUDA generators are seeded too and cuDNN is switched to
    deterministic mode with benchmarking disabled. Finally the value is
    written to the PYTHONHASHSEED environment variable as a string.
    """
    # CPU-side generators: Python, NumPy, PyTorch (in that order).
    for cpu_seeder in (random.seed, np.random.seed, torch.manual_seed):
        cpu_seeder(seed)

    if torch.cuda.is_available():
        # Current device first, then every visible device.
        for gpu_seeder in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            gpu_seeder(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    os.environ["PYTHONHASHSEED"] = f"{seed}"


set_seed(0)

In [130]:
config = Config()

In [131]:
config.reload_size = 10

In [132]:
env = EnvHndler(config)

In [133]:
df = MathData(config, env, dtype = 'train')

In [134]:
df.data

[["sub Y' INT+ 1", 'x'],
 ["sub Y' INT+ 2", 'mul INT+ 2 x'],
 ["sub Y' mul INT+ 4 x", 'mul INT+ 2 pow x INT+ 2'],
 ["sub Y' add INT+ 2 mul INT+ 2 x", 'add pow x INT+ 2 mul INT+ 2 x'],
 ["sub Y' mul INT+ 3 pow x INT+ 2", 'pow x INT+ 3'],
 ["sub Y' pow x INT- 1", 'ln x'],
 ["sub Y' mul INT- 1 sin x", 'cos x'],
 ["sub Y' exp x", 'exp x'],
 ["sub Y' cos x", 'sin x'],
 ["sub Y' add INT+ 1 mul INT+ 4 x", 'add x mul INT+ 2 pow x INT+ 2']]

In [135]:
[env.word2id[x] for x in df.data[1][0].split()]

[67, 79, 71, 83]

In [136]:
len(df)

1152921504606846976

In [137]:
df[1000]

(['sub', "Y'", 'pow', 'x', 'INT-', '1'], ['ln', 'x'])

In [138]:
loader = create_data_loader(config, env, dtype = 'train')

In [139]:
it = iter(loader)

In [140]:
config.batch_size

10

In [141]:
batch = next(it)

In [142]:
def get_masks(slen, lengths, causal):
    """
    Generate hidden states mask, and optionally an attention mask.

    Args:
        slen: padded sequence length; must be >= every entry of `lengths`.
        lengths: 1-D integer tensor holding one true length per batch item.
        causal: truthy to build a lower-triangular (causal) attention mask,
            falsy to reuse the padding mask as the attention mask.

    Returns:
        A tuple `(mask, attn_mask)`.
        `mask` is a (bs, slen) boolean tensor that is True at positions
        strictly before each item's length.
        `attn_mask` is that very same tensor when `causal` is falsy; otherwise
        it is a (bs, slen, slen) boolean tensor whose entry [b, i, j] is True
        iff j <= i. The causal mask is built from positions only and does not
        take `lengths` into account.

    Raises:
        AssertionError: if a length exceeds `slen` or a shape check fails.
    """
    assert lengths.max().item() <= slen
    bs = lengths.size(0)
    alen = torch.arange(slen, dtype=torch.long, device=lengths.device)
    # Broadcast (slen,) against (bs, 1) to get one row of flags per batch item.
    mask = alen < lengths[:, None]

    # attention mask is the same as mask, or triangular inferior attention (causal)
    if causal:
        attn_mask = alen[None, None, :].repeat(bs, slen, 1) <= alen[None, :, None]
    else:
        attn_mask = mask

    # sanity check
    assert mask.size() == (bs, slen)
    # Use truthiness here, matching the branch above: an identity test against
    # False would reject falsy non-bool flags such as 0 or None.
    assert not causal or attn_mask.size() == (bs, slen, slen)

    return mask, attn_mask

In [143]:
(x,len_x), (y,len_y), _ = batch

In [144]:
slen = x.shape[0]

In [172]:
mask, am = get_masks(slen, len_x, causal = True)

In [173]:
len_x[6]

tensor(8)

In [174]:
 alen = torch.arange(slen, dtype=torch.long)

In [179]:
am[0]

tensor([[ True, False, False, False, False, False, False, False, False, False,
         False],
        [ True,  True, False, False, False, False, False, False, False, False,
         False],
        [ True,  True,  True, False, False, False, False, False, False, False,
         False],
        [ True,  True,  True,  True, False, False, False, False, False, False,
         False],
        [ True,  True,  True,  True,  True, False, False, False, False, False,
         False],
        [ True,  True,  True,  True,  True,  True, False, False, False, False,
         False],
        [ True,  True,  True,  True,  True,  True,  True, False, False, False,
         False],
        [ True,  True,  True,  True,  True,  True,  True,  True, False, False,
         False],
        [ True,  True,  True,  True,  True,  True,  True,  True,  True, False,
         False],
        [ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         False],
        [ True,  True,  True,  True,  Tr

In [168]:
mask

tensor([[ True,  True,  True,  True,  True,  True, False, False, False, False,
         False],
        [ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
          True],
        [ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
          True],
        [ True,  True,  True,  True,  True,  True, False, False, False, False,
         False],
        [ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
          True],
        [ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
          True],
        [ True,  True,  True,  True,  True,  True,  True,  True, False, False,
         False],
        [ True,  True,  True,  True,  True,  True,  True,  True, False, False,
         False],
        [ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
          True],
        [ True,  True,  True,  True,  True,  True, False, False, False, False,
         False]])

In [166]:
# NOTE: this comparison cannot broadcast and raises the RuntimeError shown below:
# `alen` has one entry per position (slen) while `len_x` has one entry per batch
# item. get_masks avoids the problem by comparing against lengths[:, None].
alen < len_x[:]

RuntimeError: The size of tensor a (11) must match the size of tensor b (10) at non-singleton dimension 0

# Model Preparation

In [1]:
from config import Config
import torch
import torch.nn as nn
import os
import pickle
import numpy as np
import random
import torch.nn.functional as F
import math
from Model import Multi_Head_Attention, Feed_Forward, get_masks, EncoderBlock, DecoderBlock, Transformers
from utils import EnvHndler

In [2]:
config = Config()

In [3]:
env = EnvHndler(config)

In [4]:
config.n_words

91

In [5]:
# Toy dimensions and random inputs for exercising the model blocks.
bs = 4  # batch size: number of columns in x / y and entries in len_x / len_y
# qlen = 5
ilen = 5  # length of the x sequences
olen = 3  # length of the y sequences
config.model_dim = 16  # mutates the shared config object used by later cells
dm = config.model_dim
head_dim = dm//config.num_head
# Random token ids in [0, config.n_words), laid out as (sequence length, batch size).
x = torch.randint(low = 0, high = config.n_words, size = (ilen, bs)).long()
y = torch.randint(low = 0, high = config.n_words, size = (olen, bs)).long()
# Per-item lengths, one entry per batch column; the maxima equal ilen and olen.
len_x = torch.tensor([2,4,5,3])
len_y = torch.tensor([1,3,2,3])
# Disabled inputs, presumably left over from testing attention directly — not executed.
# Q = torch.randn((bs, qlen, dm))
# V = torch.randn((bs, qlen, dm))
# K = torch.randn((bs, qlen, dm))
# lenghts = torch.tensor([2,4,5,3])

In [7]:
x.shape

torch.Size([5, 4])

In [8]:
y.shape

torch.Size([3, 4])

In [5]:
enc = EncoderBlock(config)

In [6]:
dec = DecoderBlock(config)

In [7]:
# NOTE(review): unpacking three sizes requires a 3-D `x`, but the setup cell builds
# `x` with size (ilen, bs). A later output shows x.shape == torch.Size([4, 5, 16]),
# so `x` was presumably replaced by a (batch, seq, model_dim) tensor in a cell that
# is no longer in the notebook — TODO restore that step so Restart & Run All works.
# This also rebinds the global `dm`.
N, slen, dm = x.size()
# Non-causal: the attention mask returned here is the padding mask itself.
e_mask , e_atten_mask = get_masks(slen, len_x, causal = False)

In [8]:
# NOTE(review): as with `x`, this needs a 3-D `y`, while the setup cell builds `y`
# with size (olen, bs). A later output shows y.shape == torch.Size([4, 3, 16]), so
# the tensor was presumably replaced in a cell that is no longer present — TODO confirm.
N, tlen, dm = y.size()
# Causal: each target position may only attend to itself and earlier positions.
d_mask , d_atten_mask = get_masks(tlen, len_y, causal = True)

In [9]:
src_mask = torch.arange(len_x.max(), dtype=torch.long, device=len_x.device) < len_x[:, None]

In [10]:
torch.equal(e_mask, src_mask)

True

In [11]:
encoded = enc(x, e_atten_mask, e_mask)

In [15]:
x.shape

torch.Size([4, 5, 16])

In [14]:
encoded.shape

torch.Size([4, 5, 16])

In [12]:
decoded = dec(y, encoded, d_atten_mask, d_mask, e_mask)

In [13]:
decoded.shape

torch.Size([4, 3, 16])

In [16]:
y.shape

torch.Size([4, 3, 16])

In [6]:
tr = Transformers(config)

In [7]:
# NOTE(review): this rebinds `enc`, which an earlier cell bound to the EncoderBlock
# instance; from here on `enc` is the value returned by the 'encode' call.
enc = tr(mode = 'encode', x = x, len_x = len_x)

In [9]:
# NOTE(review): this rebinds `dec`, which an earlier cell bound to the DecoderBlock
# instance; from here on `dec` is the value returned by the 'decode' call.
# The encoder output is passed with its first two axes swapped.
dec = tr(mode = 'decode', y = y, len_y = len_y, encoded = enc.transpose(0,1), len_enc = len_x)

In [11]:
# target words to predict
alen = torch.arange(len_y.max(), dtype=torch.long, device=len_y.device)
pred_mask = alen[:, None] < len_y[None] - 1  # do not predict anything given the last target word
t = y[1:].masked_select(pred_mask[:-1])
assert len(t) == (len_y - 1).sum().item()

In [22]:
len_y[None]

tensor([[1, 3, 2, 3]])

In [16]:
pred_mask[:,2]

tensor([ True, False, False])

In [21]:
y

tensor([[81, 78,  2,  0],
        [45, 89, 12, 84],
        [ 8, 19, 87, 49]])

In [20]:
t

tensor([89, 12, 84, 19, 49])

In [23]:
scores, loss = tr(mode = 'predict', tensor = dec, pred_mask = pred_mask, y = t, get_scores = True)

In [25]:
scores

tensor([[-3.0523e-01, -1.7260e-01,  1.6709e-01, -6.3807e-02, -1.2141e+00,
          1.1148e-01, -4.9745e-01,  1.2174e+00, -1.5522e-01, -1.5035e+00,
          1.8056e+00,  4.3814e-02, -2.7261e-01,  1.4488e+00, -7.3657e-01,
         -5.3078e-01,  9.9984e-01,  3.4513e-02,  2.0787e-01,  1.0790e+00,
         -1.4123e+00, -8.7211e-01,  2.4148e-01,  7.6112e-01, -1.7555e+00,
          8.8923e-01, -5.0359e-01,  9.4234e-02, -6.8736e-01,  2.5731e-01,
         -8.1520e-01, -8.2078e-01,  7.7107e-02, -2.1696e+00, -1.0375e-02,
          7.9694e-01, -4.4363e-01, -1.4612e+00,  2.2750e-01,  7.9702e-01,
          3.2155e-01,  7.8504e-01,  9.5178e-01, -9.5786e-03, -2.2148e+00,
         -5.4930e-01,  4.8773e-01, -1.1878e-01,  1.3475e+00,  4.4128e-01,
         -5.2684e-01,  1.8356e-01, -5.7675e-01, -5.5094e-01, -3.2704e-01,
          6.8494e-01, -1.5358e+00,  1.2058e+00,  1.6582e+00,  1.9032e-01,
          1.0826e+00, -6.7693e-01, -7.3593e-01,  2.7785e-01,  7.3701e-01,
         -1.4737e+00,  4.7136e-01,  1.