# Data Preparation

In [7]:
import os
import requests

def get_iwsltenvi_data(save: bool = True, timeout: float = 60.0):
    """Download the IWSLT'15 English-Vietnamese train/test files.

    The four files (train/test x en/vi) are fetched from the Stanford NMT
    project page and, when ``save`` is true, written to ``./data/iwsltenvi/``
    as ``train_en.txt``, ``train_vi.txt``, ``test_en.txt`` and ``test_vi.txt``.

    Args:
        save: When True, write the downloaded files to disk. The target
            directory is created either way.
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Raises:
        requests.HTTPError: If any of the downloads returns an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    base_url = 'https://nlp.stanford.edu/projects/nmt/data/iwslt15.en-vi/'
    # Local file name -> remote file name. Dict order is the download order.
    remote_names = {
        'train_en.txt': 'train.en',
        'train_vi.txt': 'train.vi',
        'test_en.txt': 'tst2013.en',
        'test_vi.txt': 'tst2013.vi',
    }

    payloads = {}
    for local_name, remote_name in remote_names.items():
        response = requests.get(base_url + remote_name, timeout=timeout)
        # Fail loudly instead of silently saving an HTML error page as data.
        response.raise_for_status()
        # Keep the raw bytes: this avoids depending on requests' charset
        # guess and on the platform's default text encoding when writing.
        payloads[local_name] = response.content

    store_pth = './data/iwsltenvi/'
    os.makedirs(store_pth, exist_ok=True)
    print('The Data would be stored in: ', store_pth)

    if save:
        for local_name, payload in payloads.items():
            with open(f'{store_pth}{local_name}', 'wb') as f:
                f.write(payload)

    print('Done!')
    


In [100]:
import torch

import torch.nn as nn

import torch.nn.functional as F

class test(nn.Module):
    """Experimental multi-head scaled dot-product self-attention.

    The input is projected to queries, keys and values, split into
    ``n_heads`` heads of size ``dk = embeded_dim // n_heads``, and each head
    attends over the sequence dimension.

    Args:
        embeded_dim: Size of the last dimension of the input.
        n_heads: Number of attention heads; must divide ``embeded_dim``.

    Raises:
        ValueError: If ``n_heads`` is not positive or does not divide
            ``embeded_dim``.
    """

    def __init__(self, embeded_dim, n_heads=8):
        super(test, self).__init__()

        # Fail early: otherwise the error only shows up later as a shape
        # mismatch inside ``view`` in ``forward``.
        if n_heads <= 0 or embeded_dim % n_heads != 0:
            raise ValueError(
                f'embeded_dim ({embeded_dim}) must be divisible by a positive '
                f'n_heads ({n_heads})'
            )

        # Basic Attributes
        self.embeded_dim = embeded_dim
        self.n_heads = n_heads
        self.dk = embeded_dim // n_heads  # feature size of a single head

        # Query, Key, Value projections: embeded_dim -> embeded_dim.
        # The result is split into n_heads chunks of size dk in forward().
        self.Q = nn.Linear(self.embeded_dim, self.embeded_dim, bias=False)
        self.K = nn.Linear(self.embeded_dim, self.embeded_dim, bias=False)
        self.V = nn.Linear(self.embeded_dim, self.embeded_dim, bias=False)

        # NOTE(review): this projection is created but not applied in
        # forward(), which returns the per-head outputs. Applying it would
        # change the returned shape, so it is left as-is here.
        self.out = nn.Linear(self.n_heads * self.dk, self.embeded_dim)

    def forward(self, x):
        """Compute per-head self-attention.

        Args:
            x: Tensor of shape (batch_size, seq_length, embeded_dim).

        Returns:
            Tensor of shape (batch_size, n_heads, seq_length, dk).
        """
        batch_size, seq_length = x.shape[0], x.shape[1]
        head_shape = (batch_size, seq_length, self.n_heads, self.dk)

        # (batch, seq, embeded_dim) -> (batch, seq, n_heads, dk)
        #                           -> (batch, n_heads, seq, dk)
        query = self.Q(x).view(*head_shape).transpose(1, 2)
        key = self.K(x).view(*head_shape).transpose(1, 2)
        value = self.V(x).view(*head_shape).transpose(1, 2)
        print(query.shape, key.shape, value.shape)

        # Raw attention scores: (batch, n_heads, seq, seq). Computed once and
        # reused for both the shape print and the softmax.
        scores = torch.matmul(query, key.transpose(-1, -2))
        print(scores.shape)

        # Scale by sqrt(dk), not dk, as in standard scaled dot-product
        # attention.
        weights = F.softmax(scores / self.dk ** 0.5, dim=-1)
        out = weights @ value

        return out

In [70]:
# x: batch_size, seq_len, d_model
x_size = (2, 3, 512)

# Draw from a locally seeded generator so the cell gives the same tensor on
# every run without touching the global RNG state.
x = torch.randn(x_size, generator=torch.Generator().manual_seed(0))
print(x)

tensor([[[ 0.7161, -0.6844, -0.3527,  ...,  0.3822, -0.8768, -0.2211],
         [-0.3501,  0.1464, -0.3773,  ..., -1.0206,  1.6364,  0.3222],
         [ 0.2844, -0.2456, -1.5573,  ...,  0.2985,  1.2504,  0.4836]],

        [[ 0.1789, -0.5546,  1.5709,  ..., -0.7770,  0.5980,  1.0149],
         [ 1.0401, -1.0058,  0.4600,  ...,  1.6577,  0.5777,  0.1325],
         [ 0.5442, -2.3042, -0.2675,  ...,  0.6823, -0.3283, -2.7384]]])


In [101]:
# Build the attention layer; n_heads is spelled out at its default value of 8.
test_layer = test(embeded_dim=512, n_heads=8)

In [102]:
# Run the layer on the sample batch and inspect the shape of the result.
output = test_layer(x)
print(output.size())

torch.Size([2, 8, 3, 64]) torch.Size([2, 8, 3, 64]) torch.Size([2, 8, 3, 64])
torch.Size([2, 8, 3, 3])
torch.Size([2, 8, 3, 64])


In [95]:
# Scratch tensor for the reshape/transpose experiments in the cells below.
test_tensor = torch.randn(2, 3, 512)
print(test_tensor.size())

torch.Size([2, 3, 512])


In [97]:
# Split the last dimension into 8 chunks, then swap the two trailing axes:
# (2, 3, 512) -> (2, 3, 8, 64) -> (2, 3, 64, 8)
c = test_tensor.reshape(2, 3, 8, -1).transpose(-2, -1)
print(c.size())


torch.Size([2, 3, 64, 8])


In [99]:
# Batched product over the two trailing axes: (.., 8, 64) @ (.., 64, 8) -> (.., 8, 8)
print((c.transpose(-1, -2) @ c).shape)

torch.Size([2, 3, 8, 8])


# 1