In [43]:
import torch
from torch import nn
from torch.utils import data
from torch.nn import functional as F
from pathlib import Path

from network import dataset

In [316]:
class NeighbourEmbedding(nn.Module):
    
    def __init__(self, vocab_size, dimension):
        super().__init__()
        
        self.word_embed = nn.Embedding(vocab_size, dimension)
        self.linear1 = nn.Linear(2, 128)
        self.linear2 = nn.Linear(128, dimension)
        self.dropout1 = nn.Dropout(0.5)
        self.dropout2 = nn.Dropout(0.5)
    
    def forward(self, word, position):
        
        embedding = self.word_embed(word)
        
        pos = F.relu(self.linear1(position))
        pos = self.dropout1(pos)
        pos = F.relu(self.linear2(pos))
        pos = self.dropout1(pos)
        
        print(embedding.shape)
        print(pos.shape)
        concat = torch.cat((embedding, pos), dim=-1)
        
        return concat

In [307]:
class Embedder(nn.Module):
    
    def __init__(self, vocab_size, dimension, n_neighbours):
        super().__init__()
        
        self.neighbour_embeddings = []
        self.n_neighbours = n_neighbours
        for idx in range(self.n_neighbours):
            self.neighbour_embeddings.append(NeighbourEmbedding(vocab_size, dimension))
            
    def forward(self, words, positions):
        embedding_outputs = []
        for idx in range(self.n_neighbours):
            embedding_outputs.append(self.neighbour_embeddings[idx](words[:, idx], positions[:, idx, :]))
        
        return torch.cat(embedding_outputs, dim=1)

In [298]:
class Model(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim):
        super().__init__()
        
        # Field Embedding (Probably One hot encoded data)
        
        # Candidate Embedding Layer
        # One dense layer
        
        # Neighbour Word Embedding and Positional Embedding
        # Word Embedding - Embedding layer
        # Positional Embedding - 2 blocks of (Dense, Relu, Dropout, Dense, Relu, Dropout)
        # N number of Neighbours
        
        # Each neighbour goes through a Neighbour Encoding Attention layer of its own
        
        # All the neighbour encodings are concatenated together into one block
        # This concatenated encoding is concatenated with Candidate Positon Encoding for Candidate Encoding
        
        # This candidate encoding is merged with Field Embedding and produces a binary output score
        
        self.cand_embed = nn.Linear(2, 128)
        self.field_embed = nn.Linear(2, 128)
        
        self.neighbour_embeddings = Embedder(vocab_size, dimension, neighbours)
        
        
    
    def forward(self, neighbours, ):
        pass

In [154]:
field_dict = {'invoice_date':0, 'invoice_no':1, 'total':2}

In [155]:
this_dir = Path.cwd()
xmls_path = this_dir / "dataset" / "xmls"
ocr_path = this_dir / "dataset" / "tesseract_results_lstm"
image_path = this_dir / "dataset" / "images"
candidate_path = this_dir / "dataset" / "candidates"

In [130]:
datas = dataset.DocumentsDataset(xmls_path, ocr_path, image_path, candidate_path, field_dict)

Reading Annotations: 505it [00:00, 574.68it/s]
Attaching Candidate: 100%|██████████| 505/505 [00:01<00:00, 411.01it/s]
Attaching Neighbours:   1%|          | 4/505 [00:00<01:40,  4.98it/s]


KeyboardInterrupt: 

In [156]:
len(datas.vocab)

14947

In [157]:
datas[0][0]

tensor([ 1.0000,  0.6985,  0.0865,  1.0000,  0.0176, -0.0512,  2.0000,  0.0472,
        -0.0512,  3.0000,  0.0936, -0.0511,  4.0000,  0.1104, -0.0143,  5.0000,
        -0.0133, -0.0145])

In [313]:
embedder = Embedder(len(datas.vocab), 32, 5)

In [159]:
words = torch.LongTensor([datas[0][0][(idx * 3) + 3] for idx in range(5)])
cords = torch.tensor([[datas[0][0][(idx * 3) + 4], datas[0][0][(idx * 3) + 5]] for idx in range(5)])

In [160]:
words

tensor([1, 2, 3, 4, 5])

In [161]:
cords

tensor([[ 0.0176, -0.0512],
        [ 0.0472, -0.0512],
        [ 0.0936, -0.0511],
        [ 0.1104, -0.0143],
        [-0.0133, -0.0145]])

In [162]:
_ = embedder([words[:2], words[:2]], [cords[:2], cords[:2]])

In [167]:
_[0].shape

torch.Size([2, 256])

In [252]:
embd = NeighbourEmbedding(len(datas.vocab), 32)

In [195]:
embd(torch.tensor([[words[:2], words[:2]]]), torch.tensor([[cords[:2], cords[:2]]]))

ValueError: only one element tensors can be converted to Python scalars

In [196]:
datal = data.DataLoader(datas, batch_size=2)

In [198]:
x, Y = next(iter(datal))

In [228]:
x

tensor([[ 1.0000e+00,  6.9847e-01,  8.6545e-02,  1.0000e+00,  1.7647e-02,
         -5.1182e-02,  2.0000e+00,  4.7176e-02, -5.1182e-02,  3.0000e+00,
          9.3647e-02, -5.1091e-02,  4.0000e+00,  1.1035e-01, -1.4273e-02,
          5.0000e+00, -1.3294e-02, -1.4455e-02],
        [ 1.0000e+00,  2.2400e-01,  9.3091e-02,  2.2000e+01, -9.0941e-02,
         -5.6909e-02,  2.3000e+01, -3.0824e-02, -5.7909e-02,  2.4000e+01,
          1.1176e-02, -1.4909e-02,  2.5000e+01,  2.5647e-02,  9.0909e-05,
          2.6000e+01,  6.1176e-02,  0.0000e+00]])

In [278]:
cords

tensor([[[ 1.7647e-02, -5.1182e-02],
         [ 4.7176e-02, -5.1182e-02],
         [ 9.3647e-02, -5.1091e-02],
         [ 1.1035e-01, -1.4273e-02],
         [-1.3294e-02, -1.4455e-02]],

        [[-9.0941e-02, -5.6909e-02],
         [-3.0824e-02, -5.7909e-02],
         [ 1.1176e-02, -1.4909e-02],
         [ 2.5647e-02,  9.0909e-05],
         [ 6.1176e-02,  0.0000e+00]]])

In [286]:
cords[:,1,:]

tensor([[ 0.0472, -0.0512],
        [-0.0308, -0.0579]])

In [245]:
words = x.view([-1,6,3])[:,1:,:1]
cords = x.view([-1,6,3])[:,1:,1:]

In [248]:
words

tensor([[[ 1.],
         [ 2.],
         [ 3.],
         [ 4.],
         [ 5.]],

        [[22.],
         [23.],
         [24.],
         [25.],
         [26.]]])

In [337]:
words.type(torch.LongTensor).view(-1, 5)

tensor([[ 1,  2,  3,  4,  5],
        [22, 23, 24, 25, 26]])

In [314]:
_t = embedder(words.type(torch.LongTensor).view(-1, 5), cords)

torch.Size([2, 32])
torch.Size([2, 32])
torch.Size([2, 32])
torch.Size([2, 32])
torch.Size([2, 32])
torch.Size([2, 32])
torch.Size([2, 32])
torch.Size([2, 32])
torch.Size([2, 32])
torch.Size([2, 32])


In [315]:
_t.shape

torch.Size([4, 160])

In [None]:
# N = No. of neighbours, I = Input size, B = Batch size, D = Dimension
# I =  3 + (3 * N)
# [B,I]
# F = [B]
# C = [B]
# NE = [ 2 * D, N]
# [B, [NE]]

In [221]:
torch.tensor([1,2,3,4]).view([4,-1]).shape

torch.Size([4, 1])

In [322]:
%load_ext autoreload
%autoreload 2
from network import model

In [333]:
myModel = model.ReLIE(len(datas.vocab), 32, 5)

In [334]:
out = myModel(x)

torch.Size([2, 5])
torch.Size([2, 5, 2])


In [336]:
out.shape

torch.Size([2, 320])

In [335]:
x.shape

torch.Size([2, 18])

In [None]:
[ field id, x, y,  [1,x, y]* 5]