# Simulate Data


Given a field name, and first 2 dimensional numbes, simulate tensor.

In [1]:
from fieldnn.utils.layerfn import traverse
from fieldnn.utils.simulate import get_next_info, get_simulated_tensor_from_fldname

# Holder

1. Current Layer:
```python
leng_mask = info_idx == 0 # or info_idx != 0
leng = leng_mask.sum(-1)
```

2. Transfer

```python
old_leng = leng
```


3. Next Layer
```python
leng_mask = old_leng != 0
leng = (leng_mask == 0).sum(-1)
```

In [2]:
# leng_mask = info_idx == 0

import torch

def get_Layer2Holder(fullname, holder, Ignore_PSN_Layers = ['B', 'P']):
    # holder = holder
    d = {}
    for layername in list(reversed(fullname.split('-'))):
        if layername in Ignore_PSN_Layers: continue
        leng_mask = holder == 0
        leng = (leng_mask == 0).sum(-1)
        psn_idx = (leng_mask == False).cumsum(-1).masked_fill(leng_mask, 0)
        d[layername] = {'holder': holder, 
                        'leng_mask': leng_mask, 
                        'leng': leng, 
                        'psn_idx': psn_idx}
        # d[layername] = holder, psn_idx
        holder = leng
    Layer2Hoder = d
    return Layer2Hoder

In [3]:

def align_psn_idx(source_layer, current_layer, Layer2Idx, Layer2Holder):
    if source_layer == current_layer:
        psn_idx = Layer2Holder[current_layer]['psn_idx']
        return psn_idx
    else:
        source_psn_idx = Layer2Holder[source_layer]['psn_idx']
        current_leng_mask = Layer2Holder[current_layer]['leng_mask']
        gaps = Layer2Idx[current_layer] - Layer2Idx[source_layer]
        # print(gaps)
        # print(layername)
        # print(prev_info.shape)
        # print(leng_mask.shape)
        # print(leng.shape)
        # print(psn_idx.shape)
        shape0 = list(source_psn_idx.shape) + [1] * gaps
        shape1 = current_leng_mask.shape
        psn_idx = source_psn_idx.view(*shape0).expand(shape1).masked_fill(current_leng_mask, 0)
        # print(cpsn_idx.shape)
        return psn_idx

In [4]:
import torch
# ======= within forward

###############
B_lenP = 3
B2P_lnEC = [6, 5, 2] # 
prefix_layers_num = 2
vocab_size = 5001
Ignore_PSN_Layers = ['B', 'P']
###############


fullname = 'B-PatRec:EC-ECRec:Diag-DiagRec:DiagV-DiagVdftGrn'
info_idx = get_simulated_tensor_from_fldname(fullname, B_lenP, B2P_lnEC, prefix_layers_num, vocab_size)
print(info_idx.shape)
holder = torch.LongTensor(info_idx)


############### gsn_embeddings
Layer2Idx = {v:idx for idx, v in enumerate(fullname.split('-'))}
name = fullname.split('-')[-1]
Layer2Holder = get_Layer2Holder(fullname, holder, Ignore_PSN_Layers)
psn_layers = list(reversed([i for i in Layer2Idx if i not in Ignore_PSN_Layers]))


print(Layer2Idx)
print(name)
print([i for i in Layer2Holder], '<---- Layer2Holder')
print(psn_layers, '<---- psn_layers')

for source_layer in psn_layers:
    cpsn_idx = align_psn_idx(source_layer, name, Layer2Idx, Layer2Holder)
    print(source_layer, cpsn_idx.shape)

1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 3)
3
3 --> (3, 6, 3)
4 --> (3, 6, 3, 3)
(3, 6, 3, 3)
{'B': 0, 'PatRec:EC': 1, 'ECRec:Diag': 2, 'DiagRec:DiagV': 3, 'DiagVdftGrn': 4}
DiagVdftGrn
['DiagVdftGrn', 'DiagRec:DiagV', 'ECRec:Diag', 'PatRec:EC'] <---- Layer2Holder
['DiagVdftGrn', 'DiagRec:DiagV', 'ECRec:Diag', 'PatRec:EC'] <---- psn_layers
DiagVdftGrn torch.Size([3, 6, 3, 3])
DiagRec:DiagV torch.Size([3, 6, 3, 3])
ECRec:Diag torch.Size([3, 6, 3, 3])
PatRec:EC torch.Size([3, 6, 3, 3])


# Embedding (Expander)

## Module

In [5]:
# fieldlm.nn.embedding
import os
import torch
import numpy as np

class EmbeddingLayer(torch.nn.Module):

    def __init__(self, 
                 input_size, 
                 embedding_size, 
                 init = 'init', 
                 freeze = False):
        
        super(EmbeddingLayer, self).__init__()
        
        # (+) self.embedding
        if type(init) == np.ndarray:
            # 1. from given array
            weight = torch.FloatTensor(init)
            assert weight.shape == (input_size, embedding_size)
            self.embedding = torch.nn.Embedding.from_pretrained(weight, freeze = freeze)
            
        elif os.path.isfile(init):
            weight = torch.FloatTensor(np.load(init))
            assert tuple(weight.shape) == (input_size, embedding_size)
            
            self.embedding = torch.nn.Embedding.from_pretrained(weight, freeze = freeze)
            
        else:
            # from random initialization
            self.embedding = torch.nn.Embedding(input_size, embedding_size, padding_idx = 0)
        
    def forward(self, info):
        # tensor0 to tensor1
        info = self.embedding(info)
        return info

## Config

In [6]:
[i for i in Layer2Holder]

['DiagVdftGrn', 'DiagRec:DiagV', 'ECRec:Diag', 'PatRec:EC']

In [9]:
Layer2Holder['ECRec:Diag']

{'holder': tensor([[2, 1, 3, 3, 2, 3],
         [2, 1, 3, 1, 1, 0],
         [3, 2, 0, 0, 0, 0]]),
 'leng_mask': tensor([[False, False, False, False, False, False],
         [False, False, False, False, False,  True],
         [False, False,  True,  True,  True,  True]]),
 'leng': tensor([6, 5, 2]),
 'psn_idx': tensor([[1, 2, 3, 4, 5, 6],
         [1, 2, 3, 4, 5, 0],
         [1, 2, 0, 0, 0, 0]])}

In [10]:
embed_size = 512
vocab_size = 5000

embed_para =  {'embedding_size': embed_size,
               'init': 'random', 
               'input_size': vocab_size + 1 } # 1:the size of special tokens
embed_para

{'embedding_size': 512, 'init': 'random', 'input_size': 5001}

In [11]:
def generate_psn_embed_para(layername, embed_size):
    
    if 'Grn' not in layername: 
        vocab_size = 100
    else:
        vocab_size = 512
        
    embed_para = {'embedding_size': embed_size,
                  'init': 'random', 
                  'input_size': vocab_size + 1 }
    return embed_para

In [12]:
d = {}
d[fullname] = embed_para



psn_layers = [i for i in Layer2Holder]

for layername in psn_layers:
    if layername == 'P': break
    embed_para = generate_psn_embed_para(layername, embed_size)
    d[f'{layername}_psn'] = generate_psn_embed_para(layername, embed_size)
    
d

{'B-PatRec:EC-ECRec:Diag-DiagRec:DiagV-DiagVdftGrn': {'embedding_size': 512,
  'init': 'random',
  'input_size': 5001},
 'DiagVdftGrn_psn': {'embedding_size': 512,
  'init': 'random',
  'input_size': 513},
 'DiagRec:DiagV_psn': {'embedding_size': 512,
  'init': 'random',
  'input_size': 101},
 'ECRec:Diag_psn': {'embedding_size': 512,
  'init': 'random',
  'input_size': 101},
 'PatRec:EC_psn': {'embedding_size': 512, 'init': 'random', 'input_size': 101}}

In [13]:
for nn_name, layer_para in d.items():
    print(nn_name)
    print(layer_para)

B-PatRec:EC-ECRec:Diag-DiagRec:DiagV-DiagVdftGrn
{'embedding_size': 512, 'init': 'random', 'input_size': 5001}
DiagVdftGrn_psn
{'embedding_size': 512, 'init': 'random', 'input_size': 513}
DiagRec:DiagV_psn
{'embedding_size': 512, 'init': 'random', 'input_size': 101}
ECRec:Diag_psn
{'embedding_size': 512, 'init': 'random', 'input_size': 101}
PatRec:EC_psn
{'embedding_size': 512, 'init': 'random', 'input_size': 101}


## Usage 

In [14]:
NN_Dict = {}

for nn_name, layer_para in d.items():
    layer = EmbeddingLayer(**layer_para)
    NN_Dict[nn_name] = layer
    
NN_Dict

{'B-PatRec:EC-ECRec:Diag-DiagRec:DiagV-DiagVdftGrn': EmbeddingLayer(
   (embedding): Embedding(5001, 512, padding_idx=0)
 ),
 'DiagVdftGrn_psn': EmbeddingLayer(
   (embedding): Embedding(513, 512, padding_idx=0)
 ),
 'DiagRec:DiagV_psn': EmbeddingLayer(
   (embedding): Embedding(101, 512, padding_idx=0)
 ),
 'ECRec:Diag_psn': EmbeddingLayer(
   (embedding): Embedding(101, 512, padding_idx=0)
 ),
 'PatRec:EC_psn': EmbeddingLayer(
   (embedding): Embedding(101, 512, padding_idx=0)
 )}

In [15]:
import torch

########################
B_lenP = 3
B2P_lnEC = [6, 4, 3] # 
prefix_layers_num = 2
vocab_size = 100
########################

fullname = 'B-PatRec:EC-ECRec:Diag-DiagRec:DiagV-DiagVdftGrn'
layer2layeridx = {v:idx for idx, v in enumerate(fullname.split('2'))}
name = fullname.split('-')[-1]

data = get_simulated_tensor_from_fldname(fullname, B_lenP, B2P_lnEC, prefix_layers_num, vocab_size)
print(data.shape)
# fld_tensor_idx


info_idx = torch.LongTensor(data)
print(info_idx.shape)
holder = info_idx
# grn_leng_mask = info_idx == 0
# print(grn_leng_mask.shape)

Layer2Holder = get_Layer2Holder(fullname, holder)
for i in Layer2Holder: print(i)

1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 4)
3
3 --> (3, 6, 4)
4 --> (3, 6, 4, 4)
(3, 6, 4, 4)
torch.Size([3, 6, 4, 4])
DiagVdftGrn
DiagRec:DiagV
ECRec:Diag
PatRec:EC


In [16]:
# embed
Embed = NN_Dict[fullname]
Embed

EmbeddingLayer(
  (embedding): Embedding(5001, 512, padding_idx=0)
)

In [17]:
info = Embed(info_idx)
# tensor_name = tensor_name.replace('_idx', '2Feat_flt')
print(info.shape)
# print(tensor_name)

torch.Size([3, 6, 4, 4, 512])


In [18]:
# tensor_name.replace('_idx', '2Feat_flt')

# LMEmbeding (Expander)

TODO: adding huggingface

# Transformer (Learner)

## Module

In [20]:
import torch
import numpy as np
import torch.nn.functional as F

class TFMLayer(torch.nn.Module):
    def __init__(self, 
                 input_size = 512, 
                 output_size = 512, # d_model
                 nhead = 8,
                 num_encoder_layers = 6, # only have encoder part
                 num_decoder_layers = 0, # in default, we don't need decoder part. 
                 dim_feedforward = 2048, 
                 tfm_dropout = 0.1,
                 tfm_activation = 'relu'):
        
        '''https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/transformer.py'''

        super(TFMLayer,self).__init__()
        self.num_encoder_layers = num_encoder_layers
        self.num_decoder_layers = num_decoder_layers
        self.input_size = input_size
        self.tfm_input_size = input_size
        self.n_directions = 1
        self.output_size = output_size
        assert output_size % self.n_directions == 0 
        self.hidden_size = int(output_size / self.n_directions)
        assert self.hidden_size == self.tfm_input_size
            
        self.transformer  = torch.nn.Transformer(d_model = self.hidden_size, 
                                                 nhead = nhead,
                                                 num_encoder_layers = self.num_encoder_layers,
                                                 num_decoder_layers = self.num_decoder_layers,
                                                 dim_feedforward = dim_feedforward, 
                                                 dropout = tfm_dropout,
                                                 activation = tfm_activation,
                                                 batch_first = True,
                                                 # src_mask_flag = False, # see all tokens in a sentence 
                                                 # # This IS THE NEW PART. NOT PyTorch.nn.
                                                 ) 


    def forward(self, info, leng_mask):
        info = self.transformer(info, info, src_key_padding_mask = leng_mask,  tgt_key_padding_mask  = leng_mask)
        # for layer in self.postprocess:
        #     info = layer(info)
        return info

## Config

In [21]:
tfm_para =  {'input_size': 512,
             'output_size': 512,
             'nhead': 8,
             'num_encoder_layers': 6,
             'num_decoder_layers': 0,
             'dim_feedforward': 2048,
             'tfm_dropout': 0.1,
             'tfm_activation': 'relu'}
tfm_para

{'input_size': 512,
 'output_size': 512,
 'nhead': 8,
 'num_encoder_layers': 6,
 'num_decoder_layers': 0,
 'dim_feedforward': 2048,
 'tfm_dropout': 0.1,
 'tfm_activation': 'relu'}

## Usage

In [22]:
tfm_layer = TFMLayer(**tfm_para)
tfm_layer

TFMLayer(
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
          )
          (linear1): Linear(in_features=512, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=512, bias=True)
          (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
        (1): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
          )
          (linear1): Linear(in_features=512, o

In [25]:
print(fullname)
print(info_idx.shape)
info = Embed(info_idx)
print(info.shape)

B-PatRec:EC-ECRec:Diag-DiagRec:DiagV-DiagVdftGrn
torch.Size([3, 6, 4, 4])
torch.Size([3, 6, 4, 4, 512])


In [26]:
print(info_idx.shape)
leng_mask = info_idx == 0
print(info.shape)
print(leng_mask.shape)

torch.Size([3, 6, 4, 4])
torch.Size([3, 6, 4, 4, 512])
torch.Size([3, 6, 4, 4])


### Reshape

In [27]:
nbs = np.array(info.shape[:-2]).prod()
ngrn, dim = info.shape[-2:]
print(nbs, ngrn, dim)


tmp_info = info.contiguous().view(nbs, ngrn, dim)
print(tmp_info.shape)

tmp_leng_mask = leng_mask.contiguous().view(nbs, ngrn)
print(tmp_leng_mask.shape)

tmp_leng = (tmp_leng_mask == 0).sum(-1)
print(tmp_leng.shape)

72 4 512
torch.Size([72, 4, 512])
torch.Size([72, 4])
torch.Size([72])


In [28]:
# order sequences and restore sequences according to their lenghts
# TODO: test the speed of orderSeq and restoreSeq

def orderSeq(seq_unordered, leng_unordered):
    # leng_unordered is a tensor
    # seq_unordered is a numpy
    leng_ordered, seq_index = leng_unordered.sort(descending=True) 
    _, reverse_index = seq_index.sort()
    leng_ordered = leng_ordered[leng_ordered>0]
    seq_index    = seq_index[:len(leng_ordered)]
    seq_ordered  = seq_unordered[seq_index.cpu()]
    return seq_ordered, leng_ordered, reverse_index

In [29]:
tmp_info = info.contiguous().view(nbs, ngrn, dim)
print(tmp_info.shape)

ord_info,      ord_leng, r_idx = orderSeq(tmp_info, tmp_leng)
ord_leng_mask, ord_leng, r_idx = orderSeq(tmp_leng_mask, tmp_leng)
print(ord_info.shape)
print(ord_leng.shape)
print(r_idx.shape)
print(ord_leng)

torch.Size([72, 4, 512])
torch.Size([32, 4, 512])
torch.Size([32])
torch.Size([72])
tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 1, 1, 1, 1, 1, 1])


In [30]:
print(tmp_leng_mask.shape)
ord_leng_mask, ord_leng, r_idx = orderSeq(tmp_leng_mask, tmp_leng)
print(ord_leng_mask.shape)
print(ord_leng.shape)
print(r_idx.shape)
print(ord_leng)

torch.Size([72, 4])
torch.Size([32, 4])
torch.Size([32])
torch.Size([72])
tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 1, 1, 1, 1, 1, 1])


### Calculate

In [31]:
ord_info_output = tfm_layer(ord_info, ord_leng_mask)
print(ord_info_output.shape)

torch.Size([32, 4, 512])


### Restore

In [32]:
def restoreSeq(seq_ordered, reverse_index):
    # shape = list(seq_ordered.shape)
    data_type = seq_ordered.type()
    shape = list(seq_ordered.shape)
    shape[0] = len(reverse_index) - shape[0]
    t = torch.cat([seq_ordered, torch.zeros(shape).type(data_type)])
    seq_restored = t[reverse_index]
    return seq_restored

In [33]:
info_new = restoreSeq(ord_info_output, r_idx)
print(info_new.shape)

torch.Size([72, 4, 512])


In [34]:
output_size = dim
info_output = info_new.view(*list(leng_mask.shape) + [output_size])
print(info_output.shape)

torch.Size([3, 6, 4, 4, 512])


In [35]:
info_output[:,:,:,:,0]

tensor([[[[ 0.8383, -0.9131,  0.5006,  0.0000],
          [ 0.5006, -0.2046, -0.2352,  1.3841],
          [ 0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000]],

         [[-0.1936, -0.0567,  0.5737, -0.4665],
          [-0.7809, -0.4120,  0.0000,  0.0000],
          [-0.2352,  1.1215,  0.0000,  0.0000],
          [-1.1881,  0.0000,  0.0000,  0.0000]],

         [[-0.5259,  0.0000,  0.0000,  0.0000],
          [ 0.2825, -1.1227, -2.8148,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000]],

         [[-1.1142, -3.0648,  0.1113, -0.6002],
          [-0.0567,  0.8383,  0.4693,  0.0000],
          [-0.4740, -0.7515,  1.0453,  0.8504],
          [ 0.0000,  0.0000,  0.0000,  0.0000]],

         [[-0.2369, -0.7212,  0.0000,  0.0000],
          [-0.9093,  0.0000,  0.0000,  0.0000],
          [-0.8121, -1.8616,  0.0000,  0.0000],
          [-0.1936,  0.2140, -0.0567, -0.1639]],

         [[-0.4665,  1.3841, -

In [36]:
leng_mask

tensor([[[[False, False, False,  True],
          [False, False, False, False],
          [ True,  True,  True,  True],
          [ True,  True,  True,  True]],

         [[False, False, False, False],
          [False, False,  True,  True],
          [False, False,  True,  True],
          [False,  True,  True,  True]],

         [[False,  True,  True,  True],
          [False, False, False,  True],
          [ True,  True,  True,  True],
          [ True,  True,  True,  True]],

         [[False, False, False, False],
          [False, False, False,  True],
          [False, False, False, False],
          [ True,  True,  True,  True]],

         [[False, False,  True,  True],
          [False,  True,  True,  True],
          [False, False,  True,  True],
          [False, False, False, False]],

         [[False, False, False, False],
          [False, False,  True,  True],
          [ True,  True,  True,  True],
          [ True,  True,  True,  True]]],


        [[[False, False, F