# Simulate Data

In [1]:
import torch
import numpy as np
from fieldnn.utils.layerfn import traverse
from fieldnn.utils.simulate import get_next_info, get_simulated_tensor_from_fldname

# --- Simulation settings ------------------------------------------------------
SEED = 0                # fixed so that "Restart Kernel -> Run All" draws the same vocab sizes
MIN_VOCAB_SIZE = 2      # the original np.random.randint(5000) could return 0 (an empty vocabulary)
MAX_VOCAB_SIZE = 5000   # exclusive upper bound, same as the original draw

B_lenP = 3              # presumably the batch size (recorded shapes start with 3) - TODO confirm
B2P_lnEC = [6, 5, 2]    # presumably per-sample lengths of the second layer - TODO confirm
prefix_layers_num = 2
vocab_size = 100        # placeholder; overwritten for every field in the loop below
Ignore_PSN_Layers = ['B', 'St']

# --- Field list ---------------------------------------------------------------
# Each field is a '-'-separated path of layers; ':' attaches a column to a layer
# and the 'Grn' suffix marks the raw index tensors that are simulated here.
FLD_LIST = [
    'B-St-Tk:SlfGrn',
    'B-St-Tk:POSGrn',
    'B-St-Tk:AnnoGrn',
    'B-St-Tk:SubWord-CharGrn',
    'B-St-Tk:SubWord-SyllableGrn',
    'B-St-Tk:SubWord-PhonemeGrn',
]

### TODO: decide which layer the structure should end at.
# FLD_END = 'B-St'      # alternative that was immediately overwritten in the original cell
FLD_END = 'B-St-Tk'

# Alternative (currently disabled) field list and end layers:
# FLD_LIST = [
# 'B-P-EC:Diag-DiagRec:DiagV-DiagVdftGrn',
# 'B-P-EC:Diag-DiagRec:DiagDT-DiagDTdftGrn',
# 'B-P-EC:Med-MedRec:MedV-MedVdftGrn',
# 'B-P-EC:Med-MedRec:MedDT-MedDTdftGrn',
# 'B-P-EC:A1C-A1CRec:A1CV-A1CVdftGrn',
# 'B-P-EC:A1C-A1CRec:A1CDT-A1CDTdftGrn',
# 'B-P-EC:PN-PNRec:SctName-SNdftGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:SelfGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:POSGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:SubWord-CharGrn',
# ]

# FLD_END = 'B-P'
# FLD_END = 'B-P-EC'

# --- Derived lookups ----------------------------------------------------------
# Short name (last '-' component) -> full field name.
NAME_2_FULLNAME = {i.split('-')[-1]: i for i in FLD_LIST}

# Random vocabulary size per field. Seeding makes the draw reproducible; it only
# makes the simulated tensors reproducible as well if the simulator uses NumPy's
# global random state - TODO confirm against fieldnn.utils.simulate.
np.random.seed(SEED)
FLD_2_VOCABSIZE = {k: np.random.randint(MIN_VOCAB_SIZE, MAX_VOCAB_SIZE) for k in FLD_LIST}

# --- Simulated tensors --------------------------------------------------------
# Full field name -> {'holder': LongTensor of simulated indices, 'info': placeholder}.
FLD_2_DATA = {}

for fullname in FLD_LIST:
    vocab_size = FLD_2_VOCABSIZE[fullname]
    info_idx = get_simulated_tensor_from_fldname(fullname, B_lenP, B2P_lnEC, prefix_layers_num, vocab_size)
    holder = torch.LongTensor(info_idx)
    # 'info' is the string 'Empty' until a pipeline layer writes a real value.
    FLD_2_DATA[fullname] = {'holder': holder, 'info': 'Empty'}

1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 3)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 5)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 5)


In [2]:
##################
# Print each simulated field's full name followed by the shape of its index tensor.
for fullname, data in FLD_2_DATA.items():
    print(f'\n{fullname}')
    # Rebinds the notebook-level name `holder` on every iteration.
    holder = data['holder']
    print(holder.shape)


B-St-Tk:SlfGrn
torch.Size([3, 6])

B-St-Tk:POSGrn
torch.Size([3, 6])

B-St-Tk:AnnoGrn
torch.Size([3, 6])

B-St-Tk:SubWord-CharGrn
torch.Size([3, 6, 3])

B-St-Tk:SubWord-SyllableGrn
torch.Size([3, 6, 5])

B-St-Tk:SubWord-PhonemeGrn
torch.Size([3, 6, 5])


In [3]:
# St: the sentence layer.
# Tk:SlfGrn: the token layer with each token represented as itself.
# NOTE(review): the zeros in the recorded output look like padding - confirm against the simulator.
data = FLD_2_DATA['B-St-Tk:SlfGrn']
data['holder']

tensor([[2186, 2999, 1115, 3456,  739,  323],
        [1796, 2415, 2022, 1411, 2961,    0],
        [1649,  597,    0,    0,    0,    0]])

In [4]:
# Shape of the token-level index tensor selected in the previous cell.
data['holder'].shape

torch.Size([3, 6])

# Data Flow Structure

In [5]:
import pandas as pd

def get_dataflow_info(fld_list):
    """List every distinct link between consecutive layers of the given fields.

    Each field name is split on '-'. For every pair of neighbouring components
    at positions (a, a + 1) with a >= 1, one link is recorded; the pair (0, 1)
    is never recorded because the scan starts at position 1.

    Parameters
    ----------
    fld_list : iterable of str
        Full field names such as 'B-St-Tk:SubWord-CharGrn'.

    Returns
    -------
    pandas.DataFrame
        Columns 'layers' (e.g. '1-2') and 'nn' (e.g. 'St-Tk:SlfGrn'), one row
        per distinct (layers, nn) pair, sorted by 'nn' with a fresh 0..n-1 index.
    """
    # A dict keeps first-seen order and de-duplicates in O(1) per link. Storing
    # the two parts as a tuple (instead of joining them with '@' and splitting
    # again) keeps names that themselves contain '@' intact.
    links = {}
    for fld in fld_list:
        parts = fld.split('-')
        for a in range(1, len(parts) - 1):
            b = a + 1
            links[(f'{a}-{b}', f'{parts[a]}-{parts[b]}')] = None

    info = pd.DataFrame({
        'layers': [layers for layers, _ in links],
        'nn': [nn for _, nn in links],
    })
    return info.sort_values('nn').reset_index(drop=True)


def get_merge_layernn(x):
    """Return the parents that have two or more children, as merge names.

    Every entry of `x` is expected to look like '<layer>:<parent>-<child>'.
    The layer prefix is taken from the first entry only. The result contains
    one '<layer>:<parent>-<child1>&<child2>...' string per parent with at
    least two children, in first-seen parent order.

    Note: a later notebook cell defines another function with this same name,
    which replaces this one once that cell has run.
    """
    layer_name = x[0].split(':')[0]

    children_by_parent = {}
    for entry in x:
        parent, child = entry.split(':')[-1].split('-')
        children_by_parent.setdefault(parent, []).append(child)

    return [
        layer_name + ':' + parent + '-' + '&'.join(children)
        for parent, children in children_by_parent.items()
        if len(children) >= 2
    ]


def get_single_layernn(x):
    """Return the parents that have exactly one child.

    Every entry of `x` is expected to look like '<layer>:<parent>-<child>'.
    The layer prefix is taken from the first entry only. The result contains
    one '<layer>:<parent>-<child>' string per parent that owns a single child,
    in first-seen parent order.
    """
    layer_name = x[0].split(':')[0]

    children_by_parent = {}
    for entry in x:
        parent, child = entry.split(':')[-1].split('-')
        children_by_parent.setdefault(parent, []).append(child)

    return [
        layer_name + ':' + parent + '-' + '&'.join(children)
        for parent, children in children_by_parent.items()
        if len(children) == 1
    ]
    
    
def generate_df_struct(fld_list, mergefirst_fld_list):
    """Build a per-layer-pair table of single and merge structures.

    Parameters
    ----------
    fld_list : iterable of str
        Full field names, passed to `get_dataflow_info`.
    mergefirst_fld_list : container of str
        A merge entry whose text after the last '-' is in this container goes
        to the 'mergefirst' column; all other merge entries go to 'mergelast'.

    Returns
    -------
    pandas.DataFrame
        One row per layer pair with columns 'layers', 'nn', 'single', 'merge',
        'mergefirst' and 'mergelast'.

    NOTE(review): `get_merge_layernn` is defined twice in this notebook with
    different output formats; the 'merge*' columns depend on which definition
    is live when this function is called.
    """
    info = get_dataflow_info(fld_list)

    # Select the 'nn' column before aggregating: applying a function to the
    # whole grouped frame is deprecated in recent pandas, and only 'nn' is needed.
    df_struct = info.groupby('layers')['nn'].agg(list).reset_index()
    df_struct.columns = ['layers', 'nn']

    df_struct['single'] = df_struct['nn'].apply(lambda x: get_single_layernn(x))
    df_struct['merge'] = df_struct['nn'].apply(lambda x: get_merge_layernn(x))

    df_struct['mergefirst'] = df_struct['merge'].apply(
        lambda x: [i for i in x if i.split('-')[-1] in mergefirst_fld_list]
    )
    df_struct['mergelast'] = df_struct['merge'].apply(
        lambda x: [i for i in x if i.split('-')[-1] not in mergefirst_fld_list]
    )
    return df_struct

In [6]:
def get_merge_layernn(x):
    """Describe, per parent, which children flow into it.

    Every entry of `x` must be '<parent>-<child>' with exactly one '-'.
    Children are grouped by parent in first-seen order and each group is
    rendered as '<child1>^<child2>...==><parent>'.

    Note: this definition replaces the earlier function of the same name.
    """
    children_by_parent = {}
    for link in x:
        parent, child = link.split('-')
        children_by_parent.setdefault(parent, []).append(child)

    return [
        '^'.join(children) + '==>' + parent
        for parent, children in children_by_parent.items()
    ]

In [7]:

def get_structures_from_fldlist(fld_list):
    """Group the layer links of `fld_list` by layer pair and name their merges.

    Parameters
    ----------
    fld_list : iterable of str
        Full field names, passed to `get_dataflow_info`.

    Returns
    -------
    pandas.DataFrame
        One row per layer pair with columns 'layers' (e.g. '1-2'), 'nn' (list
        of '<parent>-<child>' links) and 'struct_name' (the result of
        `get_merge_layernn` on that list).
    """
    info = get_dataflow_info(fld_list)

    # Select the 'nn' column before aggregating: applying a function to the
    # whole grouped frame is deprecated in recent pandas, and only 'nn' is needed.
    df_struct = info.groupby('layers')['nn'].agg(list).reset_index()
    df_struct.columns = ['layers', 'nn']

    df_struct['struct_name'] = df_struct['nn'].apply(lambda x: get_merge_layernn(x))
    return df_struct

In [8]:
# Layer-to-layer links of the active FLD_LIST: one row per distinct link, sorted by 'nn'.
info = get_dataflow_info(FLD_LIST)
info

Unnamed: 0,layers,nn
0,1-2,St-Tk:AnnoGrn
1,1-2,St-Tk:POSGrn
2,1-2,St-Tk:SlfGrn
3,1-2,St-Tk:SubWord
4,2-3,Tk:SubWord-CharGrn
5,2-3,Tk:SubWord-PhonemeGrn
6,2-3,Tk:SubWord-SyllableGrn


In [9]:
# Links grouped per layer pair, plus the merge description ('struct_name') of each group.
df_struct = get_structures_from_fldlist(FLD_LIST)
df_struct

Unnamed: 0,layers,nn,struct_name
0,1-2,"[St-Tk:AnnoGrn, St-Tk:POSGrn, St-Tk:SlfGrn, St...",[Tk:AnnoGrn^Tk:POSGrn^Tk:SlfGrn^Tk:SubWord==>St]
1,2-3,"[Tk:SubWord-CharGrn, Tk:SubWord-PhonemeGrn, Tk...",[CharGrn^PhonemeGrn^SyllableGrn==>Tk:SubWord]


# Struct Name

In [10]:
def get_struct_info(struct_name, NAME_2_FULLNAME):
    """Resolve a '<in1>^<in2>...==><out>' structure name into full names.

    Parameters
    ----------
    struct_name : str
        Short input names joined by '^', then '==>', then the short output name.
    NAME_2_FULLNAME : dict
        Short name -> full field name. Updated in place with the output's
        full name, and also returned.

    Returns
    -------
    tuple
        (fullname_inputs, fullname_output, struct_model, NAME_2_FULLNAME).
        `struct_model` is 'RL' for a single input; for several inputs it is
        'MLRLRL' when every short input name has exactly one ':' and 'RLMLRL'
        otherwise.

    Raises
    ------
    AssertionError
        If the short input names do not all contain the same number of ':'.
    """
    pieces = struct_name.split('==>')
    inputs = pieces[0].split('^')
    output = pieces[1]

    fullname_inputs = [NAME_2_FULLNAME[name] for name in inputs]

    # The output sits two '-' levels above the first input's last component.
    ancestors = fullname_inputs[0].split('-')[:-2]
    fullname_output = '-'.join(ancestors) + '-' + output
    NAME_2_FULLNAME[output] = fullname_output

    if len(inputs) == 1:
        struct_model = 'RL'
    else:
        # Number of ':'-separated parts per short input name; must be uniform.
        part_counts = list({name.count(':') + 1 for name in inputs})
        assert len(part_counts) == 1
        if part_counts[0] == 2:
            struct_model = 'MLRLRL'
        else:
            struct_model = 'RLMLRL'
        # TODO HERE
        # 'MLRL'

    return fullname_inputs, fullname_output, struct_model, NAME_2_FULLNAME


In [11]:
def _fold_last_layer(fullname):
    """Replace the last '-' of `fullname` with ':' (e.g. 'A-B-C' -> 'A-B:C')."""
    head, _, tail = fullname.rpartition('-')
    return head + ':' + tail


def generate_structure(fullname_inputs, struct_model):
    """Expand a structure model into named pipeline steps.

    Parameters
    ----------
    fullname_inputs : list of str
        Full names of the fields feeding the structure.
    struct_model : str
        Stage letters such as 'RL', 'RLMLRL' or 'MLRLRL'. The text before the
        first 'M' describes the per-input stage, the text after the last 'M'
        describes the stage applied to the merged field.

    Returns
    -------
    tuple of dict
        `D_model` maps each input (and, if a merge happens, the '^'-joined
        merged inputs) to its list of pipeline names of the form
        '<EL|RL|ML>**<input(s)>=><output>'. `D_data` maps the same keys to the
        full name of the data produced by that pipeline list.

    Raises
    ------
    AssertionError
        If a merge is requested and the per-input results do not share one
        parent path and one ':' prefix.
    """
    D_model = {}
    D_data = {}

    stages = struct_model.split('M')
    # Before the merge at most ONE reduce step is applied per input, no matter
    # how many 'R' letters the pre-merge stage contains.
    reduce_before_merge = 'R' in stages[0]

    for input_field in fullname_inputs:
        fullname = input_field
        pipelines = []

        # Raw 'Grn' fields are expanded (embedded) first.
        if 'Grn' in fullname:
            expanded = fullname.replace('Grn', '')
            pipelines.append(f'EL**{fullname}=>{expanded}')
            fullname = expanded

        if reduce_before_merge:
            reduced = _fold_last_layer(fullname)
            pipelines.append(f'RL**{fullname}=>{reduced}')
            fullname = reduced

        D_model[input_field] = pipelines
        D_data[input_field] = fullname

    # No merge stage: the per-input pipelines are the whole structure.
    if 'M' not in struct_model:
        return D_model, D_data

    # --- merge stage ---
    merge_inputs = list(D_data.values())

    parents = [name.rpartition('-')[0] for name in merge_inputs]
    assert len(set(parents)) == 1
    parent = parents[0]

    leaves = [name.rpartition('-')[2] for name in merge_inputs]
    prefixes = [leaf.rpartition(':')[0] for leaf in leaves]
    assert len(set(prefixes)) == 1
    prefix = prefixes[0]

    merged_fields = '&'.join(leaf.rpartition(':')[2] for leaf in leaves)
    fullname = parent + '-' + '-'.join([prefix, merged_fields])

    merged_key = '^'.join(merge_inputs)
    merged_pipelines = [f'ML**{merged_key}=>{fullname}']

    # --- post-merge stage: one reduce step per 'R' after the last 'M' ---
    # (The original also had a `len(stages) == 1` branch here; it could never
    # run because a model containing 'M' always splits into at least two stages.)
    for _ in range(stages[-1].count('R')):
        reduced = _fold_last_layer(fullname)
        merged_pipelines.append(f'RL**{fullname}=>{reduced}')
        fullname = reduced

    D_model[merged_key] = merged_pipelines
    D_data[merged_key] = fullname
    return D_model, D_data

In [12]:

df_struct = get_structures_from_fldlist(FLD_LIST)

# Order the layer pairs deepest-first. 'layers' holds strings such as '2-3',
# so a plain string sort would put '10-11' before '2-3'; sort on the integer
# value of the first layer index instead.
tmp = df_struct.sort_values(
    'layers',
    ascending=False,
    key=lambda col: col.str.split('-').str[0].astype(int),
)
# Layer pair -> list of structure names handled at that level.
layer2modulelist = dict(zip(tmp['layers'].to_list(), tmp['struct_name'].to_list()))
layer2modulelist

{'2-3': ['CharGrn^PhonemeGrn^SyllableGrn==>Tk:SubWord'],
 '1-2': ['Tk:AnnoGrn^Tk:POSGrn^Tk:SlfGrn^Tk:SubWord==>St']}

In [13]:


df_struct = get_structures_from_fldlist(FLD_LIST)

# Order the layer pairs deepest-first. 'layers' holds strings such as '2-3',
# so a plain string sort would put '10-11' before '2-3'; sort on the integer
# value of the first layer index instead.
tmp = df_struct.sort_values(
    'layers',
    ascending=False,
    key=lambda col: col.str.split('-').str[0].astype(int),
)
# Layer pair -> list of structure names handled at that level.
layer2modulelist = dict(zip(tmp['layers'].to_list(), tmp['struct_name'].to_list()))
layer2modulelist

{'2-3': ['CharGrn^PhonemeGrn^SyllableGrn==>Tk:SubWord'],
 '1-2': ['Tk:AnnoGrn^Tk:POSGrn^Tk:SlfGrn^Tk:SubWord==>St']}

In [14]:
from pprint import pprint
from fieldnn.utils.parafn import get_expander_para

# Build and print the expander parameters for the first field as a worked example.
fullname = FLD_LIST[0]
print(fullname)

embed_size = 512

# Post-processing settings without an activator (the 'activator' entry is commented out).
expander_process = {# 'activator': 'gelu',
           'dropout': {'p': 0.5, 'inplace': False},
           'layernorm': {'eps': 1e-05, 'elementwise_affine': True}}


# Same settings plus a 'gelu' activator; this is the dict actually passed below.
process = {'activator': 'gelu',
           'dropout': {'p': 0.5, 'inplace': False},
           'layernorm': {'eps': 1e-05, 'elementwise_affine': True}}

nn_name = 'Embedding'
vocab_size = FLD_2_VOCABSIZE[fullname]
nn_para = {'input_size': vocab_size}
postprocess = process
# Overwrites the notebook-level Ignore_PSN_Layers with the first two layers of this field.
Ignore_PSN_Layers = fullname.split('-')[:2]
expander_layer_para = get_expander_para(fullname, nn_name, nn_para, embed_size, 
                                        Ignore_PSN_Layers, 
                                        postprocess
                                       )
pprint(expander_layer_para)

B-St-Tk:SlfGrn
{'B-St-Tk:SlfGrn': ('Embedding',
                    {'embedding_size': 512,
                     'init': 'random',
                     'input_size': 3478}),
 'Ignore_PSN_Layers': ['B', 'St'],
 'input_size': None,
 'output_size': 512,
 'postprocess': {'activator': 'gelu',
                 'dropout': {'inplace': False, 'p': 0.5},
                 'layernorm': {'elementwise_affine': True, 'eps': 1e-05}}}


In [15]:
# Structure name -> resolved inputs/output, structure model and generated pipelines.
StructName2Settings = {}
for layername, struct_list in layer2modulelist.items():
    for struct_name in struct_list:
        print(layername, struct_name)

        # get_struct_info also registers the structure's output in NAME_2_FULLNAME,
        # which is why the deepest layer pairs must be processed first.
        fullname_inputs, fullname_output, struct_model, NAME_2_FULLNAME = get_struct_info(struct_name, NAME_2_FULLNAME)
        D_model, D_data = generate_structure(fullname_inputs, struct_model)

        d = {
            'fullname_inputs': fullname_inputs,
            'fullname_output': fullname_output,
            'struct_model': struct_model,
            'D_model': D_model,
            'D_data': D_data,
        }
        StructName2Settings[struct_name] = d

        # Echo every generated pipeline and split its name into its parts.
        for input_data, pipeline_list in d['D_model'].items():
            print('----> (input)', input_data)
            for pipeline_name in pipeline_list:
                print('   ----> (pipeline name)', pipeline_name)

                # '<type>**<in1>^<in2>...=><out>'
                sublayer_type, input_output = pipeline_name.split('**')
                input_fullnames, output_fullname = input_output.split('=>')
                input_fullname_list = input_fullnames.split('^')

2-3 CharGrn^PhonemeGrn^SyllableGrn==>Tk:SubWord
----> (input) B-St-Tk:SubWord-CharGrn
   ----> (pipeline name) EL**B-St-Tk:SubWord-CharGrn=>B-St-Tk:SubWord-Char
   ----> (pipeline name) RL**B-St-Tk:SubWord-Char=>B-St-Tk:SubWord:Char
----> (input) B-St-Tk:SubWord-PhonemeGrn
   ----> (pipeline name) EL**B-St-Tk:SubWord-PhonemeGrn=>B-St-Tk:SubWord-Phoneme
   ----> (pipeline name) RL**B-St-Tk:SubWord-Phoneme=>B-St-Tk:SubWord:Phoneme
----> (input) B-St-Tk:SubWord-SyllableGrn
   ----> (pipeline name) EL**B-St-Tk:SubWord-SyllableGrn=>B-St-Tk:SubWord-Syllable
   ----> (pipeline name) RL**B-St-Tk:SubWord-Syllable=>B-St-Tk:SubWord:Syllable
----> (input) B-St-Tk:SubWord:Char^B-St-Tk:SubWord:Phoneme^B-St-Tk:SubWord:Syllable
   ----> (pipeline name) ML**B-St-Tk:SubWord:Char^B-St-Tk:SubWord:Phoneme^B-St-Tk:SubWord:Syllable=>B-St-Tk:SubWord-Char&Phoneme&Syllable
   ----> (pipeline name) RL**B-St-Tk:SubWord-Char&Phoneme&Syllable=>B-St-Tk:SubWord:Char&Phoneme&Syllable
1-2 Tk:AnnoGrn^Tk:POSGrn^Tk:SlfGrn

In [16]:
# pprint(StructName2Settings)

# Pipeline Para

## Config

In [17]:
def get_fullname_from_inputs(fullname_list):
    """Derive the full name of the field obtained by merging several inputs.

    The parent path (everything before the last '-') is taken from the first
    input. If the last components carry no ':', they are joined with '&'
    directly under that parent. Otherwise the text before the last ':' of the
    first input's last component becomes a table name, and the parts after the
    last ':' are joined with '&' beneath it.

    Parameters
    ----------
    fullname_list : list of str
        Full names of the inputs being merged; must not be empty.

    Returns
    -------
    str
        '<parent>-<a>&<b>...' or '<parent>-<table>-<a>&<b>...'.
    """
    names = [full.rpartition('-')[2] for full in fullname_list]
    prefix = fullname_list[0].rpartition('-')[0]

    # Number of ':' in the last components; one element of the set of counts is used.
    colon_counts = [name.count(':') for name in names]
    table_row_indicator = next(iter(set(colon_counts)))

    if table_row_indicator == 0:
        return f'{prefix}-{"&".join(names)}'

    if table_row_indicator >= 1:
        table_name = names[0].rpartition(':')[0]
        columns = [name.rpartition(':')[2] for name in names]
        return f'{prefix}-{table_name}-{"&".join(columns)}'

    raise ValueError(f'"table_row_indicator" {table_row_indicator} if not correct')

In [18]:
from fieldnn.utils.parafn import get_expander_para, get_learner_para, get_reducer_para, get_merger_para
# from fieldnn.utils.parafn import get_fullname_from_inputs
# from .parafn import get_expander_para, get_learner_para, get_reducer_para, get_merger_para
# from .parafn import get_fullname_from_inputs

# Default learner settings: read by the get_*_sublayer_para_list helpers via
# the 'nn_name' and 'nn_para' keys.
default_learner_para  = {
    'nn_name': 'TFM',
    'nn_para': {'num_encoder_layers': 6}
}

# Default reducer settings: only 'nn_name' is read (by get_RL_sublayer_para_list).
default_reducer_para  = {
    'nn_name': 'Max',
}


def get_EL_sublayer_para_list(input_fullname_list, 
                              FLD_2_VOCABSIZE, 
                              embed_size,
                              default_learner_para, 
                              expander_process, 
                              default_process, 
                              Ignore_PSN_Layers):
    """Build the parameters of an 'EL' (Expander + Learner) pipeline.

    Parameters
    ----------
    input_fullname_list : list of str
        Exactly one full field name (the raw 'Grn' field).
    FLD_2_VOCABSIZE : dict
        Full field name -> vocabulary size; used as the embedding 'input_size'.
    embed_size : int
        Used as the embedding size and as the learner's input and output size.
    default_learner_para : dict
        Must provide 'nn_name' and 'nn_para' for the learner.
    expander_process : dict
        Post-processing settings handed to the expander.
    default_process : dict
        Used as both embed-processing and post-processing of the learner.
    Ignore_PSN_Layers : list
        Forwarded unchanged to both parameter builders.

    Returns
    -------
    dict
        {'Expander': <expander para>, 'Learner': <learner para>}.
    """
    assert len(input_fullname_list) == 1
    grn_fullname = input_fullname_list[0]
    # The learner works on the expander's output, whose name drops 'Grn'.
    expanded_fullname = grn_fullname.replace('Grn', '')

    expander_layer_para = get_expander_para(
        grn_fullname,
        'Embedding',
        {'input_size': FLD_2_VOCABSIZE[grn_fullname]},
        embed_size,
        Ignore_PSN_Layers,
        expander_process,
    )

    learner_layer_para = get_learner_para(
        expanded_fullname,
        default_learner_para['nn_name'],
        default_learner_para['nn_para'],
        embed_size,          # input_size
        embed_size,          # output_size
        Ignore_PSN_Layers,
        default_process,     # embedprocess
        default_process,     # postprocess
    )

    return {'Expander': expander_layer_para, 'Learner': learner_layer_para}



def get_RL_sublayer_para_list(input_fullname_list, 
                              embed_size, 
                              default_learner_para,
                              default_reducer_para,
                              default_process,
                              Ignore_PSN_Layers):
    """Build the parameters of an 'RL' (Reducer + Learner) pipeline.

    Parameters
    ----------
    input_fullname_list : list of str
        Exactly one full field name (the field to reduce).
    embed_size : int
        Input size of the reducer and input/output size of the learner.
    default_learner_para : dict
        Must provide 'nn_name' and 'nn_para'; used unless the reduced field
        has only two '-'-separated layers.
    default_reducer_para : dict
        Must provide 'nn_name' for the reducer.
    default_process : dict
        Post-processing settings for the reducer and the learner.
    Ignore_PSN_Layers : list
        Forwarded unchanged to the learner parameter builder.

    Returns
    -------
    dict
        {'Reducer': <reducer para>, 'Learner': <learner para>}.
    """
    assert len(input_fullname_list) == 1
    fullname = input_fullname_list[0]

    # --- reducer ---
    reducer_name = default_reducer_para['nn_name']  # 'Max'
    # NOTE(review): a 'concat' reducer reports three times the embedding size,
    # yet the learner below is still configured with input_size == embed_size
    # - confirm this is intended.
    reducer_output_size = embed_size * 3 if reducer_name == 'concat' else embed_size
    reducer_layer_para = get_reducer_para(
        fullname, reducer_name, {}, embed_size, reducer_output_size, default_process
    )

    # --- learner on the reduced field (last '-' folded into ':') ---
    path = fullname.split('-')
    output_fullname = '-'.join(path[:-1]) + ':' + path[-1]

    if output_fullname.count('-') == 1:
        # B and Obs: from tfm to linear
        learner_name, learner_nn_para = 'linear', {}
        embedprocess = {}
    else:
        learner_name = default_learner_para['nn_name']
        learner_nn_para = default_learner_para['nn_para']
        embedprocess = default_process

    learner_layer_para = get_learner_para(
        output_fullname,
        learner_name,
        learner_nn_para,
        embed_size,          # input_size
        embed_size,          # output_size
        Ignore_PSN_Layers,
        embedprocess,
        default_process,     # postprocess
    )

    return {'Reducer': reducer_layer_para, 'Learner': learner_layer_para}
    

def get_ML_sublayer_para_list(input_fullname_list,
                              embed_size, 
                              default_learner_para,
                              default_process, 
                              Ignore_PSN_Layers):
    """Build the parameters of an 'ML' (Merger + Learner) pipeline.

    Parameters
    ----------
    input_fullname_list : list of str
        Two or more full field names to merge.
    embed_size : int
        Input and output size of both the merger and the learner.
    default_learner_para : dict
        Must provide 'nn_name' and 'nn_para' for the learner.
    default_process : dict
        Post-processing settings for the merger, and both embed-processing and
        post-processing settings for the learner.
    Ignore_PSN_Layers : list
        Forwarded unchanged to the learner parameter builder.

    Returns
    -------
    dict
        {'Merger': <merger para>, 'Learner': <learner para>}.
    """
    assert len(input_fullname_list) > 1

    # The merger is keyed by its '^'-joined inputs.
    merger_layer_para = get_merger_para(
        '^'.join(input_fullname_list), 'Merger', {}, embed_size, embed_size, default_process
    )

    # The learner is keyed by the merged field's own full name.
    merged_fullname = get_fullname_from_inputs(input_fullname_list)
    learner_layer_para = get_learner_para(
        merged_fullname,
        default_learner_para['nn_name'],
        default_learner_para['nn_para'],
        embed_size,          # input_size
        embed_size,          # output_size
        Ignore_PSN_Layers,
        default_process,     # embedprocess
        default_process,     # postprocess
    )

    return {'Merger': merger_layer_para, 'Learner': learner_layer_para}

    

def process_pipeline_name(sublayer_name, FLD_2_VOCABSIZE, embed_size, 
                          default_learner_para,  default_reducer_para,
                          expander_process, default_process, Ignore_PSN_Layers):
    """Parse a pipeline name and build the matching sub-layer parameters.

    Parameters
    ----------
    sublayer_name : str
        '<EL|RL|ML>**<in1>^<in2>...=><out>'.
    FLD_2_VOCABSIZE, embed_size, default_learner_para, default_reducer_para,
    expander_process, default_process, Ignore_PSN_Layers
        Forwarded to the EL / RL / ML parameter builder that needs them.

    Returns
    -------
    tuple
        (input_fullname_list, output_fullname, para_dict).

    Raises
    ------
    ValueError
        If the sub-layer type is not 'EL', 'RL' or 'ML' (or the name does not
        split into exactly two parts on '**' and on '=>').
    """
    kind, io_spec = sublayer_name.split('**')
    joined_inputs, output_fullname = io_spec.split('=>')
    input_fullname_list = joined_inputs.split('^')

    if kind not in ('EL', 'RL', 'ML'):
        raise ValueError(f'The sublayer type {kind} is not available')

    if kind == 'EL':
        para_dict = get_EL_sublayer_para_list(input_fullname_list,
                                              FLD_2_VOCABSIZE,
                                              embed_size,
                                              default_learner_para,
                                              expander_process,
                                              default_process,
                                              Ignore_PSN_Layers)
    elif kind == 'RL':
        para_dict = get_RL_sublayer_para_list(input_fullname_list,
                                              embed_size,
                                              default_learner_para,
                                              default_reducer_para,
                                              default_process,
                                              Ignore_PSN_Layers)
    else:  # 'ML'
        para_dict = get_ML_sublayer_para_list(input_fullname_list,
                                              embed_size,
                                              default_learner_para,
                                              default_process,
                                              Ignore_PSN_Layers)

    return input_fullname_list, output_fullname, para_dict

## Module

In [19]:
import os
import torch
import numpy as np

# from .expander import Expander_Layer
# from .learner import Learner_Layer
# from .merger import Merger_Layer

from fieldnn.sublayer.expander import Expander_Layer
from fieldnn.sublayer.learner import Learner_Layer
from fieldnn.sublayer.merger import Merger_Layer
from fieldnn.sublayer.reducer import Reducer_Layer


class Pipeline_Layer(torch.nn.Module):
    """A short chain of sub-layers run in order on a shared field dictionary.

    `para_dict` maps a sub-layer kind ('Expander', 'Merger', 'Reducer',
    'Learner') to its parameters. Sub-layers are stored in `self.Layers` in
    `para_dict` order. Expander, Merger and Reducer are stored under
    `input_fullname`; the Learner is stored under `output_fullname`, because it
    consumes what the preceding sub-layer produced.

    Parameters
    ----------
    pipeline_name : str
        Name of the pipeline, e.g. 'EL**<input>=><output>'; stored only.
    input_fullname : str
        Full name of the input field; several merged inputs are joined by '^'.
    output_fullname : str
        Full name of the field this pipeline produces.
    para_dict : dict
        Sub-layer kind -> parameters for that sub-layer.

    Raises
    ------
    ValueError
        If `para_dict` contains an unknown sub-layer kind.
    AssertionError
        If the names are inconsistent with the requested sub-layer kind.
    """

    def __init__(self, pipeline_name, input_fullname, output_fullname, para_dict):
        super().__init__()

        self.pipeline_name = pipeline_name
        self.input_fullname = input_fullname
        self.output_fullname = output_fullname
        self.para_dict = para_dict

        self.Layers = torch.nn.ModuleDict()
        for name, para in para_dict.items():
            if name == 'Expander':
                # An expander turns a raw '...Grn' field into the same name without 'Grn'.
                assert input_fullname.endswith('Grn')
                assert input_fullname.replace('Grn', '') == output_fullname
                self.Layers[input_fullname] = Expander_Layer(input_fullname, output_fullname, para)

            elif name == 'Merger':
                assert len(input_fullname.split('^')) > 1
                self.Layers[input_fullname] = Merger_Layer(input_fullname, output_fullname, para)

            elif name == 'Reducer':
                self.Layers[input_fullname] = Reducer_Layer(input_fullname, output_fullname, para)

            elif name == 'Learner':
                assert output_fullname in para
                self.Layers[output_fullname] = Learner_Layer(output_fullname, output_fullname, para)

            else:
                raise ValueError(f'The sublayer name "{name}" is not available')

    @staticmethod
    def _require_field(fullname2data, fullname):
        """Return `fullname2data[fullname]`, or raise a KeyError naming the gap."""
        if fullname not in fullname2data:
            raise KeyError(
                f'Field "{fullname}" is required by the pipeline but is missing; '
                f'available fields: {list(fullname2data)}'
            )
        return fullname2data[fullname]

    def forward(self, fullname2data):
        """Run every sub-layer and store its result in `fullname2data`.

        Parameters
        ----------
        fullname2data : dict
            Full field name -> {'holder': ..., 'info': ...}. Updated IN PLACE:
            inputs are kept and each sub-layer's output is added under the
            name that sub-layer returns.

        Returns
        -------
        dict
            The same `fullname2data` object.

        Raises
        ------
        KeyError
            If a field a sub-layer needs is not present in `fullname2data`.
        """
        for input_fullname, Layer in self.Layers.items():
            if '^' not in input_fullname:
                # Single-input sub-layer: called with (name, holder, info).
                data = self._require_field(fullname2data, input_fullname)
                holder, info = data['holder'], data['info']
                fullname, holder, info = Layer(input_fullname, holder, info)
            else:
                # Merger: called with a dict holding only the fields it merges.
                merge_inputs = {
                    k: self._require_field(fullname2data, k)
                    for k in input_fullname.split('^')
                }
                fullname, holder, info = Layer(input_fullname, merge_inputs)

            fullname2data[fullname] = {'holder': holder, 'info': info}

        return fullname2data
    

## Usage

### Simulate Data

In [20]:
import torch
from pprint import pprint
import numpy as np
from fieldnn.utils.layerfn import traverse
from fieldnn.utils.simulate import get_next_info, get_simulated_tensor_from_fldname

# --- Simulation settings ------------------------------------------------------
SEED = 0                # fixed so that "Restart Kernel -> Run All" draws the same vocab sizes
MIN_VOCAB_SIZE = 2      # the original np.random.randint(5000) could return 0 (an empty vocabulary)
MAX_VOCAB_SIZE = 5000   # exclusive upper bound, same as the original draw

B_lenP = 3              # presumably the batch size (recorded shapes start with 3) - TODO confirm
B2P_lnEC = [6, 5, 2]    # presumably per-sample lengths of the second layer - TODO confirm
prefix_layers_num = 2
vocab_size = 100        # placeholder; overwritten for every field in the loop below
Ignore_PSN_Layers = ['B', 'St']

# --- Field list ---------------------------------------------------------------
# Each field is a '-'-separated path of layers; ':' attaches a column to a layer
# and the 'Grn' suffix marks the raw index tensors that are simulated here.
FLD_LIST = [
    'B-St-Tk:SlfGrn',
    'B-St-Tk:POSGrn',
    'B-St-Tk:AnnoGrn',
    'B-St-Tk:SubWord-CharGrn',
    'B-St-Tk:SubWord-SyllableGrn',
    'B-St-Tk:SubWord-PhonemeGrn',
]

# Alternative (currently disabled) field list:
# FLD_LIST = [
# 'B-P-EC:Diag-DiagRec:DiagV-DiagVdftGrn',
# 'B-P-EC:Diag-DiagRec:DiagDT-DiagDTdftGrn',
# 'B-P-EC:Med-MedRec:MedV-MedVdftGrn',
# 'B-P-EC:Med-MedRec:MedDT-MedDTdftGrn',
# 'B-P-EC:A1C-A1CRec:A1CV-A1CVdftGrn',
# 'B-P-EC:A1C-A1CRec:A1CDT-A1CDTdftGrn',
# 'B-P-EC:PN-PNRec:SctName-SNdftGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:SelfGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:POSGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:SubWord-CharGrn',
# ]

# --- Derived lookups ----------------------------------------------------------
# Short name (last '-' component) -> full field name.
NAME_2_FULLNAME = {i.split('-')[-1]: i for i in FLD_LIST}

# Random vocabulary size per field. Seeding makes the draw reproducible; it only
# makes the simulated tensors reproducible as well if the simulator uses NumPy's
# global random state - TODO confirm against fieldnn.utils.simulate.
np.random.seed(SEED)
FLD_2_VOCABSIZE = {k: np.random.randint(MIN_VOCAB_SIZE, MAX_VOCAB_SIZE) for k in FLD_LIST}

# --- Simulated tensors --------------------------------------------------------
# Full field name -> {'holder': LongTensor of simulated indices, 'info': placeholder}.
FLD_2_DATA = {}

for fullname in FLD_LIST:
    vocab_size = FLD_2_VOCABSIZE[fullname]
    info_idx = get_simulated_tensor_from_fldname(fullname, B_lenP, B2P_lnEC, prefix_layers_num, vocab_size)
    holder = torch.LongTensor(info_idx)
    # 'info' is the string 'Empty' until a pipeline layer writes a real value.
    FLD_2_DATA[fullname] = {'holder': holder, 'info': 'Empty'}

# --- Model configuration ------------------------------------------------------
embed_size = 512

# Post-processing for expanders: dropout + layernorm, no activator.
expander_process = {# 'activator': 'gelu',
           'dropout': {'p': 0.5, 'inplace': False},
           'layernorm': {'eps': 1e-05, 'elementwise_affine': True}}

# Post-processing for all other sub-layers: gelu + dropout + layernorm.
default_process = {'activator': 'gelu',
           'dropout': {'p': 0.5, 'inplace': False},
           'layernorm': {'eps': 1e-05, 'elementwise_affine': True}}

default_learner_para  = {
    'nn_name': 'TFM',
    'nn_para': {'num_encoder_layers': 6}
}
default_reducer_para  = {
    'nn_name': 'Max',
}
##################################

1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 9)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 4)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 7)


In [21]:
# Names of all fields currently stored in FLD_2_DATA, in insertion order.
list(FLD_2_DATA)

['B-St-Tk:SlfGrn',
 'B-St-Tk:POSGrn',
 'B-St-Tk:AnnoGrn',
 'B-St-Tk:SubWord-CharGrn',
 'B-St-Tk:SubWord-SyllableGrn',
 'B-St-Tk:SubWord-PhonemeGrn']

### Pipeline 1.1

In [22]:
######################################################
# Pipeline to build in this section; exactly one assignment is active, the
# others are kept as alternatives to switch to.
pipeline_name = 'EL**B-St-Tk:SubWord-CharGrn=>B-St-Tk:SubWord-Char'
# pipeline_name = 'RL**B-St-Tk:SubWord-Char=>B-St-Tk:SubWord:Char'

# pipeline_name = 'EL**B-St-Tk:SubWord-PhonemeGrn=>B-St-Tk:SubWord-Phoneme'
# pipeline_name = 'RL**B-St-Tk:SubWord-Phoneme=>B-St-Tk:SubWord:Phoneme'

# pipeline_name = 'EL**B-St-Tk:SubWord-SyllableGrn=>B-St-Tk:SubWord-Syllable'
# pipeline_name = 'RL**B-St-Tk:SubWord-Syllable=>B-St-Tk:SubWord:Syllable'

# pipeline_name = 'ML**B-St-Tk:SubWord:Char^B-St-Tk:SubWord:Phoneme^B-St-Tk:SubWord:Syllable=>B-St-Tk:SubWord-Char&Phoneme&Syllable'
# pipeline_name = 'RL**B-St-Tk:SubWord-Char&Phoneme&Syllable=>B-St-Tk:SubWord:Char&Phoneme&Syllable'
######################################################

# Parse the pipeline name and build the parameters of its sub-layers.
input_fullname_list, output_fullname, para_dict = process_pipeline_name(pipeline_name, FLD_2_VOCABSIZE, embed_size, 
                                                                        default_learner_para,  default_reducer_para,
                                                                        expander_process, default_process, Ignore_PSN_Layers)

# Pipeline_Layer takes its inputs as one '^'-joined string.
input_fullname = '^'.join(input_fullname_list)

print(input_fullname)
print(output_fullname)
# pprint(para_dict)
PipeLine = Pipeline_Layer(pipeline_name, input_fullname, output_fullname, para_dict)

# print(PipeLine)

# print(PipeLine)

B-St-Tk:SubWord-CharGrn
B-St-Tk:SubWord-Char


In [23]:
# PipeLine.Layers

In [24]:
# Field names before and after running the pipeline. The pipeline adds its
# outputs to the dictionary it is given and returns that same dictionary.
print(list(FLD_2_DATA))
FLD_2_DATA = PipeLine(FLD_2_DATA)
print(list(FLD_2_DATA))

['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char']


In [25]:
# Show the index tensor ('holder') of the pipeline's input field.
print(input_fullname)
data = FLD_2_DATA[input_fullname]
holder = data['holder']
holder

B-St-Tk:SubWord-CharGrn


tensor([[[ 45, 104,  85,   0,   0,   0,   0,   0,   0],
         [307, 431, 214,   0,   0,   0,   0,   0,   0],
         [134, 337, 363, 391, 344,  94, 484,   0,   0],
         [429,  54, 272, 416,   0,   0,   0,   0,   0],
         [ 52, 339, 190, 181,   0,   0,   0,   0,   0],
         [311, 403,   0,   0,   0,   0,   0,   0,   0]],

        [[ 81, 369, 430,   0,   0,   0,   0,   0,   0],
         [ 49, 346, 269,  54,   0,   0,   0,   0,   0],
         [  4,  40, 405, 248,  82, 310, 243, 397,   0],
         [400,   2, 326, 428,   0,   0,   0,   0,   0],
         [315, 168, 132, 141,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0]],

        [[481, 213, 227, 356,   0,   0,   0,   0,   0],
         [475, 308, 411, 252,   9, 159,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,

In [26]:
# Show the 'holder' stored for the pipeline's output field; in the recorded
# output it has the same values as the input field's holder.
print(output_fullname)
data = FLD_2_DATA[output_fullname]
holder = data['holder']
holder

B-St-Tk:SubWord-Char


tensor([[[ 45, 104,  85,   0,   0,   0,   0,   0,   0],
         [307, 431, 214,   0,   0,   0,   0,   0,   0],
         [134, 337, 363, 391, 344,  94, 484,   0,   0],
         [429,  54, 272, 416,   0,   0,   0,   0,   0],
         [ 52, 339, 190, 181,   0,   0,   0,   0,   0],
         [311, 403,   0,   0,   0,   0,   0,   0,   0]],

        [[ 81, 369, 430,   0,   0,   0,   0,   0,   0],
         [ 49, 346, 269,  54,   0,   0,   0,   0,   0],
         [  4,  40, 405, 248,  82, 310, 243, 397,   0],
         [400,   2, 326, 428,   0,   0,   0,   0,   0],
         [315, 168, 132, 141,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0]],

        [[481, 213, 227, 356,   0,   0,   0,   0,   0],
         [475, 308, 411, 252,   9, 159,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,

In [27]:
# `data` is whatever the previous cell selected (the pipeline output entry).
# Display the slice of its 'info' tensor at index -1 of the fourth axis.
info = data['info']
info[:,:,:,-1]

tensor([[[-2.2399e+00, -2.4432e-01, -1.9674e-02,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [-1.2034e-03, -1.1415e-02,  3.3665e-02,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [-1.5489e+00,  2.1165e-02,  9.7997e-01,  8.9551e-01,  5.5132e-03,
          -9.9906e-01, -6.4794e-03,  0.0000e+00,  0.0000e+00],
         [-7.7219e-01, -5.6175e-02, -5.1582e-01,  2.5596e-02,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 3.2194e-02, -7.8172e-02,  2.6554e+00, -5.1002e-01,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 2.8408e-02, -8.8949e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],

        [[-1.5224e-02,  1.5349e-02, -4.9126e-02,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [-2.3654e+00, -1.6772e+

### Pipeline 1.2

In [28]:
# Pipeline 1.2 -- the 'RL**' step of the Char chain:
#   'B-St-Tk:SubWord-Char' => 'B-St-Tk:SubWord:Char'
pipeline_name = 'RL**B-St-Tk:SubWord-Char=>B-St-Tk:SubWord:Char'

# Parse the pipeline name into its input field(s), its output field and the
# parameter dict used to build the layer.
input_fullname_list, output_fullname, para_dict = process_pipeline_name(
    pipeline_name,
    FLD_2_VOCABSIZE,
    embed_size,
    default_learner_para,
    default_reducer_para,
    expander_process,
    default_process,
    Ignore_PSN_Layers,
)

# Several input fields are addressed through a single '^'-joined key.
input_fullname = '^'.join(input_fullname_list)
print(input_fullname, output_fullname, sep='\n')

PipeLine = Pipeline_Layer(pipeline_name, input_fullname, output_fullname, para_dict)

B-St-Tk:SubWord-Char
B-St-Tk:SubWord:Char


In [29]:
# Apply the pipeline and list the field names before and after.
# NOTE: FLD_2_DATA is rebound, so re-running this cell feeds the already
# processed dict back into the pipeline.
print(list(FLD_2_DATA))
FLD_2_DATA = PipeLine(FLD_2_DATA)
print(list(FLD_2_DATA))

['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char']


In [30]:
# Inspect the input field of pipeline 1.2: print its full name, then display
# the 'holder' tensor stored for it in FLD_2_DATA.
print(input_fullname)
data = FLD_2_DATA[input_fullname]
holder = data['holder']
holder

B-St-Tk:SubWord-Char


tensor([[[ 45, 104,  85,   0,   0,   0,   0,   0,   0],
         [307, 431, 214,   0,   0,   0,   0,   0,   0],
         [134, 337, 363, 391, 344,  94, 484,   0,   0],
         [429,  54, 272, 416,   0,   0,   0,   0,   0],
         [ 52, 339, 190, 181,   0,   0,   0,   0,   0],
         [311, 403,   0,   0,   0,   0,   0,   0,   0]],

        [[ 81, 369, 430,   0,   0,   0,   0,   0,   0],
         [ 49, 346, 269,  54,   0,   0,   0,   0,   0],
         [  4,  40, 405, 248,  82, 310, 243, 397,   0],
         [400,   2, 326, 428,   0,   0,   0,   0,   0],
         [315, 168, 132, 141,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0]],

        [[481, 213, 227, 356,   0,   0,   0,   0,   0],
         [475, 308, 411, 252,   9, 159,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,   0,   0],
         [  0,   0,   0,   0,   0,   0,   0,

In [31]:
# Inspect the output field of pipeline 1.2: print its full name, then display
# the 'holder' tensor the pipeline stored for it in FLD_2_DATA.
print(output_fullname)
data = FLD_2_DATA[output_fullname]
holder = data['holder']
holder

B-St-Tk:SubWord:Char


tensor([[3, 3, 7, 4, 4, 2],
        [3, 4, 8, 4, 4, 0],
        [4, 6, 0, 0, 0, 0]])

### Pipeline 2.1

In [32]:
# Pipeline 2.1 -- the 'EL**' step of the Phoneme chain:
#   'B-St-Tk:SubWord-PhonemeGrn' => 'B-St-Tk:SubWord-Phoneme'
pipeline_name = 'EL**B-St-Tk:SubWord-PhonemeGrn=>B-St-Tk:SubWord-Phoneme'

# Parse the pipeline name into its input field(s), its output field and the
# parameter dict used to build the layer.
input_fullname_list, output_fullname, para_dict = process_pipeline_name(
    pipeline_name,
    FLD_2_VOCABSIZE,
    embed_size,
    default_learner_para,
    default_reducer_para,
    expander_process,
    default_process,
    Ignore_PSN_Layers,
)

# Several input fields are addressed through a single '^'-joined key.
input_fullname = '^'.join(input_fullname_list)
print(input_fullname, output_fullname, sep='\n')

PipeLine = Pipeline_Layer(pipeline_name, input_fullname, output_fullname, para_dict)

# Apply the pipeline and list the field names before and after.
print(list(FLD_2_DATA))
FLD_2_DATA = PipeLine(FLD_2_DATA)
print(list(FLD_2_DATA))

B-St-Tk:SubWord-PhonemeGrn
B-St-Tk:SubWord-Phoneme
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme']


### Pipeline 2.2

In [33]:
# Pipeline 2.2 -- the 'RL**' step of the Phoneme chain:
#   'B-St-Tk:SubWord-Phoneme' => 'B-St-Tk:SubWord:Phoneme'
pipeline_name = 'RL**B-St-Tk:SubWord-Phoneme=>B-St-Tk:SubWord:Phoneme'

# Parse the pipeline name into its input field(s), its output field and the
# parameter dict used to build the layer.
input_fullname_list, output_fullname, para_dict = process_pipeline_name(
    pipeline_name,
    FLD_2_VOCABSIZE,
    embed_size,
    default_learner_para,
    default_reducer_para,
    expander_process,
    default_process,
    Ignore_PSN_Layers,
)

# Several input fields are addressed through a single '^'-joined key.
input_fullname = '^'.join(input_fullname_list)
print(input_fullname, output_fullname, sep='\n')

PipeLine = Pipeline_Layer(pipeline_name, input_fullname, output_fullname, para_dict)

# Apply the pipeline and list the field names before and after.
print(list(FLD_2_DATA))
FLD_2_DATA = PipeLine(FLD_2_DATA)
print(list(FLD_2_DATA))

B-St-Tk:SubWord-Phoneme
B-St-Tk:SubWord:Phoneme
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme']


### Pipeline 3.1

In [34]:
# Pipeline 3.1 -- the 'EL**' step of the Syllable chain:
#   'B-St-Tk:SubWord-SyllableGrn' => 'B-St-Tk:SubWord-Syllable'
# This cell only derives and prints the parameters; the layer itself is built
# in the next cell.
pipeline_name = 'EL**B-St-Tk:SubWord-SyllableGrn=>B-St-Tk:SubWord-Syllable'

input_fullname_list, output_fullname, para_dict = process_pipeline_name(
    pipeline_name,
    FLD_2_VOCABSIZE,
    embed_size,
    default_learner_para,
    default_reducer_para,
    expander_process,
    default_process,
    Ignore_PSN_Layers,
)

# Several input fields are addressed through a single '^'-joined key.
input_fullname = '^'.join(input_fullname_list)

print(input_fullname, output_fullname, sep='\n')
pprint(para_dict)

B-St-Tk:SubWord-SyllableGrn
B-St-Tk:SubWord-Syllable
{'Expander': {'B-St-Tk:SubWord-SyllableGrn': ('Embedding',
                                              {'embedding_size': 512,
                                               'init': 'random',
                                               'input_size': 445}),
              'Ignore_PSN_Layers': ['B', 'St'],
              'input_size': None,
              'output_size': 512,
              'postprocess': {'dropout': {'inplace': False, 'p': 0.5},
                              'layernorm': {'elementwise_affine': True,
                                            'eps': 1e-05}}},
 'Learner': {'B-St-Tk:SubWord-Syllable': ('TFM',
                                          {'dim_feedforward': 2048,
                                           'input_size': 512,
                                           'nhead': 8,
                                           'num_decoder_layers': 0,
                                           'num_encoder_layers'

In [35]:

# Build the layer for pipeline 3.1 from the name, fields and parameters
# computed in the previous cell.
PipeLine = Pipeline_Layer(pipeline_name, input_fullname, output_fullname, para_dict)
# print(PipeLine)


In [36]:
# Largest index present in the input field's 'holder' tensor.
# NOTE(review): presumably a sanity check that it stays below the Embedding
# 'input_size' printed in para_dict above -- confirm.
data = FLD_2_DATA[input_fullname]
data['holder'].max()

tensor(434)

In [37]:

# Apply pipeline 3.1 and list the field names before and after.
# NOTE: FLD_2_DATA is rebound, so re-running this cell feeds the already
# processed dict back into the pipeline.
print(list(FLD_2_DATA))
FLD_2_DATA = PipeLine(FLD_2_DATA)
print(list(FLD_2_DATA))

['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable']


### Pipeline 3.2

In [38]:
# Pipeline 3.2 -- the 'RL**' step of the Syllable chain:
#   'B-St-Tk:SubWord-Syllable' => 'B-St-Tk:SubWord:Syllable'
pipeline_name = 'RL**B-St-Tk:SubWord-Syllable=>B-St-Tk:SubWord:Syllable'

# Parse the pipeline name into its input field(s), its output field and the
# parameter dict used to build the layer.
input_fullname_list, output_fullname, para_dict = process_pipeline_name(
    pipeline_name,
    FLD_2_VOCABSIZE,
    embed_size,
    default_learner_para,
    default_reducer_para,
    expander_process,
    default_process,
    Ignore_PSN_Layers,
)

# Several input fields are addressed through a single '^'-joined key.
input_fullname = '^'.join(input_fullname_list)
print(input_fullname, output_fullname, sep='\n')

PipeLine = Pipeline_Layer(pipeline_name, input_fullname, output_fullname, para_dict)

# Apply the pipeline and list the field names before and after.
print(list(FLD_2_DATA))
FLD_2_DATA = PipeLine(FLD_2_DATA)
print(list(FLD_2_DATA))

B-St-Tk:SubWord-Syllable
B-St-Tk:SubWord:Syllable
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable']


### Pipeline 4.1

In [39]:
# Pipeline 4.1 -- the 'ML**' step that takes the three '^'-separated inputs
#   'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord:Syllable'
# and produces 'B-St-Tk:SubWord-Char&Phoneme&Syllable'.
pipeline_name = 'ML**B-St-Tk:SubWord:Char^B-St-Tk:SubWord:Phoneme^B-St-Tk:SubWord:Syllable=>B-St-Tk:SubWord-Char&Phoneme&Syllable'

# Parse the pipeline name into its input field(s), its output field and the
# parameter dict used to build the layer.
input_fullname_list, output_fullname, para_dict = process_pipeline_name(
    pipeline_name,
    FLD_2_VOCABSIZE,
    embed_size,
    default_learner_para,
    default_reducer_para,
    expander_process,
    default_process,
    Ignore_PSN_Layers,
)

# The three input fields are addressed through a single '^'-joined key.
input_fullname = '^'.join(input_fullname_list)
print(input_fullname, output_fullname, sep='\n')

PipeLine = Pipeline_Layer(pipeline_name, input_fullname, output_fullname, para_dict)

B-St-Tk:SubWord:Char^B-St-Tk:SubWord:Phoneme^B-St-Tk:SubWord:Syllable
B-St-Tk:SubWord-Char&Phoneme&Syllable


In [40]:
# PipeLine.Layers

In [41]:
# Apply pipeline 4.1 and list the field names before and after.
# NOTE: FLD_2_DATA is rebound, so re-running this cell feeds the already
# processed dict back into the pipeline.
print(list(FLD_2_DATA))
FLD_2_DATA = PipeLine(FLD_2_DATA)
print(list(FLD_2_DATA))

['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable', 'B-St-Tk:SubWord-Char&Phoneme&Syllable']


### Pipeline 4.2

In [42]:
# Pipeline 4.2 -- the 'RL**' step applied to the merged field:
#   'B-St-Tk:SubWord-Char&Phoneme&Syllable' => 'B-St-Tk:SubWord:Char&Phoneme&Syllable'
pipeline_name = 'RL**B-St-Tk:SubWord-Char&Phoneme&Syllable=>B-St-Tk:SubWord:Char&Phoneme&Syllable'

# Parse the pipeline name into its input field(s), its output field and the
# parameter dict used to build the layer.
input_fullname_list, output_fullname, para_dict = process_pipeline_name(
    pipeline_name,
    FLD_2_VOCABSIZE,
    embed_size,
    default_learner_para,
    default_reducer_para,
    expander_process,
    default_process,
    Ignore_PSN_Layers,
)

# Several input fields are addressed through a single '^'-joined key.
input_fullname = '^'.join(input_fullname_list)
print(input_fullname, output_fullname, sep='\n')

PipeLine = Pipeline_Layer(pipeline_name, input_fullname, output_fullname, para_dict)

# Apply the pipeline and list the field names before and after.
print(list(FLD_2_DATA))
FLD_2_DATA = PipeLine(FLD_2_DATA)
print(list(FLD_2_DATA))

B-St-Tk:SubWord-Char&Phoneme&Syllable
B-St-Tk:SubWord:Char&Phoneme&Syllable
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable', 'B-St-Tk:SubWord-Char&Phoneme&Syllable']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable', 'B-St-Tk:SubWord-Char&Phoneme&Syllable', 'B-St-Tk:SubWord:Char&Phoneme&Syllable']


In [43]:
# Inspect the final output field of the SubWord chain: print its full name,
# then display the 'holder' tensor stored for it in FLD_2_DATA.
print(output_fullname)
data = FLD_2_DATA[output_fullname]
holder = data['holder']
holder

B-St-Tk:SubWord:Char&Phoneme&Syllable


tensor([[3, 3, 3, 3, 3, 3],
        [3, 3, 3, 3, 3, 0],
        [3, 3, 0, 0, 0, 0]])

# Struct Module

## Module

In [44]:
import os
import torch
import numpy as np

# from .pipeline import Pipeline_Layer
# from ..utils.parafn import process_sublayer_name

class Struct_Layer(torch.nn.Module):
    """Run every pipeline of one structure and publish its final output field.

    A structure is described by two dicts that share the same keys (input
    full names):

    * ``struct_para['D_model']`` maps an input full name to the list of
      pipeline names to build for it (the list may be empty).
    * ``struct_para['D_data']`` maps the same input full name to the field
      name that must exist in ``FLD_2_DATA`` once those pipelines have run.

    Parameters
    ----------
    struct_name : str
        Name of the structure; stored and used in error messages.
    struct_para : dict
        Must provide ``'final_fullname_output'``, ``'D_model'`` and
        ``'D_data'``.
    meta_para : dict
        Must provide ``'FLD_2_VOCABSIZE'``, ``'embed_size'``,
        ``'default_learner_para'``, ``'default_reducer_para'``,
        ``'expander_process'``, ``'default_process'`` and
        ``'Ignore_PSN_Layers'``; all are forwarded to
        ``process_pipeline_name``.
    """

    def __init__(self, struct_name, struct_para, meta_para):
        super().__init__()
        self.struct_name = struct_name

        self.final_fullname_output = struct_para['final_fullname_output']
        self.D_model = struct_para['D_model']
        self.D_data = struct_para['D_data']

        self.FLD_2_VOCABSIZE = meta_para['FLD_2_VOCABSIZE']
        self.embed_size = meta_para['embed_size']
        self.default_learner_para = meta_para['default_learner_para']
        self.default_reducer_para = meta_para['default_reducer_para']
        self.expander_process = meta_para['expander_process']
        self.default_process = meta_para['default_process']
        self.Ignore_PSN_Layers = meta_para['Ignore_PSN_Layers']

        # Two-level registry: input full name -> pipeline name -> Pipeline_Layer.
        # ModuleDict keeps insertion order, so pipelines run in D_model order.
        self.Layers = torch.nn.ModuleDict()

        for input_fullname, pipeline_list in self.D_model.items():
            self.Layers[input_fullname] = torch.nn.ModuleDict()
            for pipeline_name in pipeline_list:
                input_fullname_list, output_fullname, para_dict = process_pipeline_name(
                    pipeline_name,
                    self.FLD_2_VOCABSIZE,
                    self.embed_size,
                    self.default_learner_para,
                    self.default_reducer_para,
                    self.expander_process,
                    self.default_process,
                    self.Ignore_PSN_Layers,
                )
                # A pipeline with several inputs is keyed by their '^'-joined names.
                pipe_input_fullname = '^'.join(input_fullname_list)
                self.Layers[input_fullname][pipeline_name] = Pipeline_Layer(
                    pipeline_name, pipe_input_fullname, output_fullname, para_dict
                )

    def forward(self, FLD_2_DATA):
        """Apply all pipelines in ``D_data`` order and alias the final output.

        Parameters
        ----------
        FLD_2_DATA : dict
            Maps field full names to their data entries. It is passed through
            each pipeline in turn and rebound to whatever the pipeline returns.

        Returns
        -------
        dict
            The resulting ``FLD_2_DATA`` with an extra key
            ``final_fullname_output`` that refers to the same entry object as
            the output of the last ``D_data`` item (an alias, not a copy).

        Raises
        ------
        ValueError
            If ``D_data`` is empty, because there is then no output to publish.
        AssertionError
            If an expected output field is missing after its pipelines ran, or
            if ``final_fullname_output`` is not a substring of the last output
            name.
        """
        if not self.D_data:
            # Previously this surfaced as an UnboundLocalError after the loop.
            raise ValueError(
                f'Struct_Layer {self.struct_name!r}: D_data is empty, there is no output field to publish'
            )

        last_output_full_name = None
        for input_fullname, output_full_name in self.D_data.items():
            for pipeline_name, Pipeline in self.Layers[input_fullname].items():
                print(f'\npipeline_name <---------- {pipeline_name} ')
                print(list(FLD_2_DATA))
                FLD_2_DATA = Pipeline(FLD_2_DATA)
                print(list(FLD_2_DATA))
            assert output_full_name in FLD_2_DATA, (
                f'{output_full_name!r} is missing from FLD_2_DATA after the pipelines of {input_fullname!r}'
            )
            # The output of the last D_data entry is the one that gets published.
            last_output_full_name = output_full_name

        # Publish the last output under the struct's final name.
        assert self.final_fullname_output in last_output_full_name, (
            f'{self.final_fullname_output!r} is not part of the last output name {last_output_full_name!r}'
        )
        FLD_2_DATA[self.final_fullname_output] = FLD_2_DATA[last_output_full_name]
        print('\n Final <---------- ')
        print(list(FLD_2_DATA))
        return FLD_2_DATA

## Usage

In [45]:
import torch
from pprint import pprint
import numpy as np
from fieldnn.utils.layerfn import traverse
from fieldnn.utils.simulate import get_next_info, get_simulated_tensor_from_fldname

# --- Reproducibility ------------------------------------------------------
# Seed both RNGs so that Restart & Run All regenerates the same vocabulary
# sizes, simulated tensors and layer initialisations.
# NOTE(review): assumes the fieldnn simulate helpers draw from NumPy's global
# RNG -- confirm.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Simulation shape parameters (passed straight to the simulate helper) ---
B_lenP = 3
B2P_lnEC = [6, 5, 2]
prefix_layers_num = 2
vocab_size = 100  # placeholder; overwritten per field in the loop below

# --- Fields to simulate -----------------------------------------------------
FLD_LIST = [
    'B-St-Tk:SlfGrn',
    'B-St-Tk:POSGrn',
    'B-St-Tk:AnnoGrn',
    'B-St-Tk:SubWord-CharGrn',
    'B-St-Tk:SubWord-SyllableGrn',
    'B-St-Tk:SubWord-PhonemeGrn',
]

# Alternative field list (kept for reference):
# FLD_LIST = [
# 'B-P-EC:Diag-DiagRec:DiagV-DiagVdftGrn',
# 'B-P-EC:Diag-DiagRec:DiagDT-DiagDTdftGrn',
# 'B-P-EC:Med-MedRec:MedV-MedVdftGrn',
# 'B-P-EC:Med-MedRec:MedDT-MedDTdftGrn',
# 'B-P-EC:A1C-A1CRec:A1CV-A1CVdftGrn',
# 'B-P-EC:A1C-A1CRec:A1CDT-A1CDTdftGrn',
# 'B-P-EC:PN-PNRec:SctName-SNdftGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:SelfGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:POSGrn',
# 'B-P-EC:PN-PNRec:SctText-SctSent-Tk:SubWord-CharGrn',
# ]

# The first two '-'-separated levels of the first field (here ['B', 'St']).
Ignore_PSN_Layers = FLD_LIST[0].split('-')[:2]

# Last '-'-separated component of each field -> its full name.
NAME_2_FULLNAME = {i.split('-')[-1]: i for i in FLD_LIST}

# --- Random vocabulary size per field ---------------------------------------
# The lower bound avoids degenerate draws: np.random.randint(5000) can return
# 0 or 1. Index 0 appears to be used as padding in the simulated tensors, so
# at least one further index is needed -- TODO confirm against the helper.
MIN_VOCAB_SIZE = 2
MAX_VOCAB_SIZE = 5000  # exclusive upper bound, as before
FLD_2_VOCABSIZE = {k: np.random.randint(MIN_VOCAB_SIZE, MAX_VOCAB_SIZE) for k in FLD_LIST}

# --- Simulated input data ---------------------------------------------------
# Each field gets an index tensor ('holder'); 'info' starts as the
# placeholder string 'Empty'.
FLD_2_DATA = {}

for fullname in FLD_LIST:
    vocab_size = FLD_2_VOCABSIZE[fullname]
    info_idx = get_simulated_tensor_from_fldname(fullname, B_lenP, B2P_lnEC, prefix_layers_num, vocab_size)
    holder = torch.LongTensor(info_idx)
    FLD_2_DATA[fullname] = {'holder': holder, 'info': 'Empty'}

# --- Model hyper-parameters -------------------------------------------------
embed_size = 512

# Same as default_process but without the activator.
expander_process = {# 'activator': 'gelu',
    'dropout': {'p': 0.5, 'inplace': False},
    'layernorm': {'eps': 1e-05, 'elementwise_affine': True},
}

default_process = {
    'activator': 'gelu',
    'dropout': {'p': 0.5, 'inplace': False},
    'layernorm': {'eps': 1e-05, 'elementwise_affine': True},
}

default_learner_para = {
    'nn_name': 'TFM',
    'nn_para': {'num_encoder_layers': 6},
}
default_reducer_para = {
    'nn_name': 'Max',
}

# --- Bundle everything the struct / pipeline builders need -------------------
meta_para = {
    'FLD_2_VOCABSIZE': FLD_2_VOCABSIZE,
    'embed_size': embed_size,
    'expander_process': expander_process,
    'default_process': default_process,
    'default_learner_para': default_learner_para,
    'default_reducer_para': default_reducer_para,
    'Ignore_PSN_Layers': Ignore_PSN_Layers,
}

1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 8)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 9)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 9)


In [46]:
# Derive the structures implied by FLD_LIST, sort them by 'layers' in
# descending order, and map each 'layers' value to its 'struct_name' entry.
df_struct = get_structures_from_fldlist(FLD_LIST)

tmp = df_struct.sort_values('layers', ascending=False)
layer2modulelist = {
    layers: struct_names
    for layers, struct_names in zip(tmp['layers'].to_list(), tmp['struct_name'].to_list())
}
layer2modulelist

{'2-3': ['CharGrn^PhonemeGrn^SyllableGrn==>Tk:SubWord'],
 '1-2': ['Tk:AnnoGrn^Tk:POSGrn^Tk:SlfGrn^Tk:SubWord==>St']}

### Struct 1

In [47]:
# Struct 1: the sub-word structure.
struct_name = 'CharGrn^PhonemeGrn^SyllableGrn==>Tk:SubWord'

# Resolve the struct name into full input names, the final output name and the
# struct model string. NAME_2_FULLNAME is rebound to the mapping returned by
# get_struct_info.
fullname_input_list, final_fullname_output, struct_model, NAME_2_FULLNAME = get_struct_info(
    struct_name, NAME_2_FULLNAME
)
fullname_input = '^'.join(fullname_input_list)
D_model, D_data = generate_structure(fullname_input_list, struct_model)

# Everything Struct_Layer needs, gathered in one dict.
struct_para = {
    'fullname_input_list': fullname_input_list,
    'fullname_input': fullname_input,
    'final_fullname_output': final_fullname_output,
    'struct_model': struct_model,
    'D_model': D_model,
    'D_data': D_data,
}

print(fullname_input_list, fullname_input, final_fullname_output, struct_model, sep='\n')
pprint(D_model)
pprint(D_data)

['B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-SyllableGrn']
B-St-Tk:SubWord-CharGrn^B-St-Tk:SubWord-PhonemeGrn^B-St-Tk:SubWord-SyllableGrn
B-St-Tk:SubWord
RLMLRL
{'B-St-Tk:SubWord-CharGrn': ['EL**B-St-Tk:SubWord-CharGrn=>B-St-Tk:SubWord-Char',
                             'RL**B-St-Tk:SubWord-Char=>B-St-Tk:SubWord:Char'],
 'B-St-Tk:SubWord-PhonemeGrn': ['EL**B-St-Tk:SubWord-PhonemeGrn=>B-St-Tk:SubWord-Phoneme',
                                'RL**B-St-Tk:SubWord-Phoneme=>B-St-Tk:SubWord:Phoneme'],
 'B-St-Tk:SubWord-SyllableGrn': ['EL**B-St-Tk:SubWord-SyllableGrn=>B-St-Tk:SubWord-Syllable',
                                 'RL**B-St-Tk:SubWord-Syllable=>B-St-Tk:SubWord:Syllable'],
 'B-St-Tk:SubWord:Char^B-St-Tk:SubWord:Phoneme^B-St-Tk:SubWord:Syllable': ['ML**B-St-Tk:SubWord:Char^B-St-Tk:SubWord:Phoneme^B-St-Tk:SubWord:Syllable=>B-St-Tk:SubWord-Char&Phoneme&Syllable',
                                                                           'RL**B-St-Tk:Su

In [48]:
# Build the struct layer from the parameters assembled above and run it.
# NOTE: FLD_2_DATA is rebound to the result, so re-running this cell feeds the
# already processed dict back into the struct.
Struct = Struct_Layer(struct_name, struct_para, meta_para)
# print(Struct)
FLD_2_DATA = Struct(FLD_2_DATA)


pipeline_name <---------- EL**B-St-Tk:SubWord-CharGrn=>B-St-Tk:SubWord-Char 
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char']

pipeline_name <---------- RL**B-St-Tk:SubWord-Char=>B-St-Tk:SubWord:Char 
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char']

pipeline_name <---------- EL**B-St-Tk:SubWord-PhonemeGrn=>B-St-Tk:SubWord-Phoneme 
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk

In [49]:
# Display the 'holder' tensor published under the struct's final output name.
# NOTE: `data` is rebound here and is read again by the next cell.
data = FLD_2_DATA[Struct.final_fullname_output]
data['holder']

tensor([[3, 3, 3, 3, 3, 3],
        [3, 3, 3, 3, 3, 0],
        [3, 3, 0, 0, 0, 0]])

In [50]:
# `data` comes from the previous cell; show its 'info' tensor at index -1 of
# the third axis.
data['info'][:,:,-1]

tensor([[-0.2597, -0.2254,  1.9363,  0.8205, -0.0266, -0.0107],
        [-0.0383,  0.0350, -0.0070,  6.2519, -0.0233,  0.0000],
        [-0.8864,  0.5318,  0.0000,  0.0000,  0.0000,  0.0000]],
       grad_fn=<SelectBackward0>)

### Struct 2

In [51]:
# Struct 2: the token-level structure. One of its inputs, 'Tk:SubWord', is the
# final output of the sub-word struct.
struct_name = 'Tk:AnnoGrn^Tk:POSGrn^Tk:SlfGrn^Tk:SubWord==>St'

# Resolve the struct name into full input names, the final output name and the
# struct model string. NAME_2_FULLNAME is rebound to the mapping returned by
# get_struct_info.
fullname_input_list, final_fullname_output, struct_model, NAME_2_FULLNAME = get_struct_info(
    struct_name, NAME_2_FULLNAME
)
fullname_input = '^'.join(fullname_input_list)
D_model, D_data = generate_structure(fullname_input_list, struct_model)

# Everything Struct_Layer needs, gathered in one dict.
struct_para = {
    'fullname_input_list': fullname_input_list,
    'fullname_input': fullname_input,
    'final_fullname_output': final_fullname_output,
    'struct_model': struct_model,
    'D_model': D_model,
    'D_data': D_data,
}

print(fullname_input_list, fullname_input, final_fullname_output, struct_model, sep='\n')
pprint(D_model)
pprint(D_data)

['B-St-Tk:AnnoGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:SlfGrn', 'B-St-Tk:SubWord']
B-St-Tk:AnnoGrn^B-St-Tk:POSGrn^B-St-Tk:SlfGrn^B-St-Tk:SubWord
B-St
MLRLRL
{'B-St-Tk:AnnoGrn': ['EL**B-St-Tk:AnnoGrn=>B-St-Tk:Anno'],
 'B-St-Tk:Anno^B-St-Tk:POS^B-St-Tk:Slf^B-St-Tk:SubWord': ['ML**B-St-Tk:Anno^B-St-Tk:POS^B-St-Tk:Slf^B-St-Tk:SubWord=>B-St-Tk-Anno&POS&Slf&SubWord',
                                                          'RL**B-St-Tk-Anno&POS&Slf&SubWord=>B-St-Tk:Anno&POS&Slf&SubWord',
                                                          'RL**B-St-Tk:Anno&POS&Slf&SubWord=>B-St:Tk:Anno&POS&Slf&SubWord'],
 'B-St-Tk:POSGrn': ['EL**B-St-Tk:POSGrn=>B-St-Tk:POS'],
 'B-St-Tk:SlfGrn': ['EL**B-St-Tk:SlfGrn=>B-St-Tk:Slf'],
 'B-St-Tk:SubWord': []}
{'B-St-Tk:AnnoGrn': 'B-St-Tk:Anno',
 'B-St-Tk:Anno^B-St-Tk:POS^B-St-Tk:Slf^B-St-Tk:SubWord': 'B-St:Tk:Anno&POS&Slf&SubWord',
 'B-St-Tk:POSGrn': 'B-St-Tk:POS',
 'B-St-Tk:SlfGrn': 'B-St-Tk:Slf',
 'B-St-Tk:SubWord': 'B-St-Tk:SubWord'}


In [52]:
# Build the second struct layer from the parameters assembled above and run it
# on the dict that already holds the first struct's outputs.
# NOTE: FLD_2_DATA is rebound to the result, so re-running this cell feeds the
# already processed dict back into the struct.
Struct = Struct_Layer(struct_name, struct_para, meta_para)
# print(Struct)
FLD_2_DATA = Struct(FLD_2_DATA)


pipeline_name <---------- EL**B-St-Tk:AnnoGrn=>B-St-Tk:Anno 
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable', 'B-St-Tk:SubWord-Char&Phoneme&Syllable', 'B-St-Tk:SubWord:Char&Phoneme&Syllable', 'B-St-Tk:SubWord']
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable', 'B-St-Tk:SubWord-Char&Phoneme&Syllable', 'B-St-Tk:SubWord:Char&Phoneme&Syllable', 'B-St-Tk:SubWord', 'B-St-Tk:Anno']

pipeline_name <---------- EL**B-St-Tk:POSGrn=>B-St-Tk:POS 
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 

In [53]:
# RL**B-St-Tk:Anno&POS&Slf&SubWord=>B-St:Tk:Anno&POS&Slf&SubWord

In [54]:
[i for i in Struct.Layers]

['B-St-Tk:AnnoGrn',
 'B-St-Tk:POSGrn',
 'B-St-Tk:SlfGrn',
 'B-St-Tk:SubWord',
 'B-St-Tk:Anno^B-St-Tk:POS^B-St-Tk:Slf^B-St-Tk:SubWord']

In [66]:
# input_fullname = 'B-St-Tk:Anno^B-St-Tk:POS^B-St-Tk:Slf^B-St-Tk:SubWord'
# pipeline_name = 'RL**B-St-Tk:Anno&POS&Slf&SubWord=>B-St:Tk:Anno&POS&Slf&SubWord'


# Pipeline = Struct.Layers[input_fullname][pipeline_name]
# print(Pipeline.input_fullname)
# print(Pipeline.output_fullname)

In [67]:
# data = FLD_2_DATA[Pipeline.input_fullname]
# data['holder']

In [68]:
# data['info']

In [69]:
# Pipeline

In [70]:
# FLD_2_DATA = Pipeline(FLD_2_DATA)

In [71]:
# [i for i in FLD_2_DATA]

In [72]:
# data = FLD_2_DATA['B-St:Tk:Anno&POS&Slf&SubWord']
# data['holder']

In [73]:
# data['info'].shape

# FieldRepr

## Module

In [113]:
import os
import torch
import numpy as np

# from .pipeline import Pipeline_Layer
# from ..utils.parafn import process_sublayer_name

class FieldRepr_Layer(torch.nn.Module):
    """Chain of ``Struct_Layer`` modules built from a list of field names.

    The structure table returned by ``get_structures_from_fldlist(FLD_LIST)``
    is sorted by its ``'layers'`` column in descending order, and one
    ``Struct_Layer`` is created per struct name, grouped by layer key.
    ``forward`` applies them in that same order.

    Args:
        FLD_LIST: full field names; the last ``'-'``-separated segment of each
            one is used as its short name.
        FLD_END: key that must be present in the data mapping once every
            struct has run; ``forward`` returns the entry stored under it.
        meta_para: passed unchanged to every ``Struct_Layer``.
    """

    def __init__(self, FLD_LIST, FLD_END, meta_para):
        super().__init__()

        struct_table = get_structures_from_fldlist(FLD_LIST)
        ordered = struct_table.sort_values(by='layers', ascending=False)
        layer2structlist = dict(
            zip(ordered['layers'].to_list(), ordered['struct_name'].to_list())
        )

        name2fullname = {fld.split('-')[-1]: fld for fld in FLD_LIST}

        self.FLD_LIST = FLD_LIST
        self.FLD_END = FLD_END
        # Keeps a reference to the initial mapping object; the local name is
        # rebound to whatever get_struct_info returns inside the loop below.
        self.NAME_2_FULLNAME = name2fullname

        self.LAYERS = torch.nn.ModuleDict()
        for layer, structlist in layer2structlist.items():
            layer_structs = torch.nn.ModuleDict()
            self.LAYERS[layer] = layer_structs
            for struct_name in structlist:
                # Resolve the inputs/output of this struct and get the
                # updated short-name mapping for the following structs.
                (fullname_input_list,
                 final_fullname_output,
                 struct_model,
                 name2fullname) = get_struct_info(struct_name, name2fullname)
                fullname_input = '^'.join(fullname_input_list)
                D_model, D_data = generate_structure(fullname_input_list, struct_model)

                struct_para = {
                    'fullname_input_list': fullname_input_list,
                    'fullname_input': fullname_input,
                    'final_fullname_output': final_fullname_output,
                    'struct_model': struct_model,
                    'D_model': D_model,
                    'D_data': D_data,
                }
                layer_structs[struct_name] = Struct_Layer(struct_name, struct_para, meta_para)

    def forward(self, FLD_2_DATA):
        """Apply every struct layer in registration order.

        Before and after each struct the current keys of the data mapping are
        printed for tracing.

        Args:
            FLD_2_DATA: mapping passed to, and replaced by the return value
                of, each ``Struct_Layer`` in turn.

        Returns:
            The entry stored under ``self.FLD_END`` after the last struct.

        Raises:
            AssertionError: if ``self.FLD_END`` is not a key of the result.
        """
        for struct_layers in self.LAYERS.values():
            for struct_name, struct_layer in struct_layers.items():
                print(f'\nstruct_name <---------- {struct_name} ')
                print(list(FLD_2_DATA))
                FLD_2_DATA = struct_layer(FLD_2_DATA)
                print(list(FLD_2_DATA))

        assert self.FLD_END in FLD_2_DATA
        return FLD_2_DATA[self.FLD_END]

## Usage

In [116]:
import torch
from pprint import pprint
import numpy as np
from fieldnn.utils.layerfn import traverse
from fieldnn.utils.simulate import get_next_info, get_simulated_tensor_from_fldname

# Arguments handed to get_simulated_tensor_from_fldname in the simulation
# loop further down this cell.
B_lenP = 3
B2P_lnEC = [6, 5, 2] # presumably one length per batch element (3 entries, B_lenP = 3) — TODO confirm
prefix_layers_num = 2
vocab_size = 100  # placeholder: reassigned per field inside the simulation loop below

###############
# FLD_LIST = [
# 'B-St-Tk:SlfGrn',
# 'B-St-Tk:POSGrn',
# 'B-St-Tk:AnnoGrn',
# 'B-St-Tk:SubWord-CharGrn',
# 'B-St-Tk:SubWord-SyllableGrn',
# 'B-St-Tk:SubWord-PhonemeGrn',
# ]

# FLD_END = 'B-St'

# Full field names used for the simulation; segments are '-'-separated.
FLD_LIST = [
    # Diag fields
    'B-P-EC:Diag-DiagRec:DiagV-DiagVdftGrn',
    'B-P-EC:Diag-DiagRec:DiagDT-DiagDTdftGrn',
    # Med fields
    'B-P-EC:Med-MedRec:MedV-MedVdftGrn',
    'B-P-EC:Med-MedRec:MedDT-MedDTdftGrn',
    # A1C fields
    'B-P-EC:A1C-A1CRec:A1CV-A1CVdftGrn',
    'B-P-EC:A1C-A1CRec:A1CDT-A1CDTdftGrn',
    # PN fields
    'B-P-EC:PN-PNRec:SctName-SNdftGrn',
    'B-P-EC:PN-PNRec:SctText-SctSent-Tk:SelfGrn',
    'B-P-EC:PN-PNRec:SctText-SctSent-Tk:POSGrn',
    'B-P-EC:PN-PNRec:SctText-SctSent-Tk:SubWord-CharGrn',
]

# Name of the final field; passed to FieldRepr_Layer as FLD_END.
FLD_END = 'B-P'

###############
# First two '-'-separated segments of the first field name.
Ignore_PSN_Layers = FLD_LIST[0].split('-', 2)[:2]

# Last '-'-separated segment of each field name -> its full name.
NAME_2_FULLNAME = {fullname.rsplit('-', 1)[-1]: fullname for fullname in FLD_LIST}

###############
# Random vocabulary size for every field, drawn from [1, 5000).
# The explicit lower bound of 1 rules out an empty vocabulary:
# np.random.randint(5000) samples from [0, 5000) and can therefore return 0.
# NOTE(review): no RNG seed is set in this cell, so the sizes differ between runs.
FLD_2_VOCABSIZE = {k: np.random.randint(1, 5000) for k in FLD_LIST}

#####################
# Simulated model inputs, keyed by full field name.
FLD_2_DATA = {}

for fullname in FLD_LIST:
    # Each field is simulated with its own randomly drawn vocabulary size.
    vocab_size = FLD_2_VOCABSIZE[fullname]
    info_idx = get_simulated_tensor_from_fldname(fullname, B_lenP, B2P_lnEC, prefix_layers_num, vocab_size)
    # print(info_idx.shape)
    # The simulated values are stored as a LongTensor under 'holder'.
    holder = torch.LongTensor(info_idx)
    # info_idx = torch.LongTensor(info_idx)
    # 'info' starts out as the placeholder string 'Empty'.
    FLD_2_DATA[fullname] = {'holder': holder, 'info': 'Empty'}
    
######################
# Value stored as meta_para['embed_size'] below.
embed_size = 512

# Post-processing settings without an activator (dropout + layernorm only).
expander_process = dict(
    dropout=dict(p=0.5, inplace=False),
    layernorm=dict(eps=1e-05, elementwise_affine=True),
)

# Same post-processing settings, preceded by a 'gelu' activator.
default_process = dict(
    activator='gelu',
    dropout=dict(p=0.5, inplace=False),
    layernorm=dict(eps=1e-05, elementwise_affine=True),
)

# Default learner: 'TFM' with 6 encoder layers.
default_learner_para = dict(
    nn_name='TFM',
    nn_para=dict(num_encoder_layers=6),
)

# Default reducer: 'Max'.
default_reducer_para = dict(nn_name='Max')
##################################


# Bundle of shared settings handed to FieldRepr_Layer / Struct_Layer.
meta_para = {
    'FLD_2_VOCABSIZE': FLD_2_VOCABSIZE,
    'embed_size': embed_size,
    'expander_process': expander_process,
    'default_process': default_process,
    'default_learner_para': default_learner_para,
    'default_reducer_para': default_reducer_para,
    'Ignore_PSN_Layers': Ignore_PSN_Layers,
}

1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 5)
3
3 --> (3, 6, 5)
4 --> (3, 6, 5, 5)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 9)
3
3 --> (3, 6, 9)
4 --> (3, 6, 9, 5)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 6)
3
3 --> (3, 6, 6)
4 --> (3, 6, 6, 2)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 3)
3
3 --> (3, 6, 3)
4 --> (3, 6, 3, 4)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 1)
3
3 --> (3, 6, 1)
4 --> (3, 6, 1, 9)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 5)
3
3 --> (3, 6, 5)
4 --> (3, 6, 5, 1)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 1)
3
3 --> (3, 6, 1)
4 --> (3, 6, 1, 4)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 5)
3
3 --> (3, 6, 5)
4 --> (3, 6, 5, 9)
4
4 --> (3, 6, 5, 9)
5 --> (3, 6, 5, 9, 6)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 8)
3
3 --> (3, 6, 8)
4 --> (3, 6, 8, 2)
4
4 --> (3, 6, 8, 2)
5 --> (3, 6, 8, 2, 3)
1
1 --> (3,)
2 --> (3, 6)
2
2 --> (3, 6)
3 --> (3, 6, 3)
3
3 --> (3,

In [105]:
# name2fullname = {i.split('-')[-1]:i for i in fld_list}
# Structure table derived from the configured field list.
df_struct = get_structures_from_fldlist(FLD_LIST)

# Order rows by the 'layers' column, largest value first, then map each
# layer key to its 'struct_name' entry (insertion order follows the sort).
tmp = df_struct.sort_values(by='layers', ascending=False)
layer2structlist = {
    layer_key: struct_names
    for layer_key, struct_names in zip(tmp['layers'].to_list(), tmp['struct_name'].to_list())
}
layer2structlist

{'2-3': ['CharGrn^PhonemeGrn^SyllableGrn==>Tk:SubWord'],
 '1-2': ['Tk:AnnoGrn^Tk:POSGrn^Tk:SlfGrn^Tk:SubWord==>St']}

In [106]:
# Instantiate the field-representation model for the configured field list,
# end field and shared settings.
FieldRepr = FieldRepr_Layer(FLD_LIST, FLD_END, meta_para)
# FieldRepr

In [108]:
# Run the model on the simulated inputs. FieldRepr_Layer.forward returns the
# FLD_2_DATA entry stored under FLD_END and prints the mapping's keys before
# and after every struct, which produces the trace shown below.
data = FieldRepr(FLD_2_DATA)
# data['info']


struct_name <---------- CharGrn^PhonemeGrn^SyllableGrn==>Tk:SubWord 
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable', 'B-St-Tk:SubWord-Char&Phoneme&Syllable', 'B-St-Tk:SubWord:Char&Phoneme&Syllable', 'B-St-Tk:SubWord', 'B-St-Tk:Anno', 'B-St-Tk:POS', 'B-St-Tk:Slf', 'B-St-Tk-Anno&POS&Slf&SubWord', 'B-St-Tk:Anno&POS&Slf&SubWord', 'B-St:Tk:Anno&POS&Slf&SubWord', 'B-St']

pipeline_name <---------- EL**B-St-Tk:SubWord-CharGrn=>B-St-Tk:SubWord-Char 
['B-St-Tk:SlfGrn', 'B-St-Tk:POSGrn', 'B-St-Tk:AnnoGrn', 'B-St-Tk:SubWord-CharGrn', 'B-St-Tk:SubWord-SyllableGrn', 'B-St-Tk:SubWord-PhonemeGrn', 'B-St-Tk:SubWord-Char', 'B-St-Tk:SubWord:Char', 'B-St-Tk:SubWord-Phoneme', 'B-St-Tk:SubWord:Phoneme', 'B-St-Tk:SubWord-Syllable', 'B-St-Tk:SubWord:Syllable

In [111]:
# Shape of the 'info' tensor in the entry returned for FLD_END.
data['info'].shape

torch.Size([3, 512])

In [109]:
# Total number of scalar parameters registered on the model.
sum(param.numel() for param in FieldRepr.parameters())

252563456

In [91]:
# for para in FieldRepr.parameters():
#     print(para)

In [112]:
# Parameter count of the model, summed over all registered parameters.
sum(param.numel() for param in FieldRepr.parameters())

252563456