# 1. Import Packages

In [1]:
from scripts.applications.prediction_models.models.syntax_infused_model  import *
from scripts.applications.prediction_models.models.sequential_model import *
from scripts.applications.prediction_models.models.neural_cky import *
from scripts.language_processing.language_builder.neural_builder.models.tn_pcfg import TNPCFG
from scripts.language_processing.language_builder.neural_builder.models.utils import rebuild_T_from_head_left_right

In [2]:
import numpy as np
import random

# 2. Variable and Function Definitions

In [3]:
# Shared sizes are defined once and reused below, so the top-level entries and
# the nested syntax-model arguments cannot drift apart.
cnt_channels = 19    # number of EEG channels fed to the sequential model
cnt_words = 10       # vocabulary size (number of distinct microstate ids)
word_emb_size = 200  # dimension of each word embedding

main_configuration = {
    'cnt_channels': cnt_channels,
    'cnt_words': cnt_words,
    'word_emb_size': word_emb_size,
    # Arguments handed to the TN-PCFG / NN-CYK models (later cells add more keys).
    'syntax_infused_model_args': {
        'NT': 20,      # presumably the number of nonterminal symbols
        'T': 65,       # presumably the number of preterminal symbols
        's_dim': 50,
        'r_dim': 75,
        'word_emb_size': word_emb_size,
        'cnt_words': cnt_words,
        'summary_parameters': True,
    },
}

In [4]:
def generate_random_eeg_data(cnt_channels = 19, cnt_time_samples = 1000): # unit: mV
    """Return a random stand-in EEG recording.

    Args:
        cnt_channels: number of rows (channels) in the result.
        cnt_time_samples: number of columns (time samples) in the result.

    Returns:
        A float array of shape (cnt_channels, cnt_time_samples) whose entries
        are uniform draws from [0, 1) multiplied by 0.1 and then by 90.
    """
    uniform_draws = np.random.random_sample((cnt_channels, cnt_time_samples))
    # Keep the two-step scaling (0.1 first, then 90) so the floating-point
    # results are bit-identical to a single `0.1 * x * 90` expression.
    tenth_scaled = 0.1 * uniform_draws
    return tenth_scaled * 90

def generate_random_eeg_microstates(cnt_channels = 19, cnt_time_samples = 1000, cnt_microstates = None):
    """Return a random sequence of microstate ids, one per time sample.

    Args:
        cnt_channels: unused; kept so existing callers that pass it still work.
        cnt_time_samples: length of the returned sequence.
        cnt_microstates: number of distinct ids; ids are drawn from
            [0, cnt_microstates). When None, the module-level `cnt_words`
            is used, which is what this function always did before.

    Returns:
        A 1-D integer array of length cnt_time_samples. The values are
        category labels, not voltages, so they carry no unit.
    """
    if cnt_microstates is None:
        # Backward-compatible default: fall back to the notebook-level vocabulary size.
        cnt_microstates = cnt_words
    return np.random.randint(0, cnt_microstates, cnt_time_samples)

def generate_random_segmented_eeg_data(cnt_channels = 19, cnt_time_samples = 1000): # unit: mV
    """Return random stand-in EEG data cut into consecutive segments along time.

    A split probability is drawn once; every interior time index
    1 .. cnt_time_samples - 1 then independently becomes a segment boundary
    with that probability.

    Args:
        cnt_channels: number of channels (rows) in every segment.
        cnt_time_samples: total number of time samples across all segments.

    Returns:
        A list of arrays, each of shape (cnt_channels, segment_length), which
        concatenate along axis 1 back to the full recording.
    """
    eeg_data = generate_random_eeg_data(cnt_channels, cnt_time_samples)
    split_probability = np.random.random()
    # One draw per interior index; draw k decides whether a boundary sits at index k + 1.
    boundary_draws = np.random.random(max(cnt_time_samples - 1, 0))
    interior_split_points = np.flatnonzero(boundary_draws < split_probability) + 1
    # Split along the time axis (axis 1); the data is laid out as (channels, time).
    return np.split(eeg_data, interior_split_points, axis=1)

def generate_random_corpus(word_count, cnt_article, article_max_length):
    """Return a random corpus of integer word-id sequences.

    Args:
        word_count: vocabulary size; word ids are drawn from [0, word_count).
        cnt_article: number of articles (sequences) to generate.
        article_max_length: largest allowed article length (inclusive).

    Returns:
        A list of cnt_article 1-D integer arrays. Each length is drawn
        uniformly from 1 .. article_max_length, so no article is empty and
        the maximum length can actually occur.
    """
    corpus = []
    for _ in range(cnt_article):
        # randint's upper bound is exclusive, hence the + 1; the lower bound of 1
        # rules out zero-length articles, which cannot be parsed as a sentence.
        article_length = np.random.randint(1, article_max_length + 1)
        corpus.append(np.random.randint(0, word_count, size = article_length))
    return corpus
    

# 3. Data Preparation

## 3.1 Prerequisite - Dictionary Building

In [5]:
# Random stand-in EEG recording: a (cnt_channels, 1000) array, because
# cnt_time_samples keeps its default of 1000.
random_eeg_data = generate_random_eeg_data(cnt_channels = cnt_channels)

In [6]:
# Random microstate id sequence, one id per time sample (1000 by default).
# NOTE(review): generate_random_eeg_microstates does not use its cnt_channels argument.
random_microstate_sequences = generate_random_eeg_microstates(cnt_channels = cnt_channels)

# 4. Word Embedding

In [7]:
from scripts.applications.prediction_models.models.word_embedding import *

In [8]:
# Word-embedding model over the microstate vocabulary: cnt_words entries, each
# embedded in word_emb_size dimensions.
# NOTE(review): the meaning of context_size=2 is defined by WordEmbeddingModel,
# which is not visible in this notebook.
embeding_model = WordEmbeddingModel(vocab_size = cnt_words, embedding_dim = word_emb_size, context_size = 2)

## 4.1 Training

In [9]:
# The training corpus is a single "sentence": the whole random microstate sequence.
sentences = [random_microstate_sequences]
# Training logs one epoch/loss pair per epoch (see the output below).
embeding_model.train(sentences)

epoch = 0
loss = 2311.625854
epoch = 1
loss = 2270.306467
epoch = 2
loss = 2245.134150
epoch = 3
loss = 2226.618214
epoch = 4
loss = 2211.994990
epoch = 5
loss = 2199.835849
epoch = 6
loss = 2189.421719
epoch = 7
loss = 2179.727727
epoch = 8
loss = 2170.743100
epoch = 9
loss = 2162.398235


## 4.2 Extract Embeddings

In [10]:
# Trained embedding table taken from the model's `embeddings` layer.
# Presumably one row of size word_emb_size per word — TODO confirm against WordEmbeddingModel.
word_embeddings = embeding_model.embeddings.weight

# 5. Modal Combination Model Creation

In [11]:
# Combiner later passed to SyntaxInfusedModel as `combining_model`.
# NOTE(review): the shapes printed when the combined model runs (100 and 128 -> 228)
# suggest plain concatenation — confirm in SimpleConcatCombing.
combine_model = SimpleConcatCombing()

# 6. Base Prediction Model Creation

In [12]:
# Base sequential model: cnt_channels inputs per step, hidden_size=100, output_size=1.
# NOTE(review): the exact meaning of sequence_length=1 is defined by
# LNNElectrodeValueBasedPredictionModel, which is not visible in this notebook.
LNN_electrode_value_based_prediction_model = LNNElectrodeValueBasedPredictionModel(ncp_input_size = cnt_channels, hidden_size=100, output_size=1, sequence_length=1)

# 7. Predicting the Pre-epileptic State Probability at Each Time Point Using the Base Prediction Model

## 7.1 Training

## 7.2 Inference

In [13]:
# Reference to the model's hidden-state attribute, taken before the loop and
# displayed as the cell output at the end.
h = LNN_electrode_value_based_prediction_model.h
# Feed the recording one time sample at a time and print the scalar prediction.
for t in range(random_eeg_data.shape[1]):
    input_t = torch.Tensor(random_eeg_data[:, t])
    # Reshape to (1, cnt_channels); using the configured channel count instead of a
    # literal 19 keeps this cell valid when cnt_channels changes.
    out, _, _ = LNN_electrode_value_based_prediction_model.forward(input_t.view(-1, cnt_channels))
    print(out[0].item())
h

0.49817609786987305
0.44525301456451416
0.4979633092880249
0.408312052488327
0.44605588912963867
0.49403345584869385
0.4309931695461273
0.5023010969161987
0.41727784276008606
0.4578142762184143
0.48594948649406433
0.4444751441478729
0.43472370505332947
0.46560314297676086
0.45214176177978516
0.47070905566215515
0.5274943709373474
0.4576113820075989
0.469032347202301
0.42866000533103943
0.47006842494010925
0.48806026577949524
0.4451099634170532
0.4482690691947937
0.4962621033191681
0.39541518688201904
0.4778713285923004
0.5335752367973328
0.41171973943710327
0.5332785248756409
0.45624279975891113
0.41854384541511536
0.48194563388824463
0.4290144145488739
0.41108596324920654
0.4764980971813202
0.5352556705474854
0.4194592535495758
0.5156670212745667
0.48954179883003235
0.41386768221855164
0.43221327662467957
0.4411000609397888
0.43914273381233215
0.5034956336021423
0.4598582983016968
0.4724738597869873
0.39390045404434204
0.4508286416530609
0.4186765253543854
0.42213186621665955
0.502707

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.]])

# 8. Syntax-infused Prediction Model 

In [14]:
# NOTE: args is the same dict object as main_configuration['syntax_infused_model_args']
# (no copy is made), so every key added to args also appears in main_configuration.
args = main_configuration['syntax_infused_model_args']
# Hand the trained word embeddings to the grammar models through args.
args['word_embeddings'] = word_embeddings

## 8.1 Build Grammar

### 8.1.1 TN-PCFG Model Creation

In [15]:
# Build the TN-PCFG from the shared args; the constructor logs the sizes it
# picked up (see the output below).
tn_pcfg = TNPCFG(args=args)
# print(f"{tn_pcfg}")

build td pcfg
device = cpu, NT = 20, T = 65, V = 10, s_dim = 50, r = 75, word_emb_size = 200


In [16]:
# Display the module structure of the TN-PCFG.
tn_pcfg

TNPCFG(
  (root_mlp): Sequential(
    (0): Linear(in_features=50, out_features=50, bias=True)
    (1): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (2): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (3): Linear(in_features=50, out_features=20, bias=True)
  )
  (term_mlp): Sequential(
    (0): Linear(in_features=50, out_features=50, bias=True)
    (1): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (2): ResLayer(
      (linear): Sequential(
        (0): Linear(in

### 8.1.2 Corpus Creation

In [17]:
# corpus is a list of 1000 sequences; each element is a 1-D integer array of
# microstate ids drawn from [0, cnt_words), with a random length bounded by 2000.
corpus = generate_random_corpus(cnt_words, 1000, 2000)

### 8.1.4 Training

### 8.1.3 Do Inference in TN-PCFG to Extract Grammars

In [18]:
# Use the first article of the corpus as the sentence to parse.
sentence_for_inference = corpus[0]

In [19]:
# Run the TN-PCFG on a batch of one sentence, shaped (1, sentence_length).
# The length is taken from the sentence itself (reshape(1, -1)) rather than from
# corpus[0], so this cell stays correct if sentence_for_inference is changed.
inference = tn_pcfg.forward(torch.Tensor(np.asarray(sentence_for_inference).reshape((1, -1))))

begin forward>>>>>>>>>>>>>, input shape = torch.Size([1, 1160])
b, n = 1, 1160
torch.Size([65, 10])
torch.Size([1, 1160, 65]) torch.Size([1, 20]) torch.Size([1, 20, 75]) torch.Size([1, 85, 75]) torch.Size([1, 85, 75])


In [20]:
# List the entries returned by the TN-PCFG forward pass.
inference.keys()

dict_keys(['unary', 'root', 'head', 'left', 'right', 'kl'])

In [21]:
# Re-index the 'unary' part of the inference result by word instead of by position.
# The original unary is a 2-D array with one row per position in the sentence;
# per the model's convention, row i holds the scores with which each terminal
# symbol directly produces the word at position i.
# In that output, unary[i] == unary[j] whenever word_sequence[i] == word_sequence[j],
# so each distinct word's row is copied into its own column of a (T, cnt_words) matrix.
# Columns of words that never occur in the sentence stay zero.
original_unary = inference['unary'].detach().numpy()[0]
sequence_length = original_unary.shape[0]
inference_unary = np.zeros((args['T'], args['cnt_words']))
for position, unary_row in enumerate(original_unary):
    inference_unary[:, sentence_for_inference[position]] = unary_row

In [22]:
def rebuild_T_from_head_left_right(head, left, right, NT, T):
    """Rebuild the full binary-rule tensor from its rank-r factors.

    Computes rules[i, j, k] = sum_r head[i, r] * left[j, r] * right[k, r].

    NOTE(review): this definition shadows the function of the same name imported
    from `...neural_builder.models.utils` at the top of the notebook — confirm
    which of the two is meant to be used.

    Args:
        head: 2-D factor with one row per parent symbol and one column per rank
            component; a torch tensor (gradients are detached) or an array-like.
        left: 2-D factor for the left child, same number of columns as `head`.
        right: 2-D factor for the right child, same number of columns as `head`.
        NT: number of rows expected in `head`.
        T: NT + T is the number of rows expected in `left` and `right`.

    Returns:
        A float64 numpy array of shape (NT, NT + T, NT + T).

    Raises:
        ValueError: if the factor sizes do not multiply out to
            NT * (NT + T) * (NT + T) elements.
    """
    def _as_float64(factor):
        # Torch tensors may track gradients and live on another device; detach
        # and move them once, instead of once per rank component.
        if hasattr(factor, 'detach'):
            factor = factor.detach().cpu().numpy()
        return np.asarray(factor, dtype=np.float64)

    head_np = _as_float64(head)
    left_np = _as_float64(left)
    right_np = _as_float64(right)
    sum_NT_T = NT + T
    # One einsum replaces the per-component kron loop: the outer product over
    # (i, j, k), summed over the shared rank index r.
    rule_tensor = np.einsum('ir,jr,kr->ijk', head_np, left_np, right_np)
    return rule_tensor.reshape((NT, sum_NT_T, sum_NT_T))

In [23]:
# Full binary-rule tensor of shape (NT, NT + T, NT + T), rebuilt from the factors
# of the first batch element ([0]) of the inference result.
T = rebuild_T_from_head_left_right(inference['head'][0], inference['left'][0], inference['right'][0], args['NT'], args['T'])

### 8.1.4 Add the Extracted Grammar to args for the NN-CYK Model

In [24]:
# Store the extracted grammar in args (presumably read by NN_CYK_Model — TODO confirm key names).
# Root entry of the first batch element.
args['grammar_starts'] = inference['root'].detach().numpy()[0]
# (T, cnt_words) matrix of per-word unary scores.
args['grammar_preterminates'] = inference_unary
# (NT, NT + T, NT + T) binary-rule tensor.
args['grammar_double_nonterminates'] = T

In [25]:
# The commented-out dict below sketches a grammar given as random head/left/right
# factors; it is kept for reference only and is not executed.
# {
#     'head': np.random.random((args['NT'])),
#     'left': np.random.random((args['NT'] + args['T'], args['r_dim'])),
#     'right': np.random.random((args['NT'] + args['T'], args['r_dim']))
# }
# Remaining NN-CYK components, passed through args: the beam-search selection
# function and the two feature-combining models (names come from the wildcard imports).
args['beam_search_strategy'] = select_tops
args['preterminate_feature_generation_model'] = NN_CYK_FeatureCombingModel_Preterminate()
args['merge_model'] = NN_CYK_FeatureCombingModel_NonPreterminate()

In [26]:
# Build the NN-CYK syntax model from everything accumulated in args.
nn_cyk_model = NN_CYK_Model(args)

### 8.1.5 Training

### 8.1.6 Inference

In [27]:
# Feed word id 5 to the NN-CYK model and display the returned feature tensor.
# NOTE: the model appears to be stateful (it logs "append record." on every call),
# so re-running this cell changes the results of later cells.
nn_cyk_model.forward(5)

append record.


tensor([-0.1631, -0.4987,  0.3824, -0.5079,  0.3948,  0.2942,  0.4995, -0.3670,
        -0.3214,  0.3199,  0.2380,  0.2537, -0.7131,  0.7128,  0.2409,  0.5441,
         0.6384, -0.3322, -0.4539,  1.1360, -0.3881, -0.2092, -0.7406, -1.2709,
         0.0621,  0.2601, -0.5101, -0.2043,  0.6076,  0.2196,  0.0556,  0.4431,
        -0.5397, -0.6417, -0.2895,  0.0239, -0.2803,  0.0025,  0.9017,  0.2093,
         0.9854, -0.0431, -0.1981, -1.2723,  0.4834,  0.1454, -0.2356, -1.3993,
        -0.5699, -1.2867, -0.1886,  0.1910,  0.2272, -0.2531,  0.8667,  0.2742,
        -0.4327,  0.5810, -0.1900,  0.0128,  0.7619, -0.3764,  0.3779,  0.3589,
        -0.1042,  0.2049,  0.1012, -0.3294,  0.7652,  0.6055, -0.0637, -0.3265,
         0.1760,  0.2672,  0.2077, -0.9407,  0.4680,  1.0442, -0.2584, -0.1893,
         0.4986, -0.5035, -0.2658,  0.6790, -0.0209, -0.4498,  1.4464,  1.2651,
        -1.1126,  0.1000, -0.3007, -0.1532,  0.3036, -1.0202, -0.2244, -0.0847,
        -1.5917, -0.7371,  0.2031,  0.65

In [28]:
# Feed a second word (id 5); the log now also shows "fill record in t=1",
# i.e. the model keeps state from the previous call.
nn_cyk_model.forward(5)

append record.
fill record in t=1


tensor([ 0.3986, -0.1539,  0.4341, -0.4360, -0.5612, -0.2813,  0.5742,  0.0076,
        -0.0010, -0.6091, -0.1359, -0.1014,  0.5094,  0.0825,  0.0824, -0.3815,
         0.1843, -0.5014,  0.3864, -0.2798,  0.7923,  0.0784,  0.0234,  0.3921,
        -0.1126,  0.2874,  0.1700,  0.4623, -0.4098, -0.2324, -0.2110,  0.2769,
         0.5157, -0.4140, -0.3386, -0.5572, -0.0677,  0.2103,  0.0100, -0.2895,
         0.3113,  0.1663, -0.4643,  0.1922,  0.0477, -0.4896,  0.1799, -0.0458,
        -0.0432, -0.2462, -0.1337, -0.1421,  0.5575,  0.2571, -0.6888,  0.0578,
         0.0233, -0.1174,  0.5389,  0.1150, -0.2461,  0.3612, -0.9885,  0.1610,
        -0.2151, -0.0150, -0.0915, -0.1881,  0.2912, -0.2718, -0.4475,  0.2353,
        -0.5943, -0.0578, -0.1706,  0.0815, -0.0965,  0.1468, -0.2668, -0.1370,
        -0.7512,  0.0526, -0.0808,  0.2285,  0.0834,  0.0995,  0.6638,  0.2958,
        -0.1449, -0.1093,  0.4595, -0.5211,  0.0724,  0.2816,  0.2151,  0.2413,
        -0.0633,  0.3265, -0.0613, -0.13

In [29]:
# Feed a third word (id 4); the "fill record" log lines grow with each call.
nn_cyk_model.forward(4)

append record.
fill record in t=2
fill record in t=2


tensor([ 0.0724,  0.1693,  0.2056, -0.0873, -0.0451, -0.0636, -0.1464,  0.0990,
         0.0583, -0.0132,  0.0046,  0.1414,  0.1899,  0.1634,  0.0039,  0.0144,
        -0.0084,  0.1212, -0.0429, -0.0323, -0.2249, -0.0440, -0.0553, -0.2388,
         0.0191,  0.1014,  0.1591,  0.1444, -0.2864, -0.2257,  0.2338,  0.1054,
        -0.1560,  0.0546,  0.0311,  0.0115,  0.1054, -0.0487,  0.0543,  0.1294,
         0.2179,  0.0056,  0.1269,  0.1640, -0.3049,  0.1224,  0.1548, -0.0252,
         0.1298,  0.1096, -0.1316,  0.1775, -0.0547, -0.2871,  0.3072, -0.0864,
         0.0316,  0.0528, -0.0606, -0.1899, -0.2177,  0.2033,  0.1102,  0.1280,
         0.0321,  0.2366, -0.0573, -0.0732,  0.1107,  0.0211,  0.2059,  0.2389,
        -0.0206, -0.1452,  0.1064, -0.0140,  0.0097,  0.2169, -0.0321,  0.0372,
        -0.0178,  0.0659,  0.1582,  0.1617, -0.1520, -0.2787,  0.1437,  0.1520,
         0.1690, -0.2423, -0.1271,  0.2766, -0.0808,  0.3826,  0.1951,  0.1345,
        -0.0377,  0.2491,  0.3799, -0.21

In [30]:
# Feed a fourth word (id 7) and display the returned feature tensor.
nn_cyk_model.forward(7)

append record.
fill record in t=3
fill record in t=3
fill record in t=3


tensor([-0.1315,  0.2069,  0.0852, -0.0119, -0.0544,  0.0951, -0.0455,  0.0656,
         0.0448,  0.0543,  0.0087,  0.0097,  0.2189,  0.1464, -0.0649,  0.3035,
        -0.0017,  0.0996, -0.0035,  0.0265, -0.1625,  0.1990, -0.1491, -0.1266,
         0.0966, -0.0150, -0.0613,  0.1936, -0.0453, -0.1069,  0.1012,  0.1009,
        -0.0555, -0.0135,  0.0218, -0.0886,  0.0973,  0.2145, -0.0586,  0.2022,
         0.1616, -0.1206,  0.1252,  0.0600, -0.0670, -0.0210,  0.1239, -0.2700,
         0.1236,  0.0875, -0.1263,  0.1444, -0.0196, -0.0973,  0.1124, -0.0237,
        -0.0649,  0.0825,  0.0012, -0.1001,  0.0117,  0.0959, -0.0956,  0.0182,
         0.1154,  0.0587, -0.1590, -0.0045,  0.3425,  0.0303,  0.2409,  0.0970,
         0.0332,  0.0181,  0.0527,  0.1940, -0.0383, -0.0080, -0.1029,  0.2672,
         0.0718,  0.1163,  0.2668,  0.2383,  0.0084, -0.1719,  0.1132,  0.1563,
         0.2889,  0.0012, -0.2057,  0.1356, -0.1383,  0.1476,  0.1560,  0.1327,
         0.1302, -0.0578,  0.4387, -0.14

## 8.2 Combine All the models and make predictions

In [34]:
# NOTE(review): 228 is a magic number. It matches 100 + 128, the two feature sizes
# printed when the combined model runs below; derive it from those sizes if the
# sub-models expose them.
simple_full_connection_prediction_model = FCPrediction(input_size = 228)

In [35]:
# Wire the four components together: the sequential (electrode-value) model, the
# NN-CYK syntax model, the feature combiner and the final prediction model.
syntax_infused_model = SyntaxInfusedModel(
    sequential_model=LNN_electrode_value_based_prediction_model,
    syntax_model=nn_cyk_model,
    combining_model=combine_model,
    prediction_model=simple_full_connection_prediction_model,
)

In [36]:
# Run the combined model on the first 5 time samples, pairing each EEG sample
# with a randomly drawn word id.
for t in range(5):
    # Reshape to (1, cnt_channels); using the configured channel count instead of a
    # literal 19 keeps this cell valid when cnt_channels changes.
    input_t = torch.Tensor(random_eeg_data[:, t]).view(-1, cnt_channels)
    word_t = np.random.randint(0, cnt_words)
    syntax_infused_model.forward(word_t, input_t)

torch.Size([100])
append record.
fill record in t=5
fill record in t=5
fill record in t=5
fill record in t=5
fill record in t=5
torch.Size([100]) SimpleConcatCombing()
torch.Size([228]) torch.Size([228]) torch.Size([100]) torch.Size([128])
torch.Size([228])
torch.Size([100])
append record.
fill record in t=6
fill record in t=6
fill record in t=6
fill record in t=6
fill record in t=6
fill record in t=6
torch.Size([100]) SimpleConcatCombing()
torch.Size([228]) torch.Size([228]) torch.Size([100]) torch.Size([128])
torch.Size([228])
torch.Size([100])
append record.
fill record in t=7
fill record in t=7
fill record in t=7
fill record in t=7
fill record in t=7
fill record in t=7
fill record in t=7
torch.Size([100]) SimpleConcatCombing()
torch.Size([228]) torch.Size([228]) torch.Size([100]) torch.Size([128])
torch.Size([228])
torch.Size([100])
append record.
fill record in t=8
fill record in t=8
fill record in t=8
fill record in t=8
fill record in t=8
fill record in t=8
fill record in t=8
fil

In [None]:
# Display the prediction model created in section 8.2.
simple_full_connection_prediction_model

In [36]:
# Scratch check: torch.cat joins two 1-D tensors end to end.
tensor1 = torch.tensor([1,2,3])
tensor2 = torch.tensor([6,2,5])

torch.cat((tensor1, tensor2))

tensor([1, 2, 3, 6, 2, 5])