# 1. Import Package

In [1]:
from scripts.applications.prediction_models.models.syntax_infused_model import *
from scripts.applications.prediction_models.models.sequential_model import *
from scripts.applications.prediction_models.models.neural_cky import *
from scripts.language_processing.language_builder.neural_builder.models.tn_pcfg import TNPCFG
from scripts.language_processing.language_builder.neural_builder.models.utils import rebuild_T_from_head_left_right

In [2]:
import numpy as np
import random

# 2. Variable and function Definitions

In [3]:
main_configuration = {
'cnt_channels': 19,
'cnt_words': 10,
'word_emb_size': 200,
'syntax_infused_model_args': 
    {'NT': 20,
              'T': 65,
              's_dim': 50,
              'r_dim': 75,
              'word_emb_size': 200,
            'cnt_words': 10,
            'summary_parameters': True,
    }
}
cnt_channels = main_configuration['cnt_channels']
cnt_words = main_configuration['cnt_words']
word_emb_size = main_configuration['word_emb_size']

In [4]:
def generate_random_eeg_data(cnt_channels = 19, cnt_time_samples = 1000): # unit: mV
    return 0.1 * np.random.random_sample((cnt_channels, cnt_time_samples)) * 90

def generate_random_eeg_microstates(cnt_channels = 19, cnt_time_samples = 1000): # unit: mV
    return np.random.randint(0, cnt_words, (cnt_time_samples))

def generate_random_segmented_eeg_data(cnt_channels = 19, cnt_time_samples = 1000): # unit: mV
    dummy_eeg_data = generate_dummy_eeg_data(cnt_channels, cnt_time_samples)
    split_points = [0]
    p = np.random()
    for i in range(1, cnt_time_samples):
        if np.random() < p:
            split_points.append(i)
    split_points.append(cnt_time_samples)
    return [dummy_eeg_data[split_points[i - 1], split_points[i]] for i in range(1, len(split_points))]

def generate_random_corpus(word_count, cnt_article, article_max_length):
    return [np.random.randint(0, word_count, size = (np.random.randint(0, article_max_length))) for _ in range(cnt_article)]
    

# 3. Data Preparation

## 3.1 Prerequisite - Dictionary Building

In [17]:
random_eeg_data = generate_random_eeg_data(cnt_channels = cnt_channels)

In [18]:
random_microstate_sequences = generate_random_eeg_microstates(cnt_channels = cnt_channels)

# 4. Word Embedding

In [19]:
from scripts.applications.prediction_models.models.word_embedding import *

In [20]:
embeding_model = WordEmbeddingModel(vocab_size = cnt_words, embedding_dim = word_emb_size, context_size = 2)

# 4.1 Trainning

In [21]:
sentences = [random_microstate_sequences]
embeding_model.train(sentences)

epoch = 0
loss = 2319.711887
epoch = 1
loss = 2272.067009
epoch = 2
loss = 2243.640959
epoch = 3
loss = 2223.275610
epoch = 4
loss = 2207.727513
epoch = 5
loss = 2194.905063
epoch = 6
loss = 2183.395003
epoch = 7
loss = 2172.924126
epoch = 8
loss = 2163.637687
epoch = 9
loss = 2154.862836


## 4.2 Extract Embeddings

In [22]:
word_embeddings = embeding_model.embeddings.weight

# 5. Modal Combination Model Creation

In [23]:
combine_model = SimpleConcatCombing()

# 6. Base Prediction Creation

In [24]:
LNN_electrode_value_based_prediction_model = LNNElectrodeValueBasedPredictionModel(ncp_input_size = cnt_channels, hidden_size=100, output_size=1, sequence_length=1)

# 7. Prediction pre-epileptic state possibility in each time point by using the Base Prediction Model 

## 7.1 Trainning

## 7.2 Inference

In [25]:
h = LNN_electrode_value_based_prediction_model.h
for t in range(random_eeg_data.shape[1]):
    input_t = torch.Tensor(random_eeg_data[:, t])
    out, h = LNN_electrode_value_based_prediction_model.forward(input_t)
h

tensor([[ 0.9766,  0.4991,  0.5909,  0.9743, -0.5163, -0.9464, -0.4395, -0.3884,
          0.5344,  0.9542, -0.8245,  0.9662, -0.5409,  0.0100, -0.7164,  0.2344,
         -0.6876, -0.9105,  0.9108,  0.0628,  0.0934, -0.1050,  0.9922, -0.8664,
          0.6076, -0.9161, -0.6411, -0.3184, -0.5594,  0.3597, -0.0733, -0.6132,
         -0.9692,  0.3023,  0.8505,  0.9691, -0.2219, -0.3423, -0.4919, -0.5254,
          0.1081,  0.7915,  0.7671, -0.9033,  0.9650,  0.4035, -0.9247, -0.3916,
          0.7467,  0.9494,  0.3338, -0.5332, -0.8956, -0.8575,  0.4037,  0.0835,
          0.8984, -0.8676,  0.5323, -0.0046,  0.1823,  0.1825,  0.2357, -0.1049,
          0.0123,  0.5079,  0.4044, -0.2313, -0.0747, -0.2009,  0.1684,  0.2198,
         -0.4991, -0.1377, -0.4898,  0.4581, -0.3525,  0.1756, -0.5211,  0.2688,
         -0.4951, -0.0279, -0.3512, -0.0576, -0.1016, -0.0119,  0.6880,  0.1726,
          0.2249,  0.1484, -0.4089, -0.0568, -0.2631,  0.0322,  0.2280,  0.3152,
          0.1371,  0.1627, -

# 8. Syntax-infused Prediction Model 

In [26]:
args = main_configuration['syntax_infused_model_args']
args['word_embeddings'] = word_embeddings

## 8.1 Build Grammar

### 8.1.1 TN-PCFG Model Creation

In [27]:
tn_pcfg = TNPCFG(args=args)
# print(f"{tn_pcfg}")

build td pcfg
device = cpu, NT = 20, T = 65, V = 10, s_dim = 50, r = 75, word_emb_size = 200


In [28]:
tn_pcfg

TNPCFG(
  (root_mlp): Sequential(
    (0): Linear(in_features=50, out_features=50, bias=True)
    (1): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (2): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (3): Linear(in_features=50, out_features=20, bias=True)
  )
  (term_mlp): Sequential(
    (0): Linear(in_features=50, out_features=50, bias=True)
    (1): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (2): ResLayer(
      (linear): Sequential(
        (0): Linear(in

### 8.1.2 Corpus Creation

In [29]:
# corpus is a order-two list. Each element is a list that contain a sequence of microstates
corpus = generate_random_corpus(cnt_words, 1000, 2000)

### 8.1.4 Trainning

### 8.1.3 Do Inference in TN-PCFG to Extract Grammars

In [30]:
sentence_for_inference = corpus[0]

In [31]:
inference = tn_pcfg.forward(input={'word':(np.array(sentence_for_inference)).reshape((1, len(corpus[0])))})

begin forward>>>>>>>>>>>>>
x = [[8 9 7 2 1 6 7 8 2 3 8 7 0 9 8 7 3 3 4 2 7 5 3 7 6 2 6 2 8 3 7 4 7 8 0 6
  1 9 3 7 0 3 6 3 4 5 3 8 6 2 4 3 4 2 7 6 0 8 9 0 4 2 5 7 9 0 4 8 4 3 2 4
  5 0 5 1 8 2 4 5 6 1 9 8 1 4 3 2 4 0 0 4 4 0 7 9 4 9 2 9 2 1 9 1 7 8 7 0
  8 6 4 9 1 5 4 2 3 3 3 2 8 5 2 3 5 7 8 2 2 0 3 4 7 6 0 7 9 5 2 3 3 5 0 2
  2 0 7 8 1 9 2 3 9 3 9 2 4 9 2 1 9 4 0 5 3 7 7 6 1 6 3 0 8 8 3 2 3 8 7 5
  0 1 6 5 3 0 5 5 6 1 8 8 3 1 9 2 0 3 9 0 8 1 6 3 6 9 3 2 9 0 2 2 9 9 6 9
  1 7 3 8 7 5 0 3 4 6 4 5 6 9 7 2 7 8 1 5 7 1 3 6 3 9 3 6 3 8 1 8 4 0 9 7
  7 5 6 2 5 8 5 6 9 7 5 6 7 9 0 7 0 6 3 4 6 1 5 8 1 6 0 7 7 4 3 9 1 0 1 9
  8 0 0 0 9 0 5 3 1 3 0 9 0 0 5 5 1 1 4 6 6 9 2 9 3 8 6 9 4 6 0 1 7 5 0 1
  0 2 1 5 6 5 0 6 3 6 0 2 0 5 2 5 6 8 5 1 3 1 3 4 9 8 9 5 6 7 3 4 8 4 6 9
  7 1 2 2 2 3 7 2 0 5 2 7 8 9 8 0 2 3 6 2 8 5 2 2 7 9 2 5 3 0 2 7 2 7 3 6
  5 2 8 4 5 6 3 7 6 2 4 6 4 3 1 9 4 4 2 6 9 9 8 6 0 0 5 4 7 7 2 4 7 8 6 9
  8 5 7 2 4 2 6 8 7 4 4 1 6 1 7 5 7 2 4 0 1 9 2 7 3 2 7 7 8 9 7 2 7 6 7 4
  8 3 8

In [32]:
inference.keys()

dict_keys(['unary', 'root', 'head', 'left', 'right', 'kl'])

In [33]:
# convert the inference result of 'unary' array to a more formal form.
# the original unary is a 2-dimension array, in which i-th row is possibility of each terminate symbol directly deduct to the word at time point t.
# in the origin ouput, unary[i] = unary[j] if word_sequence[i] = word_sequence[j]
# now we put each unique word's feature into a 2-dimension matrix.
inference_unary = np.zeros((args['T'], args['cnt_words']))
original_unary = inference['unary'].detach().numpy()[0]
sequence_length = original_unary.shape[0]
for i in range(sequence_length):
    inference_unary[:, sentence_for_inference[i]] = original_unary[i]

In [34]:
def rebuild_T_from_head_left_right(head, left, right, NT, T):
    r_dim = head.shape[1]
    sum_NT_T = NT + T
    T = np.zeros((NT * sum_NT_T * sum_NT_T))
    for r in range(r_dim):
        T += np.kron(np.kron(head[:, r].detach().numpy(), left[:, r].detach().numpy()), right[:, r].detach().numpy())
    return T.reshape((NT, sum_NT_T, sum_NT_T))

In [35]:
T = rebuild_T_from_head_left_right(inference['head'][0], inference['left'][0], inference['right'][0], args['NT'], args['T'])

### 8.1.4 Complement grammar in args for NN-CYK model.

In [36]:
args['grammar_starts'] = inference['root'].detach().numpy()[0]
args['grammar_preterminates'] = inference_unary
args['grammar_double_nonterminates'] = T

In [37]:
# {
#     'head': np.random.random((args['NT'])),
#     'left': np.random.random((args['NT'] + args['T'], args['r_dim'])),
#     'right': np.random.random((args['NT'] + args['T'], args['r_dim']))
# }
args['beam_search_strategy'] = select_tops
args['preterminate_feature_generation_model'] = NN_CYK_FeatureCombingModel_Preterminate()
args['merge_model'] = NN_CYK_FeatureCombingModel_NonPreterminate()

In [38]:
nn_cyk_model = NN_CYK_Model(args)

### 8.1.5 Trainning

### 8.1.6 Inference

In [39]:
nn_cyk_model.forward(5)

append record.


tensor([ 0.7401, -0.1256, -0.1010, -0.5411, -0.5840, -0.1486,  0.1459, -0.0258,
         0.7829, -0.4209,  0.1955,  0.5428,  0.2669, -0.1389, -0.6353,  0.7618,
        -0.9138, -0.9114, -0.7384, -0.1219, -0.2241, -0.4728, -0.7312,  0.2011,
         0.5128, -0.0093, -1.0061,  0.4000,  0.4479,  0.8501, -0.0963,  0.1462,
        -0.7227,  0.8879, -0.6507, -0.3470,  0.2301,  0.2234, -0.5738,  0.3307,
        -0.0110, -1.1734, -0.4960, -0.0873,  0.8719,  0.5835,  0.2668,  0.4019,
        -1.2909,  0.8306,  0.0714, -0.6806,  0.3403, -0.0030, -0.4116, -0.6925,
        -0.3118, -0.3374, -0.3195,  0.9973, -0.3208,  0.6672,  0.3906, -0.9831,
         0.8489,  0.2963,  0.0044, -0.2399, -0.5768,  0.2718,  0.8990,  0.0952,
         0.4226,  0.1388,  0.8551, -0.1433, -0.1541, -0.1093, -0.2150,  0.5949,
         0.7079, -0.0673, -0.2975,  0.1168, -0.4673, -0.3232, -0.3551, -0.9788,
         0.1420, -0.1587,  0.5684, -0.0234,  0.2520,  0.1809,  0.4738, -0.9502,
        -0.1888,  0.0876,  0.4909, -0.36

In [40]:
nn_cyk_model.forward(5)

append record.
fill record in t=1


tensor([ 4.7252e-01,  2.7013e-01,  3.7258e-03,  3.2793e-01,  5.3487e-02,
         4.6068e-01,  3.9665e-01,  2.9438e-01,  1.7299e-01,  2.4426e-01,
        -4.6722e-01,  3.2486e-01, -4.1011e-02,  5.4341e-01, -1.0404e-01,
        -1.9778e-01, -2.9065e-01,  5.5908e-02,  7.7888e-01,  1.1204e-01,
        -1.2551e-01,  8.6686e-02, -2.4632e-01, -2.5040e-01, -3.2811e-01,
        -7.3649e-01, -2.2719e-02, -8.0527e-01, -3.0085e-01, -2.4382e-01,
         3.1719e-01, -2.3702e-01, -9.2219e-02,  3.3333e-01,  3.8298e-01,
        -2.3147e-01,  1.3858e-01, -4.6851e-01,  4.3485e-01,  5.6681e-02,
         3.7423e-01, -4.5774e-01, -7.6377e-02,  2.0358e-01, -2.7485e-01,
         1.5779e-01,  3.8227e-02,  5.1753e-01, -8.7715e-01, -1.8769e-01,
         1.4242e-01, -4.4473e-01,  3.0828e-01,  5.0050e-03,  5.3718e-01,
         7.5887e-01,  3.1679e-01,  3.7959e-01,  2.8627e-01, -4.5115e-01,
         7.8995e-02,  2.1161e-01, -5.8046e-01, -3.7135e-01,  3.3012e-01,
        -1.8936e-01,  3.7534e-01,  1.9536e-01, -2.6

In [41]:
nn_cyk_model.forward(4)

append record.
fill record in t=2
fill record in t=2


tensor([-0.1818, -0.1178, -0.1578, -0.0070, -0.0084, -0.0460,  0.0532,  0.1301,
         0.1007, -0.2285, -0.2410,  0.4679,  0.1176,  0.2411, -0.1363, -0.1358,
        -0.0600,  0.0019, -0.3458,  0.0272, -0.0565, -0.0531,  0.0081,  0.2192,
         0.0301,  0.1114, -0.3513, -0.2460,  0.1854, -0.2197,  0.1038, -0.0256,
        -0.1426,  0.1065,  0.0792, -0.0234,  0.0739, -0.2953,  0.1497, -0.1045,
        -0.1796,  0.1129,  0.1203, -0.1708, -0.1600,  0.1200, -0.1049, -0.0205,
        -0.1411, -0.0186,  0.0761, -0.1457,  0.1526,  0.0122, -0.0474, -0.1892,
         0.2593, -0.1574,  0.2464,  0.1238, -0.0404,  0.2189, -0.0007, -0.1194,
         0.0278, -0.1407,  0.2725,  0.0475, -0.0526,  0.2570, -0.1673,  0.1154,
        -0.0824,  0.0087, -0.0149,  0.1678,  0.1139,  0.1952, -0.1563,  0.2322,
         0.2230,  0.1286,  0.1506, -0.0929, -0.0704,  0.1703, -0.0965, -0.1433,
        -0.1075,  0.0854,  0.2216, -0.1878, -0.0398, -0.1222, -0.1281, -0.1246,
         0.0572, -0.0940,  0.1130,  0.25

In [42]:
nn_cyk_model.forward(7)

append record.
fill record in t=3
fill record in t=3
fill record in t=3


tensor([-0.1299, -0.0996, -0.2362,  0.1488,  0.0020, -0.1327, -0.1277,  0.1036,
        -0.1071, -0.1316, -0.1947,  0.2407,  0.1613,  0.2023, -0.3371, -0.0404,
        -0.0061,  0.1392, -0.3777, -0.2039,  0.0501, -0.0046,  0.1930,  0.1626,
        -0.1210, -0.0047, -0.0767, -0.2457,  0.1607, -0.3050,  0.1679, -0.1665,
         0.1365,  0.1923,  0.0803, -0.0547,  0.0418, -0.1368,  0.1224, -0.2792,
        -0.0638,  0.1716,  0.1986, -0.2399, -0.0911,  0.0649, -0.2683, -0.0314,
        -0.0832,  0.0184,  0.1114, -0.2995,  0.1383,  0.0407,  0.0235, -0.2863,
         0.1913,  0.1310,  0.1276,  0.1757, -0.0307, -0.0160, -0.0033, -0.0412,
         0.0372, -0.2220,  0.0854,  0.1808, -0.1700,  0.1353, -0.0960,  0.1468,
        -0.0208,  0.0273, -0.0868,  0.1020,  0.1401,  0.0056, -0.0177,  0.2583,
         0.1050,  0.1144,  0.1965,  0.1301,  0.0486,  0.0924, -0.1423,  0.1115,
        -0.1009,  0.2186,  0.2671, -0.0692,  0.2253,  0.0067, -0.0978, -0.2496,
        -0.0327, -0.0629,  0.0970,  0.15

## 8.2 Combine All the models and make predictions

In [43]:
simple_full_connection_prediction_model = FCPrediction()

In [44]:
syntax_infused_model = \
    SyntaxInfusedModel(sequential_model = LNN_electrode_value_based_prediction_model\
                       ,syntax_model = nn_cyk_model\
                       , combining_model = combine_model\
                       , prediction_model = simple_full_connection_prediction_model)

In [47]:
for t in range(5):
    input_t = torch.Tensor(random_eeg_data[:, t])
    word_t = np.random.randint(0, cnt_words)
    syntax_infused_model.forward(word_t, input_t)

append record.
fill record in t=4
fill record in t=4
fill record in t=4
fill record in t=4


TypeError: expected Tensor as element 0 in argument 0, but got tuple

In [49]:
tensor1 = torch.tensor([1,2,3])
tensor2 = torch.tensor([6,2,5])

torch.cat((tensor1, tensor2), dim=1)

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)