# 1. Import Package

In [31]:
from scripts.applications.prediction_models.models.syntax_infused_model import *
from scripts.applications.prediction_models.models.sequential_model import *
from scripts.applications.prediction_models.models.neural_cky import *
from scripts.language_processing.language_builder.neural_builder.models.tn_pcfg import TNPCFG
from scripts.language_processing.language_builder.neural_builder.models.utils import rebuild_T_from_head_left_right

In [32]:
import numpy as np
import random

# 2. Variable and Function Definitions

In [33]:
# Values shared by several stages of the notebook; each one is written down exactly once
# and reused below so the top-level and nested settings cannot drift apart.
cnt_channels = 19     # channel count of the random EEG data
cnt_words = 10        # vocabulary size (number of distinct microstate labels)
word_emb_size = 200   # dimension of every word embedding

# Full configuration. The nested `syntax_infused_model_args` dict is what the
# TN-PCFG / NN-CYK models receive as `args`.
main_configuration = dict(
    cnt_channels=cnt_channels,
    cnt_words=cnt_words,
    word_emb_size=word_emb_size,
    syntax_infused_model_args=dict(
        NT=20,
        T=65,
        s_dim=50,
        r_dim=75,
        word_emb_size=word_emb_size,
        cnt_words=cnt_words,
        summary_parameters=True,
    ),
)

In [4]:
def generate_random_eeg_data(cnt_channels=19, cnt_time_samples=1000):
    """Create a random stand-in for a multi-channel EEG recording.

    Parameters
    ----------
    cnt_channels : int
        Number of rows (channels) in the result.
    cnt_time_samples : int
        Number of columns (time samples) in the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(cnt_channels, cnt_time_samples)`` whose entries
        are uniform samples from ``[0, 1)`` scaled by ``0.1 * 90``. The values
        are meant to be read as millivolts.
    """
    shape = (cnt_channels, cnt_time_samples)
    uniform_samples = np.random.random_sample(shape)
    return 0.1 * uniform_samples * 90

def generate_random_eeg_microstates(cnt_channels=19, cnt_time_samples=1000, cnt_microstates=None):
    """Draw a random microstate label for every time sample.

    Parameters
    ----------
    cnt_channels : int
        Not used by the computation; kept so existing calls that pass it
        keep working.
    cnt_time_samples : int
        Length of the returned label sequence.
    cnt_microstates : int, optional
        Number of distinct labels. When omitted, the notebook-level
        ``cnt_words`` is used, which is what the function always did before
        this parameter existed.

    Returns
    -------
    numpy.ndarray
        1-D integer array of length ``cnt_time_samples`` with values in
        ``[0, cnt_microstates)``. These are class labels, not voltages.
    """
    if cnt_microstates is None:
        # Fall back to the global vocabulary size defined in the configuration cell.
        cnt_microstates = cnt_words
    return np.random.randint(0, cnt_microstates, cnt_time_samples)

def generate_random_segmented_eeg_data(cnt_channels=19, cnt_time_samples=1000):  # unit: mV
    """Create random EEG data and cut it into consecutive segments along time.

    A split probability is drawn once; every interior time index
    ``1 .. cnt_time_samples - 1`` then becomes a cut point independently with
    that probability.

    Parameters
    ----------
    cnt_channels : int
        Number of channels (rows) of the generated recording.
    cnt_time_samples : int
        Number of time samples (columns) of the generated recording.

    Returns
    -------
    list of numpy.ndarray
        Segments of shape ``(cnt_channels, segment_length)`` in temporal order.
        Concatenating them along axis 1 gives back the full recording.
    """
    # Fixes: the generator is called `generate_random_eeg_data`, `np.random` is a module
    # (the sampler is `np.random.random`), and segments must be column *slices* rather
    # than single-element lookups.
    eeg_data = generate_random_eeg_data(cnt_channels, cnt_time_samples)
    split_probability = np.random.random()
    # One draw per interior index; index i becomes a cut point when its draw is below the probability.
    draws = np.random.random(max(cnt_time_samples - 1, 0))
    cut_points = np.flatnonzero(draws < split_probability) + 1
    return np.split(eeg_data, cut_points, axis=1)

def generate_random_corpus(word_count, cnt_article, article_max_length):
    """Generate a corpus of random word-id sequences.

    Parameters
    ----------
    word_count : int
        Vocabulary size; word ids are drawn from ``[0, word_count)``.
    cnt_article : int
        Number of articles to generate.
    article_max_length : int
        Largest allowed article length (inclusive). Must be at least 1.

    Returns
    -------
    list of numpy.ndarray
        ``cnt_article`` 1-D integer arrays, each with a length drawn uniformly
        from ``[1, article_max_length]``.

    Raises
    ------
    ValueError
        If ``article_max_length`` is smaller than 1.
    """
    if article_max_length < 1:
        raise ValueError("article_max_length must be at least 1")
    corpus = []
    for _ in range(cnt_article):
        # Lengths start at 1 so no article is empty, and the upper bound is inclusive so the
        # declared maximum can actually occur.
        article_length = np.random.randint(1, article_max_length + 1)
        corpus.append(np.random.randint(0, word_count, size=article_length))
    return corpus
    

# 3. Data Preparation

In [5]:
# Random stand-in EEG recording with `cnt_channels` rows and the default number of time samples.
random_eeg_data = generate_random_eeg_data(cnt_channels = cnt_channels)

In [6]:
# Random microstate label sequence; it is used below as the training sentence for the word-embedding model.
# The generator does not use `cnt_channels` in its computation; the argument is passed only for symmetry.
random_microstate_sequences = generate_random_eeg_microstates(cnt_channels = cnt_channels)

# 4. Word Embedding

In [7]:
from scripts.applications.prediction_models.models.word_embedding import *

In [8]:
# Word-embedding model over the `cnt_words` microstate vocabulary with `word_emb_size`-dimensional vectors.
# context_size=2 is forwarded as given — presumably the size of the context window; confirm in WordEmbeddingModel.
embeding_model = WordEmbeddingModel(vocab_size = cnt_words, embedding_dim = word_emb_size, context_size = 2)

## 4.1 Training

In [9]:
# The training corpus is a list of sentences; here it holds the single random microstate sequence.
sentences = [random_microstate_sequences]
# Judging by the recorded output, training reports the loss once per epoch.
embeding_model.train(sentences)

epoch = 0
loss = 2316.554201
epoch = 1
loss = 2265.522856
epoch = 2
loss = 2238.896778
epoch = 3
loss = 2220.725047
epoch = 4
loss = 2207.141982
epoch = 5
loss = 2195.629607
epoch = 6
loss = 2185.123014
epoch = 7
loss = 2175.435936
epoch = 8
loss = 2166.631076
epoch = 9
loss = 2157.718176


## 4.2 Extract Embeddings

In [10]:
# Embedding table read directly from the model's `embeddings` layer.
# NOTE(review): presumably a (cnt_words, word_emb_size) tensor that still tracks gradients — confirm
# before treating it as a constant downstream.
word_embeddings = embeding_model.embeddings.weight

# 5. Modal Combination Model Creation

In [11]:
# Combining model; it is handed to SyntaxInfusedModel as `combining_model` in section 8.2.
combine_model = SimpleConcatCombing()

# 6. Base Prediction Creation

In [12]:
# Base prediction model on raw electrode values: input size equals the channel count, with
# hidden_size=100 and output_size=1. sequence_length=1 presumably means one time sample per
# forward call — confirm in the model class.
LNN_electrode_value_based_prediction_model = LNNElectrodeValueBasedPredictionModel(ncp_input_size = cnt_channels, hidden_size=100, output_size=1, sequence_length=1)

# 7. Predicting the Pre-epileptic State Probability at Each Time Point Using the Base Prediction Model

## 7.1 Training

## 7.2 Inference

In [13]:
# Feed the recording through the base model one time sample at a time and display the last hidden state.
# NOTE(review): `torch` is not imported explicitly in this notebook; it presumably arrives through one
# of the star imports — confirm, or import it in the import cell.
h = LNN_electrode_value_based_prediction_model.h  # starting hidden state; replaced on the first iteration
for t in range(random_eeg_data.shape[1]):  # axis 1 of the recording indexes time samples
    input_t = torch.Tensor(random_eeg_data[:, t])  # values of all channels at time t
    # `h` is not passed back in, so the model presumably carries its state internally — confirm.
    # `out` is not used afterwards; only the final `h` is displayed.
    out, h = LNN_electrode_value_based_prediction_model.forward(input_t)
h

tensor([[-0.8936, -0.9323, -0.9659,  0.3482, -0.3273, -0.2126,  0.8614,  0.6195,
         -0.2569, -0.9274, -0.9663,  0.9594,  0.8754, -0.9216, -0.1042,  0.7159,
          0.2378,  0.6213, -0.6234,  0.3669, -0.8865,  0.9663,  0.2433, -0.5480,
         -0.5298, -0.7604, -0.6182,  0.4420, -0.7968, -0.6165, -0.6118, -0.2197,
          0.5159,  0.1830,  0.9282,  0.9352,  0.9030,  0.1166,  0.7881, -0.6910,
         -0.0988, -0.6070, -0.0401, -0.3706, -0.6404, -0.3943,  0.6682, -0.7829,
         -0.1743,  0.9386, -0.9970, -0.9615,  0.9774, -0.8386,  0.7160,  0.9941,
          0.5405, -0.6762, -0.6185,  0.1220, -0.0267, -0.5907,  0.2524, -0.1405,
         -0.5595, -0.2707, -0.2793,  0.0176, -0.3586,  0.0797,  0.1152,  0.2297,
         -0.1632, -0.0934, -0.3794, -0.3670, -0.2746,  0.1510,  0.7708,  0.0479,
         -0.0873,  0.0363,  0.4477,  0.2921, -0.2954,  0.3075, -0.3463, -0.2922,
         -0.1568, -0.3208, -0.1908, -0.6281,  0.6914,  0.1704, -0.3391, -0.4388,
         -0.3655, -0.0671, -

# 8. Syntax-infused Prediction Model 

In [14]:
# `args` is the nested configuration dict itself (no copy is made), so every key added to `args`
# from here on also appears in main_configuration['syntax_infused_model_args'].
args = main_configuration['syntax_infused_model_args']
# Make the trained embedding table available to the grammar models.
args['word_embeddings'] = word_embeddings

## 8.1 Build Grammar

### 8.1.1 TN-PCFG Model Creation

In [15]:
# Build the TN-PCFG from the shared argument dict; the recorded output shows the constructor
# echoing its hyper-parameters (device, NT, T, V, s_dim, r, word_emb_size).
tn_pcfg = TNPCFG(args=args)
# print(f"{tn_pcfg}")

build td pcfg
device = cpu, NT = 20, T = 65, V = 10, s_dim = 50, r = 75, word_emb_size = 200


In [16]:
# Display the module structure of the TN-PCFG.
tn_pcfg

TNPCFG(
  (root_mlp): Sequential(
    (0): Linear(in_features=50, out_features=50, bias=True)
    (1): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (2): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (3): Linear(in_features=50, out_features=20, bias=True)
  )
  (term_mlp): Sequential(
    (0): Linear(in_features=50, out_features=50, bias=True)
    (1): ResLayer(
      (linear): Sequential(
        (0): Linear(in_features=50, out_features=50, bias=True)
        (1): ReLU()
        (2): Linear(in_features=50, out_features=50, bias=True)
        (3): ReLU()
      )
    )
    (2): ResLayer(
      (linear): Sequential(
        (0): Linear(in

### 8.1.2 Corpus Creation

In [17]:
# The corpus is a list of 1000 random articles. Each article is a 1-D array of word (microstate)
# ids in [0, cnt_words) with a random length of at most 2000.
corpus = generate_random_corpus(cnt_words, 1000, 2000)

### 8.1.3 Training

### 8.1.4 Do Inference in TN-PCFG to Extract Grammars

In [18]:
# Use the first article of the corpus as the sentence to parse.
sentence_for_inference = corpus[0]

In [19]:
# Run the TN-PCFG on a batch holding one sentence: the word ids are reshaped to (1, sentence_length).
# `corpus[0]` is the same object as `sentence_for_inference`, so the length used in the reshape matches.
inference = tn_pcfg.forward(input={'word':(np.array(sentence_for_inference)).reshape((1, len(corpus[0])))})

begin forward>>>>>>>>>>>>>
x = [[5 9 2 6 7 0 8 8 7 1 9 5 4 5 8 5 3 6 7 8 3 4 9 2 2 9 0 2 9 2 2 3 8 2 3 3
  0 8 0 2 3 4 5 4 8 3 6 0 2 4 6 7 3 3 9 0 9 3 9 6 3 8 2 7 8 3 5 5 4 2 9 4
  1 0 9 8 7 4 6 0 5 5 3 0 4 4 0 2 8 7 7 4 4 4 2 8 2 6 7 2 6 6 0 9 1 6 4 2
  7 9 3 8 9 1 0 4 1 8 4 3 3 3 3 9 5 4 1 6 1 2 1 5 2 9 8 7 9 3 2 0 3 5 0 9
  0 6 3 6 5 3 4 1 2 6 5 0 0 4 2 9 4 3 7 2 5 2 8 9 9 5 7 9 3 3 7 6 5 2 4 2
  9 8 5 6 9 4 1 9 9 2 7 6 2 0 8 4 1 6 7 1 3 0 1 1 6 5 5 5 1 3 1 4 8 5 1 6
  1 1 8 0 9 5 7 0 0 6 7 7 6 3 2 6 0 1 6 4 0 0 7 2 1 6 0 0 1 8 4 8 4 5 9 9
  6 3 5 5 0 1 0 5 0 8 6 6 5 5 1 7 5 6 1 1 3 5 4 2 4 5 5 4 6 1 0 0 4 6 6 0
  4 8 6 0 0 1 3 4 7 4 5 8 6 8 2 0 1 2 3 3 8 9 0 9 4 8 3 8 7 3 1 3 3 6 6 1
  1 0 0 9 5 9 5 0 7 0 4 6 9 8 0 9 7 0 7 0 5 5 0 1 8 2 6 6 6 4 8 7 7 6 5 0
  3 1 6 2 1 7 3 3 3 0 9 5 4 6 7 3 1 4 9 4 0 4 4 2 3 6 5 8 3 2 6 4 7 2 3 8
  6 9 0 0 4 9 5 1 3 4 8 4 0 1 4 9 1 3 5 5 6 7 2 3 4 9 7 3 7 0 7 6 7 9 1 1
  3 9 6 5 2 5 6 0 3 5 1 7 9 7 1 1 2 0 4 0 6 8 4 1 4 4 2 5 5 5 8 2 5 7 6 5
  1 9 2

In [20]:
# List the entries returned by the forward pass.
inference.keys()

dict_keys(['unary', 'root', 'head', 'left', 'right', 'kl'])

In [21]:
# Convert the 'unary' inference result into a matrix with one column per vocabulary word.
# The original 'unary' array has one row per time point: row i holds one value per terminal
# symbol (the original note calls it the possibility of that symbol producing the word observed
# at time point i). According to that note, unary[i] == unary[j] whenever
# word_sequence[i] == word_sequence[j].
# Here each word's row is stored once, as column `word` of a (T, cnt_words) matrix.
inference_unary = np.zeros((args['T'], args['cnt_words']))
original_unary = inference['unary'].detach().numpy()[0]  # [0] selects the single sentence of the batch
sequence_length = original_unary.shape[0]
for i in range(sequence_length):
    # A repeated word overwrites its own column; columns of words absent from the sentence stay zero.
    inference_unary[:, sentence_for_inference[i]] = original_unary[i]

In [22]:
def rebuild_T_from_head_left_right(head, left, right, NT, T):
    """Rebuild the dense rule tensor from its rank-decomposed factors.

    Computes ``out[a, b, c] = sum_r head[a, r] * left[b, r] * right[c, r]``,
    i.e. the sum over ``r`` of the outer products of the ``r``-th columns.

    Note: this notebook-level definition shadows the function of the same name
    imported at the top of the notebook.

    Parameters
    ----------
    head : torch.Tensor or array-like, shape (NT, r)
        Factor for the first axis of the result.
    left : torch.Tensor or array-like, shape (NT + T, r)
        Factor for the second axis of the result.
    right : torch.Tensor or array-like, shape (NT + T, r)
        Factor for the third axis of the result.
    NT : int
        Size of the first axis of the result.
    T : int
        ``NT + T`` is the size of the second and third axes of the result.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(NT, NT + T, NT + T)``.

    Raises
    ------
    ValueError
        If the factor shapes do not match ``NT`` / ``NT + T``.
    """
    def _as_float64(factor):
        # Torch tensors are detached and moved to host memory once, instead of once per rank index.
        if hasattr(factor, 'detach'):
            factor = factor.detach().cpu().numpy()
        return np.asarray(factor, dtype=np.float64)

    rank = head.shape[1]
    symbol_count = NT + T  # computed up front; the parameter `T` is no longer overwritten
    head_np = _as_float64(head)
    # Only the first `rank` columns of left/right take part, as in the original loop over range(rank).
    left_np = _as_float64(left)[:, :rank]
    right_np = _as_float64(right)[:, :rank]
    # One contraction over the rank axis replaces the per-rank loop of nested Kronecker products.
    rules = np.einsum('ir,jr,kr->ijk', head_np, left_np, right_np)
    return rules.reshape((NT, symbol_count, symbol_count))

In [23]:
# Rule tensor for the first (only) sentence of the batch, shaped (NT, NT + T, NT + T).
# From here on the global name `T` refers to this tensor, while args['T'] is still the symbol count.
T = rebuild_T_from_head_left_right(inference['head'][0], inference['left'][0], inference['right'][0], args['NT'], args['T'])

### 8.1.4 Add the Extracted Grammar to args for the NN-CYK Model

In [24]:
# Pass the grammar extracted by the TN-PCFG to the NN-CYK model through `args`.
args['grammar_starts'] = inference['root'].detach().numpy()[0]  # 'root' entry of the first batch item
args['grammar_preterminates'] = inference_unary  # (T, cnt_words) matrix built from 'unary'
args['grammar_double_nonterminates'] = T  # (NT, NT + T, NT + T) rule tensor

In [25]:
# Leftover sketch of a randomly initialised rule factorisation, kept for reference (not executed):
# {
#     'head': np.random.random((args['NT'])),
#     'left': np.random.random((args['NT'] + args['T'], args['r_dim'])),
#     'right': np.random.random((args['NT'] + args['T'], args['r_dim']))
# }
# Remaining NN-CYK components: the beam-search selection function and the two feature-combining models.
args['beam_search_strategy'] = select_tops
args['preterminate_feature_generation_model'] = NN_CYK_FeatureCombingModel_Preterminate()
args['merge_model'] = NN_CYK_FeatureCombingModel_NonPreterminate()

In [26]:
# Instantiate the NN-CYK model from the completed argument dict.
nn_cyk_model = NN_CYK_Model(args)

### 8.1.5 Training

### 8.1.6 Inference

In [27]:
# First call, with word id 5.
# NOTE(review): judging by the recorded log ("append record."), each forward call presumably consumes
# one word id and keeps a running record between calls — confirm in NN_CYK_Model.
nn_cyk_model.forward(5)

append record.


tensor([-1.7470e-01, -9.9015e-01, -4.5680e-01,  6.3080e-01,  2.2052e-01,
        -7.0406e-03, -3.8170e-02, -5.4999e-01,  8.3020e-01,  3.2838e-01,
         1.0583e-01,  3.1033e-01,  2.2866e-01,  3.6989e-01, -3.6569e-01,
         7.6088e-01,  2.5127e-01, -1.0753e+00, -3.7243e-01, -3.0068e-01,
        -3.7434e-01,  4.7664e-01,  7.9388e-02,  3.8834e-01, -3.1606e-01,
         3.1148e-01,  4.0827e-02,  9.1925e-02,  5.6864e-01,  9.4673e-01,
        -6.2618e-01, -2.4650e-01,  3.7411e-01, -7.9313e-01,  1.7483e-01,
        -6.3663e-01, -4.2755e-02, -1.1456e-01,  2.7000e-01, -3.0094e-01,
        -1.7338e-01,  3.6630e-01,  2.7636e-01, -6.0237e-01, -7.9905e-02,
         6.5028e-02, -6.2205e-01,  4.7507e-01, -3.2025e-01,  1.9574e-01,
         5.8363e-01, -5.8914e-01, -2.3234e-01,  1.3944e+00,  1.1660e+00,
         9.3653e-02,  2.0989e-01,  8.7459e-02, -1.1467e+00,  1.1349e-01,
         5.0325e-01, -4.4695e-01,  5.5975e-01, -3.2556e-01, -2.7265e-01,
         1.1233e-01, -9.3233e-01, -1.0421e+00, -6.7

In [28]:
# Second call, again with word id 5; the recorded log now also shows "fill record in t=1",
# so the model's internal state presumably advanced since the previous call.
nn_cyk_model.forward(5)

append record.
fill record in t=1


tensor([ 0.1272,  0.1353,  0.2377,  0.3309, -0.1806,  0.5590,  0.0525,  0.0633,
         0.3696, -0.6724,  0.1980, -0.4173, -0.2284, -0.4956, -0.2927, -0.1055,
        -0.1757, -0.0144,  0.2956,  0.1919,  0.3481, -0.0436,  0.2674,  0.1037,
         0.4330, -0.2096, -0.1869, -0.0578,  0.0747, -0.0873, -0.0427, -0.3801,
         0.0920, -0.3652, -0.0531, -0.0454,  0.2238,  0.5878, -0.5348,  0.3666,
         0.1510, -0.0818,  0.0128, -0.1531, -0.3673, -0.4333,  0.2551,  0.3710,
         0.0408, -0.4413,  0.5292, -0.3761,  0.1423, -0.6953,  0.2112,  0.4118,
         0.1932, -0.0153,  0.1739,  0.1739,  0.2681,  0.2378, -0.3452,  0.3941,
         0.3300, -0.0015,  0.1783,  0.0571, -0.0564, -0.0942,  0.3491, -0.2167,
         0.1857,  0.4025,  0.1867,  0.1708, -0.2998, -0.1836,  0.3681, -0.3907,
        -0.4792,  0.0609,  0.1405,  0.3984, -0.3531,  0.1247, -0.3413,  0.3620,
        -0.1266,  0.0126, -0.0412, -0.2693, -0.1740,  0.0517, -0.1877,  0.5150,
         0.1997,  0.1115, -0.1218,  0.05

In [29]:
# Third call, with word id 4 (the recorded log shows two "fill record in t=2" lines).
nn_cyk_model.forward(4)

append record.
fill record in t=2
fill record in t=2


tensor([ 7.5095e-03, -1.6782e-01, -1.9793e-01,  1.1756e-01,  4.3696e-01,
        -1.8677e-01, -1.3287e-01,  1.1223e-01, -3.9837e-01,  1.8934e-01,
         9.6943e-02, -7.3208e-03,  9.2107e-02, -5.1977e-02, -9.2098e-02,
        -1.2132e-01, -2.0864e-01, -7.4195e-03, -1.7298e-02,  2.2324e-01,
        -2.1396e-02,  5.3131e-02, -2.6031e-02, -1.5106e-01, -1.3720e-01,
         3.5854e-01, -8.6360e-02,  8.9088e-02, -6.4349e-02,  2.1659e-01,
         1.8413e-01,  2.2197e-01, -8.0544e-02,  1.9438e-01, -2.2864e-01,
         2.8345e-01,  2.8249e-04,  1.5541e-01, -1.0634e-01,  6.3375e-02,
         3.6012e-01,  1.3544e-01, -1.7911e-01, -6.4018e-02,  1.3253e-01,
        -3.5549e-01,  3.6925e-03,  2.6757e-02, -6.9488e-01, -3.7756e-02,
        -8.2965e-03,  6.9227e-02, -3.1946e-01, -2.8161e-01, -5.3112e-02,
        -9.8041e-02,  7.1338e-02,  4.0295e-01, -6.7822e-02,  2.2466e-01,
        -1.6567e-01, -2.2848e-01, -1.4007e-01, -3.1535e-03,  6.1339e-02,
         1.6902e-01, -3.9981e-01,  8.3154e-02, -8.5

In [30]:
# Fourth call, with word id 7.
# NOTE(review): the visible part of the recorded output is identical to that of the previous call
# even though the input word differs — check whether the returned features really depend on the
# newest word.
nn_cyk_model.forward(7)

append record.
fill record in t=3
fill record in t=3
fill record in t=3


tensor([ 7.5095e-03, -1.6782e-01, -1.9793e-01,  1.1756e-01,  4.3696e-01,
        -1.8677e-01, -1.3287e-01,  1.1223e-01, -3.9837e-01,  1.8934e-01,
         9.6943e-02, -7.3208e-03,  9.2107e-02, -5.1977e-02, -9.2098e-02,
        -1.2132e-01, -2.0864e-01, -7.4195e-03, -1.7298e-02,  2.2324e-01,
        -2.1396e-02,  5.3131e-02, -2.6031e-02, -1.5106e-01, -1.3720e-01,
         3.5854e-01, -8.6360e-02,  8.9088e-02, -6.4349e-02,  2.1659e-01,
         1.8413e-01,  2.2197e-01, -8.0544e-02,  1.9438e-01, -2.2864e-01,
         2.8345e-01,  2.8249e-04,  1.5541e-01, -1.0634e-01,  6.3375e-02,
         3.6012e-01,  1.3544e-01, -1.7911e-01, -6.4018e-02,  1.3253e-01,
        -3.5549e-01,  3.6925e-03,  2.6757e-02, -6.9488e-01, -3.7756e-02,
        -8.2965e-03,  6.9227e-02, -3.1946e-01, -2.8161e-01, -5.3112e-02,
        -9.8041e-02,  7.1338e-02,  4.0295e-01, -6.7822e-02,  2.2466e-01,
        -1.6567e-01, -2.2848e-01, -1.4007e-01, -3.1535e-03,  6.1339e-02,
         1.6902e-01, -3.9981e-01,  8.3154e-02, -8.5

## 8.2 Combine All the models and make predictions

In [34]:
# Prediction model; it is handed to SyntaxInfusedModel as `prediction_model` in the next cell.
simple_full_connection_prediction_model = FCPrediction()

In [None]:
# Assemble the full pipeline from the sequential model, the NN-CYK syntax model, the combining
# model and the prediction model.
# NOTE(review): the original cell left `sequential_model` without a value, which is a syntax error.
# The base LNN model from section 6 is the only sequential model built in this notebook, so it is
# wired in here — confirm that this is the intended model.
syntax_infused_model = SyntaxInfusedModel(
    sequential_model=LNN_electrode_value_based_prediction_model,
    syntax_model=nn_cyk_model,
    combining_model=combine_model,
    prediction_model=simple_full_connection_prediction_model,
)