In [None]:
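# Map, merge, zero out (one-off preprocessing, kept for reference):
# rescale the first 13 embedding columns with (x + 1) / 2, append the node
# features column-wise, zero the last row, and save the result under data/.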
# import numpy as np
# emb = np.load('embeddings_p2q_5_wl10.npy')
# fea = np.load('node_features.npy')
# emb[:,:13] = (emb[:,:13]+1)/2
# embed = np.concatenate((emb, fea), axis=1)
# embed[-1] = np.zeros(embed.shape[1])
# np.save('data/embeddings_p2q_5_wl10.npy', embed, allow_pickle=False)

In [2]:
import sys
import json
import numpy as np

from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model
from keras.layers import Input, Embedding, Dropout, TimeDistributed, Dense, Add, add
from keras.layers import LeakyReLU, BatchNormalization

# = = = = = paths = = = = =

path_root = ''
path_to_code = path_root
path_to_data = path_root + 'data/'

# The module search path must be extended BEFORE the project-local imports
# below. Previously this ran after them, so changing `path_to_code` could
# never influence where those modules were loaded from.
sys.path.insert(0, path_to_code)

# Project-local modules (resolved through sys.path, see above).
# The star import is expected to provide bidir_gru / bidir_lstm, which are
# used later in the notebook and not imported anywhere else.
from utils import *
from AttentionWithContext import AttentionWithContext
from StructuredSelfAttentive import StructuredSelfAttentive
from AttentionWithMultiContext import AttentionWithMultiContext
from SkipConnection import SkipConnection

# = = = = = run flags = = = = =

is_GPU = True          # forwarded as the third argument of bidir_gru / bidir_lstm
save_weights = True    # when True, the ModelCheckpoint callback is passed to fit()
save_history = True    # when True, the training history is dumped to a JSON file

# = = = = = hyper-parameters = = = = =

n_units = 60             # size argument of every bidir_gru call
mc_n_units = 100         # size argument of the bidir_lstm calls
da = 15                  # `da` argument of StructuredSelfAttentive
r = 10                   # `r` argument of StructuredSelfAttentive
drop_rate = 0.2          # rate used by every Dropout layer
batch_size = 200         # batch size passed to model.fit
nb_epochs = 100          # upper bound on epochs (early stopping may end sooner)
my_optimizer = 'adam'    # optimizer passed to model.compile
my_patience = 5          # EarlyStopping patience, in epochs without val_loss improvement


# = = = = = data loading = = = = =

docs = np.load(path_to_data + 'documents_p2q_5_wl10.npy')
embeddings = np.load(path_to_data + 'embeddings_p2q_5_wl10.npy')

# One integer per line: the rows of `docs` that make up the training set.
with open(path_to_data + 'train_idxs.txt', 'r') as idx_file:
    train_idxs = list(map(int, idx_file.read().splitlines()))

# Carve a validation set out of the training rows: 80% of the positions are
# drawn without replacement for training, the remainder is held out.
# The seed is fixed so that the split is the same on every run.
np.random.seed(12219)
# np.random.seed(1)
all_positions = range(len(train_idxs))
idxs_select_train = np.random.choice(all_positions,
                                     size=int(len(train_idxs) * 0.80),
                                     replace=False)
idxs_select_val = np.setdiff1d(all_positions, idxs_select_train)

# Translate positions within train_idxs into row indices of `docs`.
train_idxs_new = [train_idxs[pos] for pos in idxs_select_train]
val_idxs = [train_idxs[pos] for pos in idxs_select_val]

docs_train = docs[train_idxs_new, :, :]
docs_val = docs[val_idxs, :, :]

# Target column handled by this cell.
tgt = 2

with open(path_to_data + 'targets/train/target_' + str(tgt) + '.txt', 'r') as tgt_file:
    target = tgt_file.read().splitlines()

# The target file is indexed by position in the training set (not by row of
# `docs`), hence the positional index arrays are used here.
target_all = np.array(target)
target_train = target_all[idxs_select_train].astype('float')
target_val = target_all[idxs_select_val].astype('float')

print('data loaded')

# = = = = = defining architecture = = = = =

# Sentence-level input: one sequence of integer ids whose length is the last
# axis of docs_train.
sent_ints = Input(shape=(docs_train.shape[2],))

# Frozen lookup table initialised from the pre-computed `embeddings` matrix
# (trainable=False, so it is not updated during training).
sent_wv = Embedding(input_dim=embeddings.shape[0],
                    output_dim=embeddings.shape[1],
                    weights=[embeddings],
                    input_length=docs_train.shape[2],
                    trainable=False,
                    )(sent_ints)

## HAN sent encoder
# Branch 1: dropout -> two stacked bidir_gru calls -> AttentionWithContext.
# bidir_gru comes from the `utils` star import; presumably a bidirectional
# GRU layer - confirm in utils.py.
sent_wv_dr = Dropout(drop_rate)(sent_wv)
# (disabled experiment) sent_wv_dr = BatchNormalization(sent_wv_dr)
sent_wa = bidir_gru(sent_wv_dr, n_units, is_GPU)
# second recurrent layer, stacked on the output of the first
sent_wa = bidir_gru(sent_wa, n_units, is_GPU)
# With return_coefficients=True the layer yields two tensors: the attended
# vector and the attention coefficients.
sent_att_vec, word_att_coeffs = AttentionWithContext(return_coefficients=True)(sent_wa)
sent_att_vec_dr = Dropout(drop_rate)(sent_att_vec)
# (disabled experiment) sent_att_vec_dr = BatchNormalization(sent_att_vec_dr)
# skip connection: combines the attended vector with the dropped-out
# embeddings (how they are combined is defined in SkipConnection.py)
sent_added = SkipConnection()([sent_att_vec_dr, sent_wv_dr])
sent_encoder = Model(sent_ints, sent_added)

## structured self-attentive
# Branch 2: same embedding output, but encoded with two stacked bidir_lstm
# calls followed by StructuredSelfAttentive(da, r).
mc_sent_wv_dr = Dropout(drop_rate)(sent_wv)
# (disabled experiment) mc_sent_wv_dr = BatchNormalization(mc_sent_wv_dr)
mc_sent_wa = bidir_lstm(mc_sent_wv_dr, mc_n_units, is_GPU)
# second recurrent layer, stacked on the output of the first
mc_sent_wa = bidir_lstm(mc_sent_wa, mc_n_units, is_GPU)
mc_sent_att_vec, mc_word_att_coeffs = StructuredSelfAttentive(da=da, r=r, return_coefficients=True)(mc_sent_wa)
mc_sent_att_vec_dr = Dropout(drop_rate)(mc_sent_att_vec)
# (disabled experiment) mc_sent_att_vec_dr = BatchNormalization(mc_sent_att_vec_dr)
# skip connection
mc_sent_added = SkipConnection()([mc_sent_att_vec_dr, mc_sent_wv_dr])
mc_sent_encoder = Model(sent_ints, mc_sent_added)

## combine context and target
# Document-level input: axes 1 and 2 of docs_train (the batch axis is implicit).
doc_ints = Input(shape=(docs_train.shape[1], docs_train.shape[2],))
# sentence encoder: TimeDistributed applies branch 1 to every slice along
# axis 1 of the document, then a bidir_gru runs over the resulting sequence
sent_att_vecs_dr = TimeDistributed(sent_encoder)(doc_ints)
doc_sa = bidir_gru(sent_att_vecs_dr, n_units, is_GPU)
# context: same treatment for branch 2 (note that the document-level layer
# is a bidir_gru with n_units here as well, not a bidir_lstm)
mc_sent_att_vecs_dr = TimeDistributed(mc_sent_encoder)(doc_ints)
mc_doc_sa = bidir_gru(mc_sent_att_vecs_dr, n_units, is_GPU)

# Document-level attention over both branches; also returns its coefficients.
doc_att_vec, sent_att_coeffs = AttentionWithMultiContext(return_coefficients=True)([doc_sa, mc_doc_sa])
doc_att_vec_dr = Dropout(drop_rate)(doc_att_vec)
# (disabled experiment) doc_att_vec_dr = BatchNormalization(doc_att_vec_dr)

# new
# Earlier dense-head experiments, kept for reference:
# hid = Dense(units=4, activation='tanh')(doc_att_vec_dr)
# hid1 = Dense(units=16, activation='relu')(doc_att_vec_dr) ###
# hid1r = Dropout(drop_rate)(hid1)
# hid2 = Dense(units=16, activation='relu')(hid1r)
# hid2r = Dropout(drop_rate)(hid2)
# hid3 = Dense(units=16, activation='relu')(hid2r)
# hid3r = Dropout(drop_rate)(hid3)
# hid4 = Dense(units=16, activation='relu')(hid3r)
# hid4r = Dropout(drop_rate)(hid4)
#   hid1 = Dense(units=32, activation='sigmoid')(doc_att_vec_dr)
#   hid2 = Dense(units=8, activation='sigmoid')(hid1)
# NOTE(review): two LeakyReLU activations are applied back to back with no
# Dense layer in between, so the only trainable layer of the head is the
# final Dense(units=1) - confirm this is intended.
hid1 = LeakyReLU(alpha=0.01)(doc_att_vec_dr)
hid2 = LeakyReLU(alpha=0.01)(hid1)
# Single linear output unit (no activation): scalar regression.
preds = Dense(units=1)(hid2)

model = Model(doc_ints, preds)

# Trained on mean squared error; mean absolute error is reported as a metric.
model.compile(loss='mean_squared_error', optimizer=my_optimizer, metrics=['mae'])

print('model compiled')

# = = = = = training = = = = =

# Stop once val_loss has not improved for `my_patience` consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=my_patience,
                               mode='min')

# save model corresponding to best epoch (weights only, one file per target)
checkpointer = ModelCheckpoint(filepath=path_to_data + 'model_sc' + str(tgt),
                               verbose=1,
                               save_best_only=True,
                               save_weights_only=True)

# Checkpointing is optional; early stopping is always active.
my_callbacks = [early_stopping, checkpointer] if save_weights else [early_stopping]

# fit() returns the History object, so read it from the return value rather
# than reaching back into model.history afterwards.
history = model.fit(docs_train,
                    target_train,
                    batch_size=batch_size,
                    epochs=nb_epochs,
                    validation_data=(docs_val, target_val),
                    callbacks=my_callbacks)

# Per-epoch values, keyed by metric name.
hist = history.history

if save_history:
    # History values can be NumPy scalars (e.g. float32), which json.dump
    # rejects with a TypeError; cast every entry to a built-in float first.
    hist_serializable = {name: [float(value) for value in values]
                         for name, values in hist.items()}
    with open(path_to_data + 'model_history_sc' + str(tgt) + '_sc.json', 'w') as file:
        json.dump(hist_serializable, file, sort_keys=False, indent=4)

print('* * * * * * * target',tgt,'done * * * * * * *')

data loaded
model compiled
Train on 59980 samples, validate on 14995 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.55819, saving model to data/model_sc2
Epoch 2/100

Epoch 00002: val_loss improved from 0.55819 to 0.50988, saving model to data/model_sc2
Epoch 3/100

Epoch 00003: val_loss improved from 0.50988 to 0.50027, saving model to data/model_sc2
Epoch 4/100

Epoch 00004: val_loss improved from 0.50027 to 0.48132, saving model to data/model_sc2
Epoch 5/100

Epoch 00005: val_loss improved from 0.48132 to 0.47970, saving model to data/model_sc2
Epoch 6/100

Epoch 00006: val_loss improved from 0.47970 to 0.46334, saving model to data/model_sc2
Epoch 7/100

Epoch 00007: val_loss improved from 0.46334 to 0.45276, saving model to data/model_sc2
Epoch 8/100

Epoch 00008: val_loss improved from 0.45276 to 0.45274, saving model to data/model_sc2
Epoch 9/100

Epoch 00009: val_loss improved from 0.45274 to 0.44346, saving model to data/model_sc2
Epoch 10/100

Epoch 00010: 

In [3]:
# Repeat the full build / compile / train cycle for the remaining targets.
# docs_train, docs_val, embeddings, the positional split and all
# hyper-parameters come from the previous cell; only the target column changes.
# All intermediates stay notebook globals, so after the loop they refer to the
# last target processed.
# NOTE(review): every iteration builds a fresh model in the same Keras
# session; if memory grows across targets, consider clearing the session
# between iterations.
for tgt in [0,1,3]:
    with open(path_to_data + 'targets/train/target_' + str(tgt) + '.txt', 'r') as file:
        target = file.read().splitlines()

    # The target file is indexed by position in the training set.
    target_train = np.array([target[elt] for elt in idxs_select_train]).astype('float')
    target_val = np.array([target[elt] for elt in idxs_select_val]).astype('float')

    print('data loaded')

    # = = = = = defining architecture = = = = =

    sent_ints = Input(shape=(docs_train.shape[2],))

    # Frozen lookup table initialised from the pre-computed embeddings.
    sent_wv = Embedding(input_dim=embeddings.shape[0],
                        output_dim=embeddings.shape[1],
                        weights=[embeddings],
                        input_length=docs_train.shape[2],
                        trainable=False,
                        )(sent_ints)

    ## HAN sent encoder
    # dropout -> two stacked bidir_gru calls -> AttentionWithContext
    sent_wv_dr = Dropout(drop_rate)(sent_wv)
    # (disabled experiment) sent_wv_dr = BatchNormalization(sent_wv_dr)
    sent_wa = bidir_gru(sent_wv_dr, n_units, is_GPU)
    sent_wa = bidir_gru(sent_wa, n_units, is_GPU)
    sent_att_vec, word_att_coeffs = AttentionWithContext(return_coefficients=True)(sent_wa)
    sent_att_vec_dr = Dropout(drop_rate)(sent_att_vec)
    # (disabled experiment) sent_att_vec_dr = BatchNormalization(sent_att_vec_dr)
    # skip connection
    sent_added = SkipConnection()([sent_att_vec_dr, sent_wv_dr])
    sent_encoder = Model(sent_ints, sent_added)

    ## structured self-attentive
    # dropout -> two stacked bidir_lstm calls -> StructuredSelfAttentive(da, r)
    mc_sent_wv_dr = Dropout(drop_rate)(sent_wv)
    # (disabled experiment) mc_sent_wv_dr = BatchNormalization(mc_sent_wv_dr)
    mc_sent_wa = bidir_lstm(mc_sent_wv_dr, mc_n_units, is_GPU)
    mc_sent_wa = bidir_lstm(mc_sent_wa, mc_n_units, is_GPU)
    mc_sent_att_vec, mc_word_att_coeffs = StructuredSelfAttentive(da=da, r=r, return_coefficients=True)(mc_sent_wa)
    mc_sent_att_vec_dr = Dropout(drop_rate)(mc_sent_att_vec)
    # (disabled experiment) mc_sent_att_vec_dr = BatchNormalization(mc_sent_att_vec_dr)
    # skip connection
    mc_sent_added = SkipConnection()([mc_sent_att_vec_dr, mc_sent_wv_dr])
    mc_sent_encoder = Model(sent_ints, mc_sent_added)

    ## combine context and target
    doc_ints = Input(shape=(docs_train.shape[1], docs_train.shape[2],))
    # sentence encoder applied to every slice along axis 1 of the document
    sent_att_vecs_dr = TimeDistributed(sent_encoder)(doc_ints)
    doc_sa = bidir_gru(sent_att_vecs_dr, n_units, is_GPU)
    # context
    mc_sent_att_vecs_dr = TimeDistributed(mc_sent_encoder)(doc_ints)
    mc_doc_sa = bidir_gru(mc_sent_att_vecs_dr, n_units, is_GPU)

    doc_att_vec, sent_att_coeffs = AttentionWithMultiContext(return_coefficients=True)([doc_sa, mc_doc_sa])
    doc_att_vec_dr = Dropout(drop_rate)(doc_att_vec)
    # (disabled experiment) doc_att_vec_dr = BatchNormalization(doc_att_vec_dr)

    # new
    # Earlier dense-head experiments, kept for reference:
    # hid = Dense(units=4, activation='tanh')(doc_att_vec_dr)
    # hid1 = Dense(units=16, activation='relu')(doc_att_vec_dr) ###
    # hid1r = Dropout(drop_rate)(hid1)
    # hid2 = Dense(units=16, activation='relu')(hid1r)
    # hid2r = Dropout(drop_rate)(hid2)
    # hid3 = Dense(units=16, activation='relu')(hid2r)
    # hid3r = Dropout(drop_rate)(hid3)
    # hid4 = Dense(units=16, activation='relu')(hid3r)
    # hid4r = Dropout(drop_rate)(hid4)
    #   hid1 = Dense(units=32, activation='sigmoid')(doc_att_vec_dr)
    #   hid2 = Dense(units=8, activation='sigmoid')(hid1)
    # NOTE(review): two LeakyReLU activations in a row with no Dense layer in
    # between; the final Dense(units=1) is the only trainable layer of the
    # head - confirm this is intended.
    hid1 = LeakyReLU(alpha=0.01)(doc_att_vec_dr)
    hid2 = LeakyReLU(alpha=0.01)(hid1)
    preds = Dense(units=1)(hid2)

    model = Model(doc_ints, preds)

    model.compile(loss='mean_squared_error', optimizer=my_optimizer, metrics=['mae'])

    print('model compiled')

    # = = = = = training = = = = =

    early_stopping = EarlyStopping(monitor='val_loss',
                                   patience=my_patience,
                                   mode='min')

    # save model corresponding to best epoch (weights only, one file per target)
    checkpointer = ModelCheckpoint(filepath=path_to_data + 'model_sc' + str(tgt),
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=True)

    if save_weights:
        my_callbacks = [early_stopping, checkpointer]
    else:
        my_callbacks = [early_stopping]

    # fit() returns the History object; use it directly instead of going
    # through model.history afterwards.
    history = model.fit(docs_train,
                        target_train,
                        batch_size = batch_size,
                        epochs = nb_epochs,
                        validation_data = (docs_val,target_val),
                        callbacks = my_callbacks)

    hist = history.history

    if save_history:
        # History values can be NumPy scalars (e.g. float32), which json.dump
        # rejects with a TypeError; cast every entry to a built-in float first.
        hist_serializable = {name: [float(value) for value in values]
                             for name, values in hist.items()}
        with open(path_to_data + 'model_history_sc' + str(tgt) + '_sc.json', 'w') as file:
            json.dump(hist_serializable, file, sort_keys=False, indent=4)

    print('* * * * * * * target',tgt,'done * * * * * * *')

data loaded
model compiled
Train on 59980 samples, validate on 14995 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.38724, saving model to data/model_sc0
Epoch 2/100

Epoch 00002: val_loss improved from 0.38724 to 0.30247, saving model to data/model_sc0
Epoch 3/100

Epoch 00003: val_loss improved from 0.30247 to 0.26064, saving model to data/model_sc0
Epoch 4/100

Epoch 00004: val_loss improved from 0.26064 to 0.25006, saving model to data/model_sc0
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.25006
Epoch 6/100

Epoch 00006: val_loss improved from 0.25006 to 0.21825, saving model to data/model_sc0
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.21825
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.21825
Epoch 9/100

Epoch 00009: val_loss improved from 0.21825 to 0.20521, saving model to data/model_sc0
Epoch 10/100

Epoch 00010: val_loss improved from 0.20521 to 0.20393, saving model to data/model_sc0
Epoch 11/100

Epoch 00011: val_los