# Initialization

In [1]:
%load_ext autoreload
%autoreload 2

# Use only one GPU
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import sys
sys.path.append('../')
sys.path.append('../isanlp/src/')

In [2]:
# Suppress TensorFlow's memory appetite: enable `allow_growth` on the GPU
# options of the session config before the session is created.

import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)

# Hand the configured session to the Keras backend.
from tensorflow.python.keras import backend as K
K.set_session(sess)

In [3]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    """Return the names of all local devices whose `device_type` is 'GPU'."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

# Check GPUs availability
get_available_gpus()

['/device:GPU:0']

In [200]:
import numpy as np
np.random.seed(31)

import os
import time
import isanlp
import json
import pickle
import vectorization
import pandas as pd

# Load data

In [228]:
data_path = '../../data/models_new/known_preds/tensors/'

In [232]:
data = vectorization.load_vectors(data_path)

# Move the 'role' column out of the feature set into its own dataset `y`,
# so that `data` only holds model inputs from here on.
y = vectorization.VectorizedDataset({'role' : data._data.pop('role')})

print(data._data.keys())
print(data.size)

# Positional index of every example: 0 .. data.size - 1.
indexes = np.arange(data.size)

dict_keys(['arg_lemma', 'pred_lemma', 'dist', 'arg_case', 'pred_pos', 'arg_pos', 'syn_link_name', 'prepos', 'Aspect_arg', 'Number_arg', 'Tense_arg', 'Valency_arg', 'VerbForm_arg', 'Animacy_arg', 'Gender_arg', 'rel_pos'])
52701


# Models

In [None]:
#TODO: make connectors between feature models and neural network models.

In [153]:
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, LSTM, Convolution1D, Dropout, MaxPooling1D
from tensorflow.python.keras.preprocessing import sequence
from tensorflow.python.keras.layers import Flatten
from tensorflow.python.keras.layers import Input
from tensorflow.python.keras.layers import TimeDistributed
from tensorflow.python.keras.layers import Activation
from tensorflow.python.keras.layers import RepeatVector
from tensorflow.python.keras.layers import Permute
from tensorflow.python.keras.layers import Lambda
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.layers import BatchNormalization
from tensorflow.python.keras.layers import Concatenate
from tensorflow.python.keras.layers import Bidirectional
from tensorflow.python.keras.layers import Masking
from gensim.models import Word2Vec
from tensorflow.python.keras.optimizers import Adam

In [8]:
def construct_simple_model():
    """Build and compile a Conv1D -> LSTM -> Dense softmax classifier.

    The input shape is taken from the notebook-level names `seq_embeded`,
    `embeddings` and `get_embeddings_length`, and the output width from
    `number_of_roles`.
    NOTE(review): `seq_embeded`, `embeddings` and `get_embeddings_length` are
    not defined in any visible cell -- they must exist before this is called.

    Returns:
        The compiled Keras `Sequential` model (its summary is printed).
    """
    model = Sequential()
    # Keras 2 / tf.keras argument names (filters, kernel_size, padding); the
    # Keras 1 spellings nb_filter / filter_length / border_mode are not
    # accepted by the tf.keras Conv1D constructor.
    model.add(Convolution1D(filters=128,
                            kernel_size=2,
                            padding='same',
                            activation='relu',
                            input_shape=(seq_embeded.shape[1],
                                         get_embeddings_length(embeddings))))

    # model.add(MaxPooling1D(pool_size=2))
    model.add(LSTM(80))
    model.add(Dropout(0.1))
    model.add(Dense(60, activation='tanh'))
    model.add(Dense(number_of_roles, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    return model

In [9]:
def construct_simple_attentional_model():
    """Build and compile a Conv1D -> LSTM classifier with soft attention over time steps.

    The input shape is taken from the notebook-level names
    `arg_context_embedded`, `embeddings` and `get_embeddings_length`, and the
    output width from `number_of_roles`.
    NOTE(review): the first three are not defined in any visible cell -- they
    must exist before this is called.

    Returns:
        The compiled Keras functional `Model`.
    """
    # The Keras 1 `merge(..., mode='mul')` function is never imported in this
    # notebook; its Keras 2 replacement is the `Multiply` layer.
    from tensorflow.python.keras.layers import Multiply

    units = 80
    _input = Input(shape=(arg_context_embedded.shape[1],
                          get_embeddings_length(embeddings)), dtype='float')

    # Keras 2 / tf.keras argument names (filters, kernel_size, padding).
    conv = Convolution1D(filters=128,
                         kernel_size=2,
                         padding='same',
                         activation='relu')(_input)

    # (batch, steps, units): one LSTM output per time step.
    activations = LSTM(units, return_sequences=True)(conv)

    # Compute an importance score for each step and normalise it over steps.
    attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    # Broadcast the per-step weight across the `units` axis so that it can be
    # multiplied element-wise with `activations`.
    attention = RepeatVector(units)(attention)
    attention = Permute([2, 1])(attention)

    # Apply the attention: weight each step, then sum over the step axis.
    sent_representation = Multiply()([activations, attention])
    sent_representation = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)

    probabilities = Dense(number_of_roles, activation='softmax')(sent_representation)

    # tf.keras `Model` takes `inputs=` / `outputs=` (plural).
    model = Model(inputs=_input, outputs=probabilities)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [10]:
def construct_graph_bidirectional_model():
    """Build and compile a two-branch classifier: Conv1D + bidirectional LSTM over
    the argument context, concatenated with a dense branch over plain features.

    Reads the notebook-level names `arg_context_embedded`, `embeddings`,
    `get_embeddings_length`, `plain_features` and `number_of_roles`.

    NOTE(review): `Merge` is not imported in any visible cell, and the
    convolution keywords (nb_filter / filter_length / border_mode) are the
    Keras 1 spellings -- this function presumably predates the switch to
    `tensorflow.python.keras`; confirm it still runs before relying on it.
    NOTE(review): `arg_context_embedded`, `embeddings`, `get_embeddings_length`
    and `plain_features` are not defined in the visible cells.
    """
    print('Bidirectional model.')
    
    # Branch 1: convolution followed by a bidirectional LSTM whose forward and
    # backward outputs are summed (merge_mode='sum').
    arg_context_model = Sequential()
    arg_context_model.add(Convolution1D(nb_filter=150, 
                                        filter_length=2, 
                                        border_mode='same', 
                                        activation='relu',
                                        input_shape = (arg_context_embedded.shape[1], 
                                                       get_embeddings_length(embeddings))))
    arg_context_model.add(Bidirectional(LSTM(100), merge_mode = 'sum'))
    
    ###############################
    
    # Branch 2: a single dense layer over the plain feature vector.
    plain_model = Sequential()
    plain_model.add(Dense(700, 
                          input_shape=(plain_features.shape[1],), 
                          activation = 'relu'))
    
    ###############################
    
    # Head: concatenate both branches, then Dense -> BatchNorm -> ReLU and a
    # batch-normalised softmax output of width `number_of_roles`.
    final = Sequential()
    final.add(Merge([arg_context_model, plain_model], mode = 'concat', concat_axis=1))
    final.add(Dropout(0.3))
    
    #final.add(Dense(300, activation = 'relu'))
    final.add(Dense(300))
    final.add(BatchNormalization())
    final.add(Activation('relu'))
    final.add(Dropout(0.3))
    
    final.add(Dense(number_of_roles))
    final.add(BatchNormalization())
    final.add(Activation('softmax'))
    
    final.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return final

In [11]:
def construct_graph_attentional_model():
    """Build and compile a three-branch classifier: attention-pooled sequence
    encoders for the argument and predicate contexts plus a dense branch over
    plain features, concatenated into a shared head.

    Reads the notebook-level names `arg_context_embedded`,
    `pred_context_embedded`, `embeddings`, `get_embeddings_length`,
    `plain_features` and `number_of_roles`.

    NOTE(review): `merge` and `Merge` are not imported in any visible cell, and
    nb_filter / filter_length / border_mode and Model(input=, output=) are the
    Keras 1 spellings -- confirm this still runs under `tensorflow.python.keras`.
    NOTE(review): apart from `number_of_roles`, the globals listed above are
    not defined in the visible cells.
    """
    print('Context attentional model.')
    
    def construct_attentional_part(context_length):
        """Return a sub-model mapping a (context_length, embedding) sequence to
        an attention-weighted sum of its LSTM outputs."""
        _input = Input(shape = (context_length, 
                                get_embeddings_length(embeddings)), dtype = 'float')

        conv = Convolution1D(nb_filter=200, 
                            filter_length=2, 
                            border_mode='same', 
                            activation='relu')(_input)

        units = 100
        activations = LSTM(units, return_sequences=True)(conv)

        # compute importance for each step
        attention = TimeDistributed(Dense(1, activation='tanh'))(activations)  
        attention = Flatten()(attention)
        attention = Activation('softmax')(attention)
        # Repeat the per-step weight `units` times and transpose so it lines
        # up with `activations` for the element-wise product below.
        attention = RepeatVector(units)(attention)
        attention = Permute([2, 1])(attention)

        # apply the attention
        seq_repr = merge([activations, attention], mode='mul')
        seq_repr = Lambda(lambda xin: K.sum(xin, axis=1))(seq_repr)
        seq_model = Model(input=_input, output=seq_repr)
        
        return seq_model
    
    # One independent attentional encoder per context.
    arg_context_model = construct_attentional_part(arg_context_embedded.shape[1])
    pred_context_model = construct_attentional_part(pred_context_embedded.shape[1])
    
    ###############################
    
    # Dense branch over the plain feature vector.
    plain_model = Sequential()
    plain_model.add(Dense(800, 
                          input_shape=(plain_features.shape[1],), 
                          activation = 'relu'))
    
    
    ###############################
    
    # Head: concatenate the three branches, then Dense -> BatchNorm -> ReLU and
    # a batch-normalised softmax output of width `number_of_roles`.
    final = Sequential()
    final.add(Merge([arg_context_model, pred_context_model, plain_model], 
                    mode = 'concat', concat_axis=1))
    final.add(Dropout(0.3))
    
    #final.add(Dense(300, activation = 'relu'))
    final.add(Dense(400))
    final.add(BatchNormalization())
    final.add(Activation('relu'))
    final.add(Dropout(0.3))
    
    final.add(Dense(number_of_roles))
    final.add(BatchNormalization())
    final.add(Activation('softmax'))
    #final.add(Dense(number_of_roles, activation = 'softmax'))
#    final.add(BatchNormalization())
    #final.add(Activation('softmax'), W_regularizer=l2(0.01))
    #final.add(Dense(number_of_roles, activation='softmax', W_regularizer = l2(0.01)))
    
    final.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return final

In [12]:
def construct_graph_lstm_model(plain_features_shape):
    """Build and compile a classifier combining two embedding branches, a plain
    feature branch and a Conv1D + bidirectional-LSTM encoder of the predicate
    context.

    Args:
        plain_features_shape: passed as `input_shape` to the first Dense layer
            of the plain-feature branch.

    Reads the notebook-level names `pred_context_embedded`, `embeddings`,
    `get_embeddings_length` and `number_of_roles`.

    NOTE(review): `Merge` is not imported in any visible cell, and
    nb_filter / filter_length / border_mode are the Keras 1 spellings --
    confirm this still runs under `tensorflow.python.keras`.
    NOTE(review): `pred_context_embedded`, `embeddings` and
    `get_embeddings_length` are not defined in the visible cells.
    """
    print('Context model.')
    
    def create_embed_model():
        """Return a Dense(100) -> BatchNorm -> ReLU model over one embedding vector."""
        embed_model = Sequential()
        embed_model.add(Dense(100, input_shape = (get_embeddings_length(embeddings), )))
        embed_model.add(BatchNormalization())
        embed_model.add(Activation('relu'))
        return embed_model
    
    def construct_attentional_part(context_length):
        """Return a Conv1D -> BiLSTM -> Dense/BatchNorm/ReLU sequence encoder.

        Despite the name, no attention layer is built here.
        """
        seq_model = Sequential()
        seq_model.add(Convolution1D(nb_filter=50, 
                                    filter_length=1, 
                                    border_mode='same', 
                                    activation='relu',
                                    input_shape = (context_length, 
                                                   get_embeddings_length(embeddings))))
#         seq_model.add(Masking(mask_value=0., input_shape = (context_length, 
#                                                             get_embeddings_length(embeddings))))
        #seq_model.add(Masking(mask_value=1.))
        seq_model.add(Bidirectional(LSTM(50), merge_mode='sum'))
        #seq_model.add(LSTM(100))
        seq_model.add(Dense(50))
        seq_model.add(BatchNormalization())
        seq_model.add(Activation('relu'))
        
        return seq_model
    
    ###############################
    
    # Only the predicate context encoder is active; the argument one is disabled.
    #arg_context_model = construct_attentional_part(arg_context_embedded.shape[1])
    pred_context_model = construct_attentional_part(pred_context_embedded.shape[1])
    
    ###############################
    
    # Dense -> BatchNorm -> ReLU over the plain feature vector.
    plain_model = Sequential()
    plain_model.add(Dense(400, input_shape = plain_features_shape))
    plain_model.add(BatchNormalization())
    plain_model.add(Activation('relu'))
    
    ###############################
    
    # Two separately parameterised embedding branches (argument and predicate).
    arg_embed_model = create_embed_model()
    pred_embed_model = create_embed_model()
    
    ###############################
    
    # Stage 1: concatenate the embedding and plain branches and compress them.
    final1 = Sequential()
    final1.add(Merge([
  #              arg_context_model, 
                     #pred_context_model,
                     arg_embed_model,
                     pred_embed_model,
                     plain_model], 
                    mode = 'concat', concat_axis=1))
    final1.add(Dropout(0.3))
    
    final1.add(Dense(400))
    final1.add(BatchNormalization())
    final1.add(Activation('relu'))
    final1.add(Dropout(0.3))
    
    # Stage 2: join stage 1 with the predicate context encoder and classify.
    final = Sequential()
    final.add(Merge([final1, pred_context_model], mode = 'concat', concat_axis = 1))
    
    final.add(Dense(number_of_roles))
    final.add(BatchNormalization())
    final.add(Activation('softmax'))
    
    final.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return final

In [160]:
def construct_plain_model(number_of_features, number_of_roles):
    """Build (without compiling) a feed-forward classifier over one flat feature vector.

    Args:
        number_of_features: width of the input vector.
        number_of_roles: number of output classes (width of the softmax layer).

    Returns:
        An uncompiled Keras `Sequential` model.
    """
    print('Plain model.')

    network = Sequential()
    layer_stack = (
        # Input projection.
        Dense(600, input_shape=(number_of_features,), activation='relu'),
        Dropout(0.3),
        # Hidden block: Dense -> BatchNorm -> ReLU -> Dropout.
        Dense(400),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.3),
        # Output block: batch-normalised logits followed by softmax.
        Dense(number_of_roles),
        BatchNormalization(),
        Activation('softmax'),
    )
    for layer in layer_stack:
        network.add(layer)

    return network

In [14]:
def construct_plain_model_sparse(categ_size, emb_size, number_of_roles):
    """Build (without compiling) a three-input classifier.

    Inputs, in the order the returned model expects them:
    argument embedding, predicate embedding, categorical feature vector.

    Args:
        categ_size: width of the categorical feature vector.
        emb_size: width of each embedding input.
        number_of_roles: number of output classes.

    Returns:
        An uncompiled Keras functional `Model`.
    """
    def dense_bn_relu(width, tensor):
        # Dense -> BatchNorm -> ReLU, the building block shared by every branch.
        hidden = Dense(width)(tensor)
        hidden = BatchNormalization()(hidden)
        return Activation('relu')(hidden)

    input_plain = Input(shape=(categ_size,), name='input_categorical')
    input_pred_embed = Input(shape=(emb_size,), name='pred_embed')
    input_arg_embed = Input(shape=(emb_size,), name='arg_embed')

    # Per-input branches (each embedding gets its own weights).
    plain = dense_bn_relu(400, input_plain)
    embed_pred = dense_bn_relu(100, input_pred_embed)
    embed_arg = dense_bn_relu(100, input_arg_embed)

    # Shared head over the concatenated branches.
    joined = Concatenate(axis=1)([embed_pred, embed_arg, plain])
    joined = Dropout(0.3)(joined)
    joined = dense_bn_relu(400, joined)
    joined = Dropout(0.3)(joined)

    logits = Dense(number_of_roles)(joined)
    logits = BatchNormalization()(logits)
    probabilities = Activation('softmax')(logits)

    return Model([input_arg_embed, input_pred_embed, input_plain], probabilities)

# Experiments

In [222]:
from tensorflow.python.keras.utils import Sequence as KerasSequence


class GeneratorSimple:
    """Serves consecutive fixed-size batches of selected feature arrays.

    Indexing with a batch number returns a list with one entry per label (in
    the order of `labels`); entries whose label has a one-hot encoder are
    passed through that encoder's `transform`.
    """

    def __init__(self, indexes, data, batch_size, labels, one_hot_encoders = None):
        """
        Args:
            indexes: forwarded to `vectorization.select_vectors`
                (presumably the example indices to keep -- verify there).
            data: mapping-like object indexed by label.
            batch_size: number of rows per batch.
            labels: non-empty list of labels to serve, in output order.
            one_hot_encoders: optional dict label -> object with `transform`.

        Raises:
            ValueError: if `labels` is empty.
        """
        if not labels:
            raise ValueError('GeneratorSimple needs at least one label')

        self._data = vectorization.select_vectors({label : data[label] for label in labels}, indexes)
        # All selected arrays are assumed to share their first dimension; the
        # first one is used to determine the number of rows.
        self._data_size = self._data[next(iter(self._data.keys()))].shape[0]
        self._batch_size = batch_size
        self._labels = labels
        self._encs = one_hot_encoders

    def __len__(self):
        """Number of batches, counting a trailing partial batch."""
        # Integer ceiling division; avoids depending on the `math` module,
        # which this notebook never imports.
        return -(-self._data_size // self._batch_size)

    def _get_chunk(self, label, idx):
        """Rows of `label` that belong to batch number `idx`."""
        start = idx * self._batch_size
        return self._data[label][start : start + self._batch_size]

    def __getitem__(self, idx):
        """Return batch `idx` as a list of arrays, one per label."""
        batch = []
        for label in self._labels:
            chunk = self._get_chunk(label, idx)
            if self._encs is not None and label in self._encs:
                # Encoders expect a single column, hence the reshape.
                chunk = self._encs[label].transform(chunk.reshape(-1, 1))
            batch.append(chunk)
        return batch
    

class GeneratorUniversal(KerasSequence):
    """Keras `Sequence` that pairs a feature generator with a target generator."""

    def __init__(self, gen_X, gen_y):
        # Both generators are indexed with the same batch number.
        self._gen_X, self._gen_y = gen_X, gen_y

    def __len__(self):
        """Number of batches, as reported by the feature generator."""
        return len(self._gen_X)

    def __getitem__(self, idx):
        """Return the `(features, targets)` pair for batch `idx`."""
        features = self._gen_X[idx]
        targets = self._gen_y[idx]
        return (features, targets)
    

class GeneratorZeros:
    """Serves batches of all-zero arrays with a fixed per-row shape."""

    def __init__(self, data_size, shape, batch_size):
        """
        Args:
            data_size: total number of rows across all batches.
            shape: per-row shape (tuple or other sequence of ints).
            batch_size: number of rows per batch.
        """
        self._data_size = data_size
        self._shape = shape
        self._batch_size = batch_size

    def __len__(self):
        """Number of batches, counting a trailing partial batch."""
        # Integer ceiling division; avoids depending on the `math` module,
        # which this notebook never imports.
        return -(-self._data_size // self._batch_size)

    def __getitem__(self, idx):
        """Return a zero array of shape `(rows_in_batch,) + shape` for batch `idx`."""
        # The last batch may be shorter than `batch_size`.
        rows = min(self._batch_size, self._data_size - idx * self._batch_size)
        # NumPy is bound as `np` in this notebook; the bare name `numpy` is not.
        return np.zeros((rows,) + tuple(self._shape))
    

class GeneratorConcat:
    """Chains the per-batch item lists of several generators into one flat list."""

    def __init__(self, generators):
        self._generators = generators

    def __len__(self):
        """Number of batches, taken from the first generator."""
        first = self._generators[0]
        return len(first)

    def __getitem__(self, idx):
        """Return the items of batch `idx` from every generator, in generator order."""
        return [item
                for gen in self._generators
                for item in gen[idx]]
    
    
class GeneratorConcatSingle:
    """Joins all arrays of a batch along axis 1 into one array."""

    def __init__(self, generator):
        self._generator = generator

    def __len__(self):
        """Number of batches of the wrapped generator."""
        return len(self._generator)

    def __getitem__(self, idx):
        """Return a one-element list holding the column-wise concatenation of batch `idx`."""
        merged = np.concatenate(self._generator[idx], axis=1)
        return [merged]

    def shape(self):
        """Width (size of axis 1) of the concatenated array, measured on batch 0."""
        first_batch = self[0]
        return first_batch[0].shape[1]
    
    
class GeneratorLambdaDecorator:
    """Wraps a generator and post-processes every batch with a callable."""

    def __init__(self, generator, lmbd):
        self._generator, self._lmbd = generator, lmbd

    def __len__(self):
        """Number of batches of the wrapped generator."""
        return len(self._generator)

    def __getitem__(self, idx):
        """Return batch `idx` of the wrapped generator after applying the callable."""
        raw_batch = self._generator[idx]
        return self._lmbd(raw_batch)
    
    

def extract_one_hots(dataset):
    """Collect the one-hot encoders of a dataset.

    Args:
        dataset: object exposing a `_data` dict and a `get_onehot(key)` method.

    Returns:
        Dict mapping each key of `dataset._data` to `dataset.get_onehot(key)`,
        restricted to keys for which that value is truthy.
    """
    one_hots = {}
    for key in dataset._data.keys():
        # Look the encoder up once per key instead of once for the filter and
        # once more for the value.
        encoder = dataset.get_onehot(key)
        if encoder:
            one_hots[key] = encoder
    return one_hots


def construct_model(ctor, lr=0.1, batch_size=300):
    """Build a model with `ctor`, compile it for classification and print its summary.

    Args:
        ctor: zero-argument callable returning an uncompiled Keras model.
        lr: learning rate passed to the Adam optimizer.
            NOTE(review): 0.1 is far above Adam's usual default -- confirm it is intended.
        batch_size: unused; kept so existing callers keep working.

    Returns:
        The compiled model.
    """
    model = ctor()
    model.compile(optimizer=Adam(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    # summary() prints by itself and returns None; wrapping it in print()
    # only added a stray "None" line to the output.
    model.summary()
    return model


def make_ctor(model_type):
    """Turn an already-built Keras model into a factory of fresh, compiled copies.

    Every call of the returned factory clones the architecture of `model_type`
    with newly initialised weights and compiles the clone via
    `construct_model`. Returning `model_type` itself (as before) made every
    cross-validation fold continue training the same weights, so later folds
    were evaluated on examples the model had already been trained on.

    Args:
        model_type: a built (not necessarily compiled) Keras model used as the
            architecture template.

    Returns:
        Zero-argument callable producing a new compiled model on each call.
    """
    # Imported locally so this cell does not depend on a separate import cell.
    from tensorflow.python.keras.models import clone_model

    def mk_model():
        return construct_model(lambda: clone_model(model_type))

    return mk_model

## Simple model

In [230]:
def simple_features_generator_ctor(indexes, batch_size):
    """Build the (features, targets) batch generator for the single-input plain model.

    All feature columns of `data` are served (one-hot encoded where an encoder
    exists) and concatenated into one array per batch; targets are the
    encoded 'role' column of `y`.
    """
    feature_columns = GeneratorSimple(indexes,
                                      data,
                                      batch_size,
                                      list(data._data.keys()),
                                      extract_one_hots(data))
    flat_features = GeneratorConcatSingle(feature_columns)
    targets = GeneratorSimple(indexes,
                              y,
                              batch_size,
                              ['role'],
                              extract_one_hots(y))
    return GeneratorUniversal(flat_features, targets)


# A batch size equal to the dataset size yields one batch holding everything.
simple_features_generator = simple_features_generator_ctor(indexes, data.size)
number_of_features = simple_features_generator._gen_X.shape()
number_of_roles = len(y._data['role']['encoder'])
print('Number of features:', number_of_features)
print('Number of roles:', number_of_roles)

# We load everything in memory since dataset is rather small.
batch = simple_features_generator[0]
X_vec, y_vec = batch[0], batch[1]

Number of features: 947
Number of roles: 44


In [None]:
%%time

# `batch_size` was never bound in any cell, so this cell raised NameError on a
# fresh kernel. 64 matches the value the cross-validation cells pass to fit().
batch_size = 64

model = construct_model(lambda : construct_plain_model(number_of_features, 
                                                       number_of_roles))

# Stop once the validation loss has not improved for two consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')
model.fit(X_vec, y_vec, 
          epochs=15, batch_size=batch_size, validation_split = 0.1, 
          shuffle=True, callbacks = [early_stopping])

## Sparse model

In [193]:
# Columns fed to the model as separate embedding inputs; everything else is
# one-hot encoded where possible and concatenated into one categorical vector.
embedding_features = ['arg_lemma', 'pred_lemma']
other_features = [k for k in data._data.keys() if k not in embedding_features]


def complex_features_generator_ctor(indexes, batch_size):
    """Build the (features, targets) batch generator for the three-input sparse model.

    Each feature batch is a list: one array per entry of `embedding_features`
    followed by the concatenated remaining features.
    """
    embedding_inputs = GeneratorSimple(indexes,
                                       data,
                                       batch_size,
                                       embedding_features)
    categorical_input = GeneratorConcatSingle(GeneratorSimple(indexes,
                                                              data,
                                                              batch_size,
                                                              other_features,
                                                              extract_one_hots(data)))
    all_inputs = GeneratorConcat([embedding_inputs, categorical_input])
    targets = GeneratorSimple(indexes,
                              y,
                              batch_size,
                              ['role'],
                              extract_one_hots(y))
    return GeneratorUniversal(all_inputs, targets)


# A batch size equal to the dataset size yields one batch holding everything.
complex_features_generator = complex_features_generator_ctor(indexes, data.size)
number_of_categorical_features = complex_features_generator._gen_X._generators[1].shape()
number_of_roles = len(y._data['role']['encoder'])
print('Number of categorical features:', number_of_categorical_features)
print('Number of roles:', number_of_roles)

# We load everything in memory since dataset is rather small.
batch = complex_features_generator[0]
X_vec, y_vec = batch[0], batch[1]

Number of categorical features: 347
Number of roles: 44


In [None]:
%%time

# `batch_size` was never bound in any cell, so this cell raised NameError on a
# fresh kernel. 64 matches the value the cross-validation cells pass to fit().
batch_size = 64

# 300 is the width passed for each embedding input.
model = construct_model(lambda : construct_plain_model_sparse(number_of_categorical_features, 
                                                              300, 
                                                              number_of_roles))

# Stop once the validation loss has not improved for two consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')
model.fit(X_vec, y_vec, 
          epochs=15, batch_size=batch_size, validation_split = 0.1, 
          shuffle=True, callbacks = [early_stopping])

# Evaluation

In [210]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score


def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, *args, **kwargs):
    """Fit `model` on the training split and score it on the test split.

    Args:
        model: compiled Keras model.
        X_train, y_train: training inputs and one-hot targets.
        X_test, y_test: test inputs and one-hot targets (2-D array; the class
            index is recovered with `argmax(axis=1)`).
        *args, **kwargs: forwarded to `model.fit`.

    Returns:
        List: the values returned by `model.evaluate` followed by
        micro-F1, macro-F1 and accuracy computed with scikit-learn.
    """
    model.fit(X_train, y_train, *args, **kwargs)

    keras_eval = model.evaluate(X_test, y_test)

    # Class indices, computed once and reused by all three metrics.
    y_pred = model.predict(X_test).argmax(axis = 1)
    y_true = y_test.argmax(axis = 1)

    # scikit-learn metrics take (y_true, y_pred), in that order.
    f1_micro = f1_score(y_true, y_pred, average = 'micro')
    f1_macro = f1_score(y_true, y_pred, average = 'macro')
    accur = accuracy_score(y_true, y_pred)

    return list(keras_eval) + [f1_micro, f1_macro, accur]
    

def custom_cross_val(cr_f, X, y, cv, *args, **kwargs):
    """Run cross-validation with a freshly created model per fold.

    Args:
        cr_f: zero-argument callable returning the model for one fold.
        X: sequence of input arrays (one per model input), each indexable by
            an index array.
        y: target array; also what `cv.split` is called on.
        cv: splitter exposing `split(y)` and `n_splits`.
        *args, **kwargs: forwarded to `train_and_evaluate_model`.

    Returns:
        NumPy array with one row of evaluation results per fold.
    """
    fold_results = []
    for fold_no, (train, test) in enumerate(cv.split(y), start=1):
        model = cr_f()
        print("Running Fold", fold_no, "/", cv.n_splits)

        # Slice every model input with the same fold indices.
        X_train = [part[train] for part in X]
        y_train = y[train]
        X_test = [part[test] for part in X]
        y_test = y[test]

        fold_eval = train_and_evaluate_model(model,
                                             X_train, y_train,
                                             X_test, y_test,
                                             *args, **kwargs)

        print()
        print('Fold result: ', fold_eval)
        fold_results.append(fold_eval)

    return np.array(fold_results)


def describe_cv_result(cv_res):
    """Print the raw per-fold results, then their column-wise mean and std.

    Args:
        cv_res: 2-D array with one row per fold and five columns
            (loss, Keras accuracy, micro-F1, macro-F1, accuracy).
    """
    metric_names = ['loss', 'keras_accur', 'micro_f1', 'macro_f1', 'accur']

    print(cv_res)
    summaries = (
        ('Mean', cv_res.mean(axis = 0)),
        ('Std', cv_res.std(axis = 0)),
    )
    for title, stats in summaries:
        print(title)
        print(pd.DataFrame([stats], columns = metric_names))
    
    
from sklearn.model_selection import KFold
# Five contiguous, unshuffled folds -- the same splits as before.
# `random_state` only has an effect together with shuffle=True; passing it
# without shuffling changes nothing and is rejected with a ValueError by
# recent scikit-learn releases, so it is omitted.
cv = KFold(n_splits=5)

## Sparse model 

In [None]:
# Fetch the full-dataset batch from the sparse-model generator explicitly
# rather than relying on the shared `X_vec` / `y_vec` names, which are
# re-bound by both the simple-model and the sparse-model cells.
X_sparse, y_sparse = complex_features_generator[0]

cv_res = custom_cross_val(make_ctor(construct_plain_model_sparse(number_of_categorical_features, 
                                                                 300, 
                                                                 number_of_roles)),
                          X_sparse, y_sparse[0], 
                          cv = cv, epochs=13, batch_size=64,
                          validation_split = 0., shuffle=True, verbose = 0)

describe_cv_result(cv_res)

## Simple model

In [None]:
# When the notebook is run top to bottom, `X_vec` / `y_vec` hold the sparse
# model's three-input batch at this point (the sparse-features cell re-binds
# them), which does not fit the single-input plain model. Fetch the matching
# batch from the simple-features generator instead.
X_simple, y_simple = simple_features_generator[0]

cv_res = custom_cross_val(make_ctor(construct_plain_model(number_of_features, 
                                                          number_of_roles)),
                          X_simple, y_simple[0], 
                          cv = cv, epochs=13, batch_size=64,
                          validation_split = 0., shuffle=True, verbose = 0)

describe_cv_result(cv_res)