**Preprocessing models**:
- Spacy model: https://github.com/explosion/spacy-models/releases/tag/de_core_news_sm-2.3.0
- Word2Vec: Can be trained with the **Word2Vec_10kGNAD** notebook

In [2]:
import os
import datetime
import json
import itertools
from gensim.models import Word2Vec
import numpy as np
import spacy
from tensorflow.keras import Input
from tensorflow.keras import backend as K, initializers, regularizers, constraints
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Layer, Dropout, LSTM, Dense, InputLayer
from tensorflow.keras.losses import Loss
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity

print('Tensorflow Version: {}'.format(tf.__version__))

DATA_PATH = '../data/GermanFakeNC.json'
DATA_PATH_PROCESSED = '../data/GermanFakeNC_PROCESSED'
NUM_ARTICLES = 489
MODEL_NAME = "CLEF_2019_HANSEN"
MODEL_PATH_BASE = '../models/' + MODEL_NAME + '_BASE'
MODEL_PATH_RANKING = '../models/' + MODEL_NAME + '_RANKING'
MODEL_PATH_W2V = '../models/w2v.model'
MODEL_PATH_SPACY = '../models/de_core_news_sm-2.3.0'
SEED = 12345
NUM_SAMPLING_CANDIDATES = 5
LSTM_HIDDEN_UNITS = 100
EPOCHS = 10
CROSS_VALIDATION_K_FOLDS = 19
DATASET_SIZE = 14765
DATASET_TRAIN_SPLIT = 0.8
DATASET_DEV_SPLIT = 0.8
BATCH_SIZE = 120
DROPOUT = 0.3

# Load preprocessing models
w2v_model = Word2Vec.load(MODEL_PATH_W2V)
spacy_model = spacy.load("de_core_news_sm")

# Load the TensorBoard notebook extension
%load_ext tensorboard

Tensorflow Version: 2.4.1
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Data preprocessing

In [33]:
def read_data(path):
    with open(path) as json_file:
        return json.load(json_file)

def count_matches(false_statement, sentence):
    count = 0
    sent_copy = sentence[:]
    for w in false_statement:
        if w in sent_copy:
            count += 1
            sent_copy.remove(w)
    return count

def process_text(sentences, article_id,  max_sent_len):
    processed = []
    for s in sentences:
        # ignore sentences of length 1
        if len(s) <= 1:
            continue
        # ignore sentences consisting exclusively of punctuation
        if not any([not t.is_punct for t in s]):
            continue
        # ignore sentences not containing any letter
        if not any([any([c.isalpha() for c in t.text]) for t in s]):
            continue
        if len(s) > max_sent_len:
            max_sent_len = len(s)
        processed.append({
            'article_id': article_id,
            'org': s.text,
            'lbl': 0.0,
            'tokenized': [t.text for t in s],
            'tokenized_lower': [t.text.lower() for t in s]
        })
    return processed, max_sent_len

data = []
max_sent_len = 0
for article_id, article in enumerate(read_data(DATA_PATH)):
    title = spacy_model(article['Title']).sents
    teaser = spacy_model(article['Teaser']).sents
    text = spacy_model(article['Text']).sents
    
    p_title, max_sent_len = process_text(title, article_id, max_sent_len)
    p_teaser, max_sent_len = process_text(teaser, article_id, max_sent_len)
    p_text, max_sent_len = process_text(text, article_id, max_sent_len)
       
    article_data = p_title + p_teaser + p_text

    # Label sentences
    false_statements = [article['False_Statement_1'], article['False_Statement_2'], article['False_Statement_3']]     
    for fs in false_statements:
        if fs != '':
            fs_tokens = [t.text.lower() for t in spacy_model(fs)]
            matches = [count_matches(fs_tokens, t) for t in [d['tokenized_lower'] for d in article_data]]
            m = max(matches)
            max_indexes = [i for i, j in enumerate(matches) if j == m]
            
            # +++++++ DEBUG CODE - START +++++++++ #
            #if article_id == 400:
            #    print("\n\nFalse Statement: {} \n\n".format(fs))
            #    for mi in max_indexes:
            #        print(article_data[mi]['org'])
            # +++++++ DEBUG CODE - END   +++++++++ #
                
            for mi in max_indexes:
                article_data[mi]['lbl'] = 1.0
            
    data = data + article_data

### Labeling tests
#### Options to match fake statements to sentences
* Test if sentence is in fake statement: matched 53.7% of false statements 
* Seperate into word tokens and test if some percetage of words is in a false statement
* Label sentence with most matching words as false statement

In [None]:
tf_stats = 0
for a in read_data(DATA_PATH):
    for number in ['1','2','3']:
        if a['False_Statement_' + number] != '':
            tf_stats += 1
            
cf_stats = len(list(filter(lambda d: not d['lbl'], data))) 
print("Number of all sentences {}".format(len(data)))
print("True number of false statements {}".format(tf_stats))
print("Classified number of false statements {} ({:.1f}%)".format(cf_stats, (cf_stats * 100) / tf_stats))

### Dependency Parsing

In [22]:
def to_deps(doc, max_sent_len):
    oh_vectors = []
    for token in doc:
        vec = np.zeros(max_sent_len)
        vec[token.head.i] = 1
        oh_vectors.append(vec)
        
    # padding with 0 vectors to max sentence length
    while len(oh_vectors) < max_sent_len:
        oh_vectors.append(np.zeros(max_sent_len))
    return oh_vectors


In [35]:
for d in data:
    doc = spacy_model(d['org'])
    d['processed'] = to_deps(doc, max_sent_len)

### Word Embedding

In [23]:
def embed(sentence, max_sent_len):
    vectorized_sentence = []
    vector_dim = w2v_model.wv.vector_size
    for word in sentence:
        if word in w2v_model.wv:
            vectorized_sentence.append(w2v_model.wv[word])
        else:
            vectorized_sentence.append(np.zeros(vector_dim))
            
    # padding with 0 vectors to max sentence length
    while len(vectorized_sentence) < max_sent_len:
        vectorized_sentence.append(np.zeros(vector_dim))
        
    return vectorized_sentence


In [37]:
for d in data:
    embedded_words = embed(d['tokenized_lower'], max_sent_len)
    d['processed'] = np.concatenate((embedded_words, d['processed']), axis=1)

### Seperating data

In [38]:
# data is seperated by article because of MAP evaluation later
num_train_articles = int(DATASET_TRAIN_SPLIT * NUM_ARTICLES)
train_data = list(filter(lambda d: d['article_id'] <= num_train_articles, data))
test_data = list(filter(lambda d: d['article_id'] > num_train_articles, data))

### Contrastive Sampling

In [42]:
def compute_sentence_embeddings(data):
    word_vector_dim = w2v_model.wv.vector_size
    for d in data:
        word_embeddings = [w[:word_vector_dim] for w in d['processed']]
        yield np.mean(word_embeddings, axis=0)
        
def retrieve_topk_ixs(entry_index, data, k, sims):
    topk_stack = [(0,0)]
    
    for i, sim in enumerate(sims):
        is_greater = any([sim > tk_sim for (index, tk_sim) in topk_stack])
        negative_label = data[entry_index]['lbl'] != data[i]['lbl']
        not_own_sim = entry_index != i
        
        if is_greater and negative_label and not_own_sim: 
            if len(topk_stack) >= k:
                topk_stack.pop()

            topk_stack.append((i, sim))    
            topk_stack.sort(reverse=True)
    return [index for (index, sim) in topk_stack]

# only use train data
# no negative sampling for test data neccesary
sentence_embeddings = list(compute_sentence_embeddings(train_data))

similarities = cosine_similarity(sentence_embeddings, sentence_embeddings)

k = NUM_SAMPLING_CANDIDATES
processed_topk_candidates = []
for i, row_sims in enumerate(similarities):
    top_k_ixs = retrieve_topk_ixs(i, data, k, row_sims)
    
    top_k_processed = []    
    for top_k_ix in top_k_ixs:
        top_k_processed.append(train_data[top_k_ix]['processed']) 
    processed_topk_candidates.append(top_k_processed)
    

def assign_candidate(d, ptc):
    d_copy = dict(d)
    d_copy['cs'] = ptc
    return d_copy
    
train_data = [[assign_candidate(d, ptc) for ptc in ptcs] for d, ptcs in zip(train_data, processed_topk_candidates)]

flatten = lambda lst: [j for sub in lst for j in sub]
train_data = flatten(train_data)

### Serialization

In [43]:
def chunks(lst, n):
    # yield successive n-sized chunks from lst.
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
        
def serialize_wsampling(sdata, chunk_size, file_suffix):
    aid_chunks = chunks([d['article_id'] for d in sdata], chunk_size)
    X_chunks = chunks([d['processed'] for d in sdata], chunk_size)
    y_chunks = chunks([d['lbl'] for d in sdata], chunk_size)
    cs_chunks = chunks([d['cs'] for d in sdata], chunk_size)

    zipped_chunks = zip(aid_chunks, X_chunks, y_chunks, cs_chunks)
    for (i, (aid_chunk, X_chunk, y_chunk, cs_chunk)) in enumerate(zipped_chunks):
        writer = tf.io.TFRecordWriter(DATA_PATH_PROCESSED + '_{}_{}'.format(file_suffix, i) + '.tfrecords')
        for (aidc, xc, yc, csc) in zip(aid_chunk, X_chunk, y_chunk, cs_chunk):
            # Convert to TFRecords and save to file
            feature = {
                'article_id': tf.train.Feature(int64_list=tf.train.Int64List(value=[aidc])),
                'x': tf.train.Feature(float_list=tf.train.FloatList(value=np.stack(xc).flatten())),
                'y': tf.train.Feature(float_list=tf.train.FloatList(value=[yc])),
                'cs': tf.train.Feature(float_list=tf.train.FloatList(value=np.stack(csc).flatten()))
            }
            
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            serialized = example.SerializeToString()
            writer.write(serialized)
        writer.close()
        
def serialize(sdata, chunk_size, file_suffix):
    aid_chunks = chunks([d['article_id'] for d in sdata], chunk_size)
    X_chunks = chunks([d['processed'] for d in sdata], chunk_size)
    y_chunks = chunks([d['lbl'] for d in sdata], chunk_size)
    
    zipped_chunks = zip(aid_chunks, X_chunks, y_chunks)
    for (i, (aid_chunk, X_chunk, y_chunk)) in enumerate(zipped_chunks):
        writer = tf.io.TFRecordWriter(DATA_PATH_PROCESSED + '_{}_{}'.format(file_suffix, i) + '.tfrecords')
        for (aidc, xc, yc) in zip(aid_chunk, X_chunk, y_chunk):
            # Convert to TFRecords and save to file
            feature = {
                'article_id': tf.train.Feature(int64_list=tf.train.Int64List(value=[aidc])),
                'x': tf.train.Feature(float_list=tf.train.FloatList(value=np.stack(xc).flatten())),
                'y': tf.train.Feature(float_list=tf.train.FloatList(value=[yc]))
            }
            
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            serialized = example.SerializeToString()
            writer.write(serialized)
        writer.close()
        
chunk_size = 2000
serialize(train_data, chunk_size, 'TRAIN_SAMPLING')
serialize(test_data, chunk_size, 'TEST')

#### Serialize Base Model Data

In [44]:
serialize_wsampling(train_data, chunk_size, 'TRAIN_SAMPLING')
serialize(test_data, chunk_size, 'TEST')

#### Serialize Ranking Model Data

In [41]:
serialize(train_data, chunk_size, 'TRAIN')
serialize(test_data, chunk_size, 'TEST')

# Model Definition and Training

In [16]:
def input_parser(example):
    feature_description = {'article_id': tf.io.FixedLenFeature([1], dtype=tf.int64), 
                           'x': tf.io.FixedLenFeature([135, 285], dtype=tf.float32),
                           'y': tf.io.FixedLenFeature([1], dtype=tf.float32)}

    parsed = tf.io.parse_single_example(example, feature_description)
    return (parsed['article_id'],parsed['x'],parsed['y'])

def input_parser_cs(example):
    feature_description = {'article_id': tf.io.FixedLenFeature([1], dtype=tf.int64), 
                           'x': tf.io.FixedLenFeature([135, 285], dtype=tf.float32),
                           'y': tf.io.FixedLenFeature([1], dtype=tf.float32),
                           'cs': tf.io.FixedLenFeature([135, 285], dtype=tf.float32)}

    parsed = tf.io.parse_single_example(example, feature_description)
    return (parsed['article_id'],parsed['x'],parsed['y'],parsed['cs'])

train_data_files = tf.data.Dataset.list_files(DATA_PATH_PROCESSED + '_TRAIN_*.tfrecords')
train_data_raw = tf.data.TFRecordDataset(train_data_files)
train_dataset = train_data_raw.map(input_parser)

train_sampling_data_files = tf.data.Dataset.list_files(DATA_PATH_PROCESSED + '_TRAIN_SAMPLING_*.tfrecords')
train_sampling_data_raw = tf.data.TFRecordDataset(train_sampling_data_files)
train_sampling_dataset = train_sampling_data_raw.map(input_parser_cs)

test_data_files = tf.data.Dataset.list_files(DATA_PATH_PROCESSED + '_TEST_*.tfrecords')
test_data_raw = tf.data.TFRecordDataset(train_data_files)
test_dataset = test_data_raw.map(input_parser)

# shuffling seems to produce an error, maybe include later again
#train_dataset = train_dataset.map(lambda ida, x, y, topk: (x, y, topk)).shuffle(1000).batch(BATCH_SIZE)

# there has already been a train/test data split in preprocessing
train_dataset_size = int(DATASET_SIZE * DATASET_TRAIN_SPLIT)

train_sampling_dataset_size = int(train_dataset_size * NUM_SAMPLING_CANDIDATES * DATASET_DEV_SPLIT)
train_sampling_dataset = train_sampling_dataset.map(lambda ida, x, y, cs: ({'in_s1': x, 'in_s2': cs}, {'out_s1': y,'out_diff': y}))
train_sampling_dataset_split = train_sampling_dataset.take(train_sampling_dataset_size).batch(BATCH_SIZE)
dev_sampling_dataset = train_sampling_dataset.skip(train_sampling_dataset_size).batch(BATCH_SIZE)

train_dataset_size = int(DATASET_SIZE * DATASET_DEV_SPLIT)
train_dataset = train_dataset.map(lambda ida, x, y: (x, y))
train_dataset_split = train_dataset.take(train_dataset_size).batch(BATCH_SIZE)
dev_dataset = train_dataset.skip(train_dataset_size).batch(BATCH_SIZE)

# the eval examples do contain article_id to determine MAP
test_dataset_eval = test_dataset
# test examples for model don't contain article id
test_dataset = test_dataset.map(lambda ida, x, y: (x, y)).batch(BATCH_SIZE)

In [18]:
for x in train_dataset.take(1):
    print(x)

(<tf.Tensor: shape=(135, 285), dtype=float32, numpy=
array([[ 2.0907426 ,  0.22835703,  1.4968257 , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.02685186,  0.40970182, -0.16604422, ...,  0.        ,
         0.        ,  0.        ],
       [-2.2532084 ,  2.248028  ,  3.177055  , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>)


### Model definitions

In [4]:
# SOURCE: https://gist.github.com/cbaziotis/6428df359af27d58078ca5ed9792bd6d

def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        # todo: check that this is correct
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)


class Attention(Layer):
    def __init__(self,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True,
                 return_attention=False,
                 **kwargs):
        """
        Keras Layer that implements an Attention mechanism for temporal data.
        Supports Masking.
        Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]
        # Input shape
            3D tensor with shape: `(samples, steps, features)`.
        # Output shape
            2D tensor with shape: `(samples, features)`.
        :param kwargs:
        Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
        The dimensions are inferred based on the output shape of the RNN.
        Note: The layer has been tested with Keras 1.x
        Example:
        
            # 1
            model.add(LSTM(64, return_sequences=True))
            model.add(Attention())
            # next add a Dense layer (for classification/regression) or whatever...
            # 2 - Get the attention scores
            hidden = LSTM(64, return_sequences=True)(words)
            sentence, word_scores = Attention(return_attention=True)(hidden)
        """
        self.supports_masking = True
        self.return_attention = return_attention
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(Attention, self).__init__(**kwargs)
        
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'supports_masking': self.supports_masking,
            'return_attention': self.return_attention,
            'init': self.init,
            'W_regularizer': self.W_regularizer,
            'b_regularizer': self.b_regularizer,
            'W_constraint': self.W_constraint,
            'b_constraint': self.b_constraint,
            'bias': self.bias,
        })
        return config

    def build(self, input_shape):
        assert len(input_shape) == 3

        
        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result

    def compute_output_shape(self, input_shape):
        if self.return_attention:
            return [(input_shape[0], input_shape[-1]),
                    (input_shape[0], input_shape[1])]
        else:
            return input_shape[0], input_shape[-1]

In [5]:
def build_base_model(model_name='base'):
    inp_shape = (135, 285)
    model = Sequential(name=model_name)
    model.add(LSTM(LSTM_HIDDEN_UNITS, input_shape=inp_shape, return_sequences = True, name='lstm'))
    model.add(Attention(name='attention'))
    model.add(Dropout(DROPOUT))
    model.add(Dense(1, activation='sigmoid', name='dense'))
    return model

In [9]:
def build_ranking_model():
    inp_shape = (135, 285)
    
    in_s1 = Input(inp_shape, name='in_s1')
    in_s2 = Input(inp_shape, name='in_s2')
    
    base_model = build_base_model()
    
    out_s1 = base_model(in_s1)
    out_s1 = Layer(name='out_s1')(tf.identity(out_s1))
    out_s2 = base_model(in_s2)
    out_diff = Layer(name='out_diff')(tf.math.subtract(out_s1, out_s2, name='out_diff'))
    
    model = tf.keras.Model(inputs=[in_s1, in_s2], outputs=[out_s1, out_diff], name='ranking')
    
    return model

### Model training

In [6]:
def get_checkpoint_callback(model_path, monitor_value):
    return tf.keras.callbacks.ModelCheckpoint(model_path, 
                                              monitor=monitor_value, verbose=1, 
                                              save_best_only=True, mode='max')

log_dir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir, histogram_freq=1)

In [19]:
model = build_base_model()
model.summary()
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

checkpoint_callback = get_checkpoint_callback(MODEL_PATH_BASE, 'val_binary_accuracy')

history = model.fit(train_dataset_split,
            epochs=EPOCHS,
            callbacks=[checkpoint_callback, tensorboard_callback],
            validation_data=dev_dataset)

Model: "base"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 135, 100)          154400    
_________________________________________________________________
attention (Attention)        (None, 100)               235       
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense (Dense)                (None, 1)                 101       
Total params: 154,736
Trainable params: 154,736
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10

Epoch 00001: val_binary_accuracy improved from -inf to 0.92561, saving model to ../models/CLEF_2019_HANSEN_BASE




INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_BASE/assets


INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_BASE/assets


Epoch 2/10

Epoch 00002: val_binary_accuracy did not improve from 0.92561
Epoch 3/10

Epoch 00003: val_binary_accuracy improved from 0.92561 to 0.92707, saving model to ../models/CLEF_2019_HANSEN_BASE




INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_BASE/assets


INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_BASE/assets


Epoch 4/10

Epoch 00004: val_binary_accuracy improved from 0.92707 to 0.93209, saving model to ../models/CLEF_2019_HANSEN_BASE




INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_BASE/assets


INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_BASE/assets


Epoch 5/10

Epoch 00005: val_binary_accuracy did not improve from 0.93209
Epoch 6/10

Epoch 00006: val_binary_accuracy did not improve from 0.93209
Epoch 7/10

Epoch 00007: val_binary_accuracy did not improve from 0.93209
Epoch 8/10

Epoch 00008: val_binary_accuracy did not improve from 0.93209
Epoch 9/10

Epoch 00009: val_binary_accuracy did not improve from 0.93209
Epoch 10/10

Epoch 00010: val_binary_accuracy did not improve from 0.93209


In [10]:
class RankingError(Loss):    
    def call(self, y_true, y_diff):
        pos = tf.constant([1.0 for i in range(BATCH_SIZE)])
        neg = tf.constant([-1.0 for i in range(BATCH_SIZE)])
        sign = tf.where(tf.equal(y_true,1.0), pos, neg)

        return tf.math.maximum(0.0, 1.0 - sign * y_diff)
    
    
model = build_ranking_model()
tf.keras.utils.plot_model(model, show_shapes=True)

model.compile(
    optimizer='adam',
    loss=[
        tf.keras.losses.BinaryCrossentropy(),
        RankingError(),
    ],
    loss_weights=[0.5, 0.5],
    metrics=[tf.keras.metrics.BinaryAccuracy()]
)

checkpoint_callback = get_checkpoint_callback(MODEL_PATH_RANKING, 'val_out_s1_binary_accuracy')

history = model.fit(train_sampling_dataset_split,
            epochs=2,
            callbacks=[checkpoint_callback, tensorboard_callback],
            validation_data=dev_sampling_dataset)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')
Epoch 1/2

Epoch 00001: val_out_s1_binary_accuracy improved from -inf to 0.91136, saving model to ../models/CLEF_2019_HANSEN_RANKING




INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_RANKING/assets


INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_RANKING/assets


Epoch 2/2

Epoch 00002: val_out_s1_binary_accuracy improved from 0.91136 to 0.93352, saving model to ../models/CLEF_2019_HANSEN_RANKING




INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_RANKING/assets


INFO:tensorflow:Assets written to: ../models/CLEF_2019_HANSEN_RANKING/assets


### Evaluation

In [36]:
# load base model
test_model_base = tf.keras.models.load_model(MODEL_PATH_BASE)

#### Sample prediction for base model

In [37]:
# preprocess data
false_statement = "Um die Ermordung unschuldiger Zivilisten in Russland zu üben, sucht die NATO für ihre Manöver russischsprachige Menschen."
tokens = spacy_model(false_statement)
deps = to_deps(tokens, 135)
word_vecs = embed([t.text.lower() for t in tokens], 135)
inp = np.concatenate((word_vecs, deps), axis=1)
print(len(inp[0]))

285


In [38]:
prediction = test_model_base.predict(np.array( [inp,] ))
print(prediction)

[[0.3697008]]


In [44]:
# load ranking model
test_model_ranking = tf.keras.models.load_model(MODEL_PATH_RANKING, compile=False)
test_model_ranking = test_model_ranking.get_layer(name='base')

In [45]:
prediction = test_model_ranking.predict(np.array( [inp,] ))
print(prediction)

[[0.08147946]]


#### MAP

In [39]:
eval_data = list(test_dataset_eval.as_numpy_iterator())
eval_data = [(ida[0], x, y[0]) for ida, x, y in eval_data]

In [40]:
# MAP metric is based on the official CLEF2019 implementation: 
# https://github.com/apepa/clef2019-factchecking-task1/blob/7d463336897ad1f870cb6a481953b94550c788a7/scorer/main.py#L52

def mean_average_precision(data, model):
    avg_precisions = []
    article_ids = set([ida for ida, _, _ in data])
    num_articles = len(article_ids)
    
    for id_article in article_ids:
        article_examples = [(x,y) for ida, x, y in data if ida == id_article]
        xs = [x for x,y in article_examples]
        ys = [y for x,y in article_examples]
        
        num_positive = sum(ys)

        predictions = [p[0] for p in model.predict(np.array(xs))]
        ranked_indices = [i for i, v in sorted(enumerate(predictions), key=lambda tup: tup[1], reverse=True)]
        
        # ++++ DEBUG CODE - START +++ #
        #hits = []
        #for i in range(len(ranked_indices)):
        #   if ys[ranked_indices[i]] == 1:
        #        hits.append(1)
        #    else:
        #        hits.append(0)
        #print(hits)
        # ++++ DEBUG CODE - END   +++ #
        
        precisions = []
        num_correct = 0
        for i in range(len(ranked_indices)):
            if ys[ranked_indices[i]] == 1:
                num_correct += 1
                precisions.append(num_correct / (i + 1))
            
        if precisions:
            avg_precisions.append(sum(precisions) / num_positive)
        else:
            avg_precisions.append(0)
        
    return sum(avg_precisions) / num_articles

MAP: 0.35671007297978213


In [46]:
print('Base/MAP: {}'.format(mean_average_precision(eval_data, test_model_base)))

Base/MAP: 0.35671007297978213


In [47]:
print('Ranking/MAP: {}'.format(mean_average_precision(eval_data, test_model_ranking)))

Ranking/MAP: 0.4634679725612787


### Results
|     | Base | Ranking |
|-----|------|---------|
| MAP |   0.35671007297978213   |  0.4634679725612787       |
| P@1 |      |         |