In [None]:
%tensorflow_version 1.x

In [None]:
import tensorflow as tf
import json
import h5py
import pickle
import json
import numpy as np
import math
import sys
import os
from scipy import stats
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
print(tf.__version__)

In [None]:
# Ablation being run. Both names are read later in the notebook: they are
# stored in scores["config"] and used to build the results file name.
#   ablation_feature: 'history', 'max_feature_length' or 'no_exo'
#   ablation_value:   10, 20, 30, 50, 100 for history, or
#                     300, 600, 1000 for max_feature_length
# ablation_value is kept as a string because it is concatenated into the
# results file name and converted with int() when stored in the config.
# The defaults below match the default "30h" extension; edit them per run.
ablation_feature = 'history'
ablation_value = '30'

# Directory holding the input data files.
data_path = os.path.join("..", "data")

# Suffix identifying which feature/label files to load,
# e.g. "30h" for use_history size=30.
name_ext = input("Enter the file-name identifier extension")
if not name_ext:
    name_ext = "30h"

## RETINA MODELS

In [None]:
class Attention(tf.keras.layers.Layer):
    '''Scaled dot-product attention of one query over a key sequence, with
    the resulting weights applied to a single value vector.

    'hidden_dim' is only used for the 1/sqrt(hidden_dim) scaling of the
    scores; the layer has no trainable weights of its own.

    As called from getAttention: q is (batch, hidden_dim), k is
    (batch, seq_len, hidden_dim) and v is (batch, value_dim).

    NOTE(review): v is one vector broadcast over every sequence position and
    the softmax weights sum to 1 along that axis, so the weighted sum returned
    by call() equals v itself. Confirm this is the intended behaviour.'''
    def __init__(self, hidden_dim=64, **kwargs):
        super(Attention, self).__init__(**kwargs)
        self.hidden_dim = hidden_dim

    def build(self, input_shape):
        super(Attention, self).build(input_shape)

    def call(self, q, k, v):
        '''Returns the attention-weighted sum over the sequence axis, shape
        (batch, value_dim).'''
        # (batch, hidden_dim) -> (batch, 1, hidden_dim) so it can be matmul'ed
        # against every key position.
        q = tf.expand_dims(q, axis=1)
        # Dot product of each key with the query: (batch, seq_len).
        att_weights = tf.squeeze(tf.matmul(k, q, transpose_b=True),
                                 axis=-1) / tf.sqrt(
                                     tf.cast(self.hidden_dim, tf.float32))
        # Normalise over the sequence axis, then add a trailing axis so the
        # weights broadcast against the value vector.
        att_weights = tf.expand_dims(tf.nn.softmax(att_weights, axis=-1),
                                     axis=-1)
        v = tf.expand_dims(v, axis=1)
        return tf.reduce_sum(v * att_weights, axis=1)

    def get_config(self):
        '''Includes 'hidden_dim' so the layer can be re-created from its
        config.'''
        config = super(Attention, self).get_config()
        config['hidden_dim'] = self.hidden_dim
        return config

In [None]:
class Attention_new(tf.keras.layers.Layer):
    '''Scaled dot-product attention: one query attends over a key sequence and
    the weights are used to pool a value sequence.

    'hidden_dim' only sets the 1/sqrt(hidden_dim) score scaling. As called from
    getAttention_new, q is (batch, hidden_dim) and k, v are
    (batch, seq_len, hidden_dim).'''
    def __init__(self, hidden_dim=64, **kwargs):
        super(Attention_new, self).__init__(**kwargs)
        self.hidden_dim = hidden_dim

    def build(self, input_shape):
        super(Attention_new, self).build(input_shape)

    def call(self, q, k, v):
        '''Returns the attention-weighted sum of v over the sequence axis.'''
        # Query as a length-1 sequence so it can be matmul'ed with the keys.
        query = tf.expand_dims(q, axis=1)
        # One raw score per sequence position: (batch, seq_len).
        raw_scores = tf.squeeze(tf.matmul(k, query, transpose_b=True),
                                axis=-1)
        scale = tf.sqrt(tf.cast(self.hidden_dim, tf.float32))
        probs = tf.nn.softmax(raw_scores / scale, axis=-1)
        # Trailing axis lets the weights broadcast over the value features.
        weighted_values = v * tf.expand_dims(probs, axis=-1)
        return tf.reduce_sum(weighted_values, axis=1)

In [None]:
def getAttention(hidden_dim=64, bert_dim=500, tweet_dim=50, **kwargs):
    '''tf.keras model implementing tweet-over-news attention ('tweet' mode).

    Inputs of the returned model, in order:
      * tweet vector, shape (tweet_dim,)
      * news sequence, shape (None, bert_dim)
    The tweet is projected to a 'hidden_dim' query and every news item to a
    'hidden_dim' key (both without bias); the Attention layer then weights the
    raw tweet vector, so the output has shape (tweet_dim,).

    No masking of padded news positions is applied. Extra keyword arguments
    are accepted and ignored.'''

    news_inp = tf.keras.layers.Input(shape=(None, bert_dim))
    tweet_inp = tf.keras.layers.Input(shape=(tweet_dim, ))
    q_tweet = tf.keras.layers.Dense(hidden_dim, use_bias=False)(tweet_inp)
    k_news = tf.keras.layers.Dense(hidden_dim, use_bias=False)(news_inp)
    # Forward hidden_dim so the score scaling matches the projection width
    # instead of always using the layer's default of 64.
    output = Attention(hidden_dim=hidden_dim)(q_tweet, k_news, tweet_inp)
    return tf.keras.models.Model([tweet_inp, news_inp], output)

In [None]:
def getAttention_new(hidden_dim=64, bert_dim=500, tweet_dim=50, **kwargs):
    '''tf.keras model implementing tweet-over-news attention ('news' mode).

    Inputs of the returned model, in order:
      * tweet vector, shape (tweet_dim,)
      * news sequence, shape (None, bert_dim)
    The tweet is projected to a 'hidden_dim' query, and every news item to a
    'hidden_dim' key and a 'hidden_dim' value (all without bias). The output
    is the attention-weighted sum of the news values, shape (hidden_dim,).

    No masking of padded news positions is applied. Extra keyword arguments
    are accepted and ignored.'''

    news_inp = tf.keras.layers.Input(shape=(None, bert_dim))
    tweet_inp = tf.keras.layers.Input(shape=(tweet_dim, ))
    q_tweet = tf.keras.layers.Dense(hidden_dim, use_bias=False)(tweet_inp)
    k_news = tf.keras.layers.Dense(hidden_dim, use_bias=False)(news_inp)
    v_news = tf.keras.layers.Dense(hidden_dim, use_bias=False)(news_inp)
    # Forward hidden_dim so the score scaling matches the projection width
    # instead of always using the layer's default of 64.
    output = Attention_new(hidden_dim=hidden_dim)(q_tweet, k_news, v_news)
    return tf.keras.models.Model([tweet_inp, news_inp], output)

### RETINA Static Mode

In [None]:
def StaticModel(hidden_dim=64,
                bert_dim=500,
                tweet_dim=50,
                feature_dim=616,
                num_users=500,
                dropout=0.3,
                att_mode='tweet',
                **kwargs):
    '''Creates static prediction model.
    'num_users' is the largest number of users can be taken (retweeters+followers).
    Prediction is done every time step independently.
    'feature_dim' is the unnormalized features (used previously for logreg).

    'att_mode' selects the exogenous attention block: 'tweet' uses
    getAttention (context of width 'tweet_dim'), 'news' uses getAttention_new
    (context of width 'hidden_dim').

    Inputs of the returned model, in order: features
    (num_users, feature_dim), exogenous news signal (None, bert_dim), root
    tweet (tweet_dim,). Output: one sigmoid score per user, shape
    (num_users, 1).

    Raises AttributeError if 'att_mode' is not 'tweet' or 'news'. Extra
    keyword arguments are accepted and ignored.'''

    features = tf.keras.layers.Input(shape=(num_users, feature_dim))
    exo_signal = tf.keras.layers.Input(shape=(None, bert_dim))
    root_tweet = tf.keras.layers.Input(shape=(tweet_dim, ))
    norm_feature = tf.keras.layers.LayerNormalization()(features)
    int_feature = tf.keras.layers.Dense(hidden_dim,
                                        activation='tanh')(norm_feature)
    int_feature = tf.keras.layers.Dropout(dropout)(int_feature)

    if att_mode == 'tweet':
        att_root = getAttention(hidden_dim=hidden_dim,
                                bert_dim=bert_dim,
                                tweet_dim=tweet_dim)([root_tweet, exo_signal])
    elif att_mode == 'news':
        att_root = getAttention_new(hidden_dim=hidden_dim,
                                    bert_dim=bert_dim,
                                    tweet_dim=tweet_dim)(
                                        [root_tweet, exo_signal])
    else:
        # Raise an exception instance; raising a (class, message) tuple is a
        # TypeError in Python 3 and hides the real message.
        raise AttributeError(
            "Unrecognized attention mode, use 'tweet' or 'news'")

    # Give every user the same attention context, then join it with the
    # per-user features.
    att_root = tf.keras.layers.RepeatVector(num_users)(att_root)
    full_feature = tf.keras.layers.Concatenate(axis=-1)(
        [att_root, int_feature])
    norm_full_feature = tf.keras.layers.LayerNormalization()(full_feature)
    out = tf.keras.layers.Dense(1, activation='sigmoid')(norm_full_feature)
    return tf.keras.models.Model([features, exo_signal, root_tweet], out)

### RETINA Static Mode, no exogenous influence

In [None]:
def StaticModel_noexo(hidden_dim=64,
                      tweet_dim=50,
                      feature_dim=616,
                      num_users=500,
                      dropout=0.3,
                      **kwargs):
    '''Builds the static prediction model without the exogenous news signal.

    'num_users' is the maximum number of candidate users
    (retweeters+followers) and 'feature_dim' the width of the raw,
    unnormalized per-user features. The root tweet vector is repeated for
    every user and concatenated with the encoded user features.

    Inputs of the returned model, in order: features
    (num_users, feature_dim) and root tweet (tweet_dim,). Output: one sigmoid
    score per user, shape (num_users, 1). Extra keyword arguments are accepted
    and ignored.'''
    user_inp = tf.keras.layers.Input(shape=(num_users, feature_dim))
    tweet_inp = tf.keras.layers.Input(shape=(tweet_dim, ))

    # Per-user feature encoder: normalise -> dense(relu) -> dropout.
    encoded = tf.keras.layers.LayerNormalization()(user_inp)
    encoded = tf.keras.layers.Dense(hidden_dim, activation='relu')(encoded)
    encoded = tf.keras.layers.Dropout(dropout)(encoded)

    # Same tweet vector for every user.
    tiled_tweet = tf.keras.layers.RepeatVector(num_users)(tweet_inp)

    merged = tf.keras.layers.Concatenate(axis=-1)([tiled_tweet, encoded])
    merged = tf.keras.layers.LayerNormalization()(merged)
    scores_out = tf.keras.layers.Dense(1, activation='sigmoid')(merged)
    return tf.keras.models.Model([user_inp, tweet_inp], scores_out)

### RETINA Dynamic Mode

In [None]:
def TemporalModel1(hidden_dim=64,
                   bert_dim=500,
                   tweet_dim=50,
                   feature_dim=617,
                   num_users=70,
                   time_steps=7,
                   dropout=0.2,
                   att_mode='tweet',
                   **kwargs):
    '''Creates temporal prediction model. 'num_users' is the largest number of
    users can be taken (retweeters+followers). Prediction is done for 'time_steps'
    times in forward time. 'feature_dim' is the unnormalized features (used previously for
    logreg).

    'att_mode' selects the exogenous attention block: 'tweet' uses
    getAttention (context of width 'tweet_dim'), 'news' uses getAttention_new
    (context of width 'hidden_dim').

    Inputs of the returned model, in order: features
    (num_users, time_steps, feature_dim), exogenous news signal
    (None, bert_dim), root tweet (tweet_dim,). Output: one sigmoid score per
    user and time step, shape (num_users, time_steps, 1).

    Raises AttributeError if 'att_mode' is not 'tweet' or 'news'. Extra
    keyword arguments are accepted and ignored.'''
    features = tf.keras.layers.Input(shape=(num_users, time_steps,
                                            feature_dim))
    exo_signal = tf.keras.layers.Input(shape=(None, bert_dim))
    root_tweet = tf.keras.layers.Input(shape=(tweet_dim, ))
    norm_feature = tf.keras.layers.LayerNormalization()(features)
    int_feature = tf.keras.layers.Dense(hidden_dim,
                                        activation='relu')(norm_feature)
    int_feature = tf.keras.layers.Dropout(dropout)(int_feature)

    if att_mode == 'tweet':
        att_root = getAttention(hidden_dim=hidden_dim,
                                bert_dim=bert_dim,
                                tweet_dim=tweet_dim)([root_tweet, exo_signal])
        att_dim = tweet_dim
    elif att_mode == 'news':
        att_root = getAttention_new(hidden_dim=hidden_dim,
                                    bert_dim=bert_dim,
                                    tweet_dim=tweet_dim)(
                                        [root_tweet, exo_signal])
        att_dim = hidden_dim
    else:
        # Raise an exception instance; raising a (class, message) tuple is a
        # TypeError in Python 3 and hides the real message.
        raise AttributeError(
            "Unrecognized attention mode, use 'tweet' or 'news'")

    # Copy the attention context to every (user, time step) position. The
    # reshape must use the context's own width, which differs per mode;
    # hard-coding tweet_dim breaks 'news' mode when hidden_dim != tweet_dim.
    att_root = tf.keras.layers.RepeatVector(num_users * time_steps)(att_root)
    att_root = tf.keras.layers.Reshape(
        (num_users, time_steps, att_dim))(att_root)
    full_feature = tf.keras.layers.Concatenate(axis=-1)(
        [att_root, int_feature])
    norm_full_feature = tf.keras.layers.LayerNormalization()(full_feature)
    # TimeDistributed iterates over the user axis, so the GRU runs over each
    # user's time_steps sequence.
    norm_full_feature = tf.keras.layers.TimeDistributed(
        tf.keras.layers.GRU(hidden_dim,
                            return_sequences=True))(norm_full_feature)
    norm_full_feature = tf.keras.layers.Dropout(dropout)(norm_full_feature)
    norm_full_feature = tf.keras.layers.LayerNormalization()(norm_full_feature)
    out = tf.keras.layers.Dense(1, activation='sigmoid')(norm_full_feature)

    return tf.keras.models.Model([features, exo_signal, root_tweet], out)

### RETINA Dynamic Mode, no exogenous influence

In [None]:
def TemporalModel1_noexo(hidden_dim=64,
                         bert_dim=500,
                         tweet_dim=50,
                         feature_dim=617,
                         num_users=250,
                         time_steps=7,
                         dropout=0.2,
                         att_mode='tweet',
                         **kwargs):
    '''Builds the temporal prediction model without the exogenous news signal.

    'num_users' is the maximum number of candidate users
    (retweeters+followers), 'time_steps' the number of forward prediction
    steps and 'feature_dim' the width of the raw, unnormalized features.
    'bert_dim' and 'att_mode' are accepted but not used by this variant.

    Inputs of the returned model, in order: features
    (num_users, time_steps, feature_dim) and root tweet (tweet_dim,). Output:
    one sigmoid score per user and time step, shape
    (num_users, time_steps, 1).'''
    user_inp = tf.keras.layers.Input(shape=(num_users, time_steps,
                                            feature_dim))
    tweet_inp = tf.keras.layers.Input(shape=(tweet_dim, ))

    # Per-position feature encoder: normalise -> dense(relu) -> dropout.
    encoded = tf.keras.layers.LayerNormalization()(user_inp)
    encoded = tf.keras.layers.Dense(hidden_dim, activation='relu')(encoded)
    encoded = tf.keras.layers.Dropout(dropout)(encoded)

    # Copy the root tweet to every (user, time step) position.
    tiled_tweet = tf.keras.layers.RepeatVector(num_users *
                                               time_steps)(tweet_inp)
    tiled_tweet = tf.keras.layers.Reshape(
        (num_users, time_steps, tweet_dim))(tiled_tweet)

    hidden = tf.keras.layers.Concatenate(axis=-1)([tiled_tweet, encoded])
    hidden = tf.keras.layers.LayerNormalization()(hidden)

    # TimeDistributed iterates over the user axis, so the GRU runs over each
    # user's time_steps sequence.
    per_user_gru = tf.keras.layers.GRU(hidden_dim, return_sequences=True)
    hidden = tf.keras.layers.TimeDistributed(per_user_gru)(hidden)
    hidden = tf.keras.layers.Dropout(dropout)(hidden)
    hidden = tf.keras.layers.LayerNormalization()(hidden)
    scores_out = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)

    return tf.keras.models.Model([user_inp, tweet_inp], scores_out)

### Masking Weight Loss

In [None]:
def masked_weighted_loss(pos_w=1.5, mask_value=-1.):
    '''Builds a masked, class-weighted binary cross-entropy loss.

    'mask_value' := value used to pad the ground-truth tensor; entries equal
    to it get weight 0.
    'pos_w' := weight for positive samples (compute as
    log(total sample/positive sample)). The weight is combined with the mask
    through a maximum, so a positive entry is weighted max(pos_w, 1).

    Returns a function mbce(labels, logits). Despite the argument name,
    'logits' is passed to binary_crossentropy as the predicted values.'''
    def mbce(labels, logits):
        flat_labels = tf.squeeze(labels, axis=-1)

        # 1.0 where a real label is present, 0.0 on padding.
        valid = tf.cast(tf.math.not_equal(flat_labels, mask_value),
                        dtype=tf.float32)
        # pos_w on positive labels, 0.0 elsewhere.
        pos_weight = tf.cast(tf.math.equal(flat_labels, 1.),
                             dtype=tf.float32) * pos_w

        # Zero both targets and predictions on padded entries before the
        # cross-entropy is taken.
        valid_col = tf.expand_dims(valid, axis=-1)
        bce = tf.keras.losses.binary_crossentropy(labels * valid_col,
                                                  logits * valid_col)

        sample_weights = tf.maximum(pos_weight, valid)
        return tf.keras.backend.mean(bce * sample_weights, axis=-1)

    return mbce

## Common feature loading

In [None]:
# Topical influence: root-tweet vectors for the train and test splits.
# The data directory variable defined in the configuration cell is
# `data_path`; the name `data` is never defined.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(data_path, 'doc2vec_root_static_train.pickle'),
          'rb') as handle:
    doc2_vec_train = pickle.load(handle)
with open(os.path.join(data_path, 'doc2vec_root_static_test.pickle'),
          'rb') as handle:
    doc2_vec_test = pickle.load(handle)

In [None]:
# Exogenous news influence.
# NOTE: for the news ablation, change the file names according to the capture
# size: 15, 30, 45 or 60. The default news size is 60 per tweet.
# The data directory variable defined in the configuration cell is
# `data_path`; the name `data` is never defined.
with open(os.path.join(data_path, "news2vec_static_train.npy"), "rb") as f:
    news2_vec_train = np.load(f)
with open(os.path.join(data_path, "news2vec_static_test.npy"), "rb") as f:
    news2_vec_test = np.load(f)

## Dynamic Feature loading

In [None]:
## Train-test features (dynamic mode).
# Files are read from `data_path` (the directory defined in the configuration
# cell; the name `data` is never defined). The context managers close each
# HDF5 file even if reading the dataset raises.
with h5py.File(
        os.path.join(
            data_path,
            'train_dynamic_neural_shuffle_3057_70_' + name_ext + '.h5'),
        'r') as h5f:
    train_features = h5f['dataset_1'][:]
with h5py.File(
        os.path.join(
            data_path,
            'test_dynamic_neural_shuffle_765_100_' + name_ext + '.h5'),
        'r') as h5f:
    test_features = h5f['dataset_1'][:]

In [None]:
## Train-test target labels (dynamic mode).
# NOTE(review): these files are opened relative to the current working
# directory, while the dynamic feature files are read from the data
# directory -- confirm that is where the label pickles live.
train_labels_file = ('dynamic_neural_labels_temp_shuffle_new_' + name_ext +
                     '.pickle')
test_labels_file = ('dynamic_neural_labels_temp_test_shuffle_new_' +
                    name_ext + '.pickle')

with open(train_labels_file, 'rb') as handle:
    train_labels = pickle.load(handle)

with open(test_labels_file, 'rb') as handle:
    test_labels = pickle.load(handle)

## Static Feature loading

In [None]:
## Train-test features (static mode).
# NOTE: this assigns the same names (train_features / test_features) as the
# dynamic feature loading cell, so run only one of the two sections.
# NOTE(review): files are opened relative to the current working directory,
# unlike the dynamic feature files -- confirm the intended location.
# The context managers close each HDF5 file even if reading the dataset
# raises.
with h5py.File('train_static_neural_shuffle_3057_200_' + name_ext + '.h5',
               'r') as h5f:
    train_features = h5f['dataset_1'][:]
with h5py.File('test_static_neural_shuffle_765_500_' + name_ext + '.h5',
               'r') as h5f:
    test_features = h5f['dataset_1'][:]

In [None]:
## Train-test labels (static mode).
# The file-name suffix is `name_ext` (set in the configuration cell);
# `news_ext` is never defined.
# NOTE: this assigns the same names (train_labels / test_labels) as the
# dynamic label loading cell, so run only one of the two sections.
with open('static_neural_labels_temp_shuffle_new_' + name_ext + '.pickle',
          'rb') as handle:
    train_labels = pickle.load(handle)
with open('static_neural_labels_temp_test_shuffle_new_' + name_ext + '.pickle',
          'rb') as handle:
    test_labels = pickle.load(handle)

# Train & Score the model

In [None]:
# Hyper-parameters for this run.
# NOTE(review): att_mode and activation are recorded in the results below,
# but the training cell builds the model with TemporalModel1(num_users=70)
# and does not pass them on -- confirm they describe the model actually used.
att_mode = "news"  # or "tweet"
activation = "relu"
i = 2.5  ## multiplier applied to the positive-class weight; SET to 2 for static
n_batch = 32  ## SET to 16 for static
n_epoch = 10

# Run description plus an (initially empty) slot for evaluation metrics; the
# whole dict is written to JSON at the end of the notebook.
scores = {
    "config": {
        "model": "Dynamic",  # SET to Static for static
        "opt_type": "SGD",  # SET to ADAM for static
        ablation_feature: int(ablation_value),
        # "history": int(history),
        # "max_feature_length": int(max_feature_length),
        "att_mode": att_mode,
        "activation": activation,
        "i": i,
        "n_batch": n_batch,
        "n_epoch": n_epoch,
    },
    "evaluation": {},
}

In [None]:
# Optimizer passed to train_model.compile() in the training cell.
# scores["config"]["opt_type"] is set separately, so keep the two in sync
# when switching between the options below.
opt = tf.keras.optimizers.SGD(learning_rate=0.01)  # FOR DYNAMIC
# opt='adam' # FOR STATIC

In [None]:
# Positive-class weight: log(number of label entries / number of positives).
# NOTE(review): the denominator counts every entry of train_labels; if the
# labels are padded with the loss's mask value, those entries are included
# too -- confirm this is intended.
flat_train_labels = train_labels.flatten()
pos = np.count_nonzero(flat_train_labels == 1)
pos_w = math.log(len(flat_train_labels) / pos)
print(pos_w)

# Static or non-exogenous variants are called similarly.
train_model = TemporalModel1(num_users=70)
# `i` is the weight multiplier set in the hyper-parameter cell.
train_model.compile(optimizer=opt, loss=masked_weighted_loss(pos_w * i))

_ = train_model.fit(x=[train_features, news2_vec_train, doc2_vec_train],
                    y=train_labels,
                    epochs=n_epoch,
                    batch_size=n_batch)

In [None]:
## Test model and predict values.
# A second TemporalModel1 is built with num_users=100 (training used 70) and
# the trained weights are copied into it before predicting.
# NOTE(review): this presumably relies on no weight shape depending on
# num_users -- confirm when changing the architecture.
train_weights = train_model.get_weights()
test_model = TemporalModel1(num_users=100)
test_model.set_weights(train_weights)
# Input order matches the model definition: features, news signal, root tweet.
y_new = test_model.predict([test_features, news2_vec_test, doc2_vec_test])

In [None]:
def apk(actual, pred, k):
    '''Average Precision @K, used in MAP.

    'actual' is a collection of relevant indices, 'pred' a 1-D sequence of
    scores (one per index) and 'k' the cut-off. The k highest-scoring indices
    are ranked best first; precision is accumulated at every rank that holds a
    relevant index and the sum is divided by min(len(actual), k).

    Returns 0.0 when 'actual' is empty or k <= 0, instead of dividing by
    zero.'''
    # Guard: with k == 0 the slice [-0:] would select *all* indices, and an
    # empty 'actual' would make the denominator zero.
    if k <= 0 or len(actual) == 0:
        return 0.0

    # Indices of the k highest scores, best first.
    predicted = np.argsort(pred)[-k:][::-1]
    score = 0.0
    num_hits = 0.0
    for rank, p in enumerate(predicted, start=1):
        if p in actual:
            num_hits += 1.0
            score += num_hits / rank
    return score / min(len(actual), k)


def hitsk(actual, pred, k=1):
    '''Hits @ k.

    'actual' is a collection of relevant indices, 'pred' a 1-D sequence of
    scores (one per index) and 'k' the cut-off. Returns the number of relevant
    indices among the k highest-scoring ones, divided by min(len(actual), k).

    Returns 0.0 when 'actual' is empty or k <= 0, instead of dividing by
    zero.'''
    # Guard: with k == 0 the slice [-0:] would select *all* indices, and an
    # empty 'actual' would make the denominator zero.
    if k <= 0 or len(actual) == 0:
        return 0.0

    # Indices of the k highest scores (order is irrelevant for a hit count).
    predicted = np.argsort(pred)[-k:]
    hits = sum(1 for idx in predicted if idx in actual)
    return hits / min(len(actual), k)

In [None]:
# Cut-offs at which MAP and HITS are reported.
k_list = [1, 5, 10, 20, 50, 100]

# Build the (relevant indices, flat scores) pair of every test sample once,
# keeping only samples that have at least one nonzero label. The sample count
# comes from the data rather than a hard-coded 765, and the index is not
# called `i` so the global weight multiplier `i` used by the training cell is
# left untouched.
_eval_queries = []
for sample_idx in range(len(test_labels)):
    relevant = set(test_labels[sample_idx].flatten().nonzero()[0])
    if relevant:
        _eval_queries.append((relevant, y_new[sample_idx].flatten()))


def _mean_metric(metric_fn, k):
    '''Mean of metric_fn(relevant, scores, k) over the evaluated samples.
    Returns NaN when no test sample has a positive label.'''
    if not _eval_queries:
        return float("nan")
    total = sum(metric_fn(relevant, sample_scores, k)
                for relevant, sample_scores in _eval_queries)
    return total / len(_eval_queries)


print("\n")
for k in k_list:
    map_ = _mean_metric(apk, k)
    print("MAP@{} = {}".format(k, map_))
    scores["evaluation"]["map@{}".format(k)] = map_

print("\n")
for k in k_list:
    hit_ = _mean_metric(hitsk, k)
    print("HITS@{} = {}".format(k, hit_))
    scores["evaluation"]["hits@{}".format(k)] = hit_

In [None]:
# Flatten every test sample into three aligned lists: ground-truth labels,
# raw predicted scores and 0/1 predictions thresholded at 0.5.
# The sample count comes from the data rather than a hard-coded 765, and the
# index is not called `i` so the global weight multiplier `i` used by the
# training cell is left untouched.
y_pred = []
y_score = []
y_actual = []
for sample_idx in range(len(test_labels)):
    sample_scores = y_new[sample_idx].flatten()
    y_actual.extend(test_labels[sample_idx].flatten())
    y_score.extend(sample_scores)
    # Same rule as before: below 0.5 -> 0, otherwise 1.
    y_pred.extend(np.where(sample_scores < 0.5, 0, 1))

assert len(y_pred) == len(y_actual)
assert len(y_score) == len(y_actual)

print(classification_report(y_actual, y_pred))
scores["evaluation"]["classification_report"] = classification_report(
    y_actual, y_pred, output_dict=True)

# ROC AUC is a ranking metric, so it is computed from the predicted scores;
# feeding it the thresholded 0/1 predictions collapses the curve to a single
# operating point.
roc_auc = roc_auc_score(y_actual, y_score)
print("ROC_AUC = {}".format(roc_auc))
scores["evaluation"]["roc_auc"] = roc_auc

In [None]:
# Display the run configuration and the evaluation metrics collected so far.
scores

In [None]:
# Write the run configuration and metrics next to the input data.
# The data directory variable defined in the configuration cell is
# `data_path`; the name `data` is never defined. str() keeps the file name
# valid whether ablation_value is given as a number or as a string.
results_file = os.path.join(
    data_path, 'retweet_pred_dynamic_' + ablation_feature + '_' +
    str(ablation_value) + '.json')
with open(results_file, 'w') as f:
    # indent=1 produces the same output as the previous indent=True.
    json.dump(scores, f, indent=1)

In [None]:
# Printed when the last cell of the notebook has run.
print("Done")