In [None]:
import numpy as np
import os
import tensorflow as tf
from keras.models import Model
from keras.regularizers import l2
#from keras import optimizers
from keras.layers import Input, Reshape, Dense, Dropout, Add, Layer, MultiHeadAttention, Embedding, LSTM, Bidirectional
import keras.backend as K
from spektral.layers import GraphSageConv

import random, pickle, math, h5py, heapq
import scipy.sparse as sp
from scipy.spatial.distance import cosine
from sklearn.preprocessing import normalize
from sklearn import random_projection

import seaborn as sns
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# --- GPU / session setup ---------------------------------------------------
# Enumerate GPUs in PCI bus order and expose only device "0" to this process.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# TF1-style session; a memory fraction of 1 lets TensorFlow claim the whole
# visible GPU.
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=1)
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))

# Make Keras use the session configured above.
tf.compat.v1.keras.backend.set_session(sess)


# --- Hyper-parameters ------------------------------------------------------
num_epoch = 700               # presumably the number of training epochs (not used in the code visible here)
text_embedding_size = 300
feature_size = 300            # width of the per-region feature vectors built in the model definitions

num_user = 1000
num_tags = 3896               # size of the tag vocabulary == width of the model output
seq_length = 49    # max length of text sequence
dim_k = 100                   # internal dimension handed to the co-attention layers

# post-tag attention
top_u_post_num = 5            # number of posts in the "user post" model input
top_o_post_num = 10           # number of posts in the "other post" model input
top_gamma_tag = 10            # number of tag rows in the user/other tag model inputs
post_embedding_size =300
tag_embedding_size =300

support = 1
today="0907-1"                # suffix of the HDF5 file names opened by get_batch_feature

# --- Batching --------------------------------------------------------------
num_user_per_batch = 23
num_post_per_batch = 500      # BatchMemory.call pins the static batch dimension to this value
num_batch = 40
value = 0.9                   # NOTE(review): purpose not visible in this part of the file - confirm
date_num = "0116"

# In a notebook this displays the devices TensorFlow can see; as a plain
# script the return value is discarded.
tf.config.list_physical_devices()

#### Utils

In [None]:
def one_hot_encoding(inputs, num_total):
    """Turn per-sample index collections into a multi-hot int32 matrix.

    Args:
        inputs: Indexable collection; ``inputs[i]`` is an iterable of the
            column indices that are active for sample ``i``.
        num_total: Number of columns in the encoding.

    Returns:
        ``np.ndarray`` of shape ``(len(inputs), num_total)`` and dtype int32
        holding 1 at every listed index and 0 elsewhere.
    """
    encoded = np.zeros((len(inputs), num_total), dtype=np.int32)
    for sample_idx, encoded_row in enumerate(encoded):
        # encoded_row is a view, so writes land in `encoded` directly.
        for active in inputs[sample_idx]:
            encoded_row[active] = 1

    return encoded

def batch_train_label(tag_total, idx_train_array, test_array_len, num_tags=3896):
    """Build the multi-hot label matrix for one training batch.

    The first ``len(idx_train_array)`` rows carry the tags of the training
    posts; they are followed by ``test_array_len`` all-zero rows acting as
    placeholders for the test posts that share the batch.

    Args:
        tag_total: Indexable collection mapping a post index to an iterable
            of tag indices.
        idx_train_array: Post indices of the training samples, in row order.
        test_array_len: Number of all-zero rows to append.
        num_tags: Width of the label vectors. Defaults to 3896, the value
            that used to be hard-coded here.

    Returns:
        ``np.ndarray`` of shape
        ``(len(idx_train_array) + test_array_len, num_tags)`` and dtype int.
    """
    num_train = len(idx_train_array)
    # A negative padding count adds no rows, as before.
    num_padding = max(int(test_array_len), 0)

    batch_label = np.zeros((num_train + num_padding, num_tags), dtype=int)
    for label_row, post_idx in zip(batch_label, idx_train_array):
        # label_row is a view into batch_label.
        label_row[np.asarray(tag_total[post_idx], dtype=np.int32)] = 1

    return batch_label

def batch_test_label(tag_total, idx_test_array, num_tags=3896):
    """Build the multi-hot label matrix for the test posts of one batch.

    Args:
        tag_total: Indexable collection mapping a post index to an iterable
            of tag indices.
        idx_test_array: Post indices of the test samples, in row order.
        num_tags: Width of the label vectors. Defaults to 3896, the value
            that used to be hard-coded here.

    Returns:
        ``np.ndarray`` of shape ``(len(idx_test_array), num_tags)`` and
        dtype int.
    """
    batch_label = np.zeros((len(idx_test_array), num_tags), dtype=int)
    for label_row, post_idx in zip(batch_label, idx_test_array):
        # label_row is a view into batch_label.
        label_row[np.asarray(tag_total[post_idx], dtype=np.int32)] = 1

    return batch_label

def adj_by_newcosine(feature, threshold):
    """Build a sparse adjacency matrix from thresholded cosine similarity.

    An edge ``(i, j)`` with ``i != j`` is created when the cosine similarity
    of rows ``i`` and ``j`` is strictly greater than ``threshold``; the edge
    weight is that similarity. A row only contributes edges when its
    second-largest similarity (self-similarity included) exceeds the
    threshold, which mirrors the row-skipping rule of the loop-based
    implementation this replaces.

    Args:
        feature: 2-D array-like of shape ``(n_samples, n_features)``.
        threshold: Similarity cut-off; only strictly larger values are kept.

    Returns:
        ``scipy.sparse`` matrix of shape ``(n_samples, n_samples)`` with the
        surviving similarities, symmetrised by taking the larger of
        ``adj[i, j]`` and ``adj[j, i]``.
    """
    vectors = np.asarray(feature)
    num_nodes = vectors.shape[0]
    if num_nodes < 2:
        # No pair of distinct nodes exists, hence no edges.
        return sp.csr_matrix((num_nodes, num_nodes), dtype=float)

    similarity = np.dot(vectors, vectors.T)
    # The diagonal holds squared norms; zero vectors get an inverse norm of 0
    # so that all their similarities become 0 instead of inf/NaN.
    with np.errstate(divide="ignore"):
        inv_square_mag = 1.0 / np.diag(similarity)
    inv_square_mag[np.isinf(inv_square_mag)] = 0
    inv_mag = np.sqrt(inv_square_mag)
    cosine = (similarity * inv_mag).T * inv_mag
    del similarity, inv_square_mag

    # Second-largest similarity per row (the largest is normally the
    # self-similarity on the diagonal).
    second_best = np.partition(cosine, -2, axis=1)[:, -2]
    keep = cosine > threshold
    keep &= (second_best > threshold)[:, np.newaxis]
    np.fill_diagonal(keep, False)  # never connect a node to itself

    row, col = np.nonzero(keep)
    data = np.asarray(cosine[row, col], dtype=float)
    adj = sp.coo_matrix((data, (row, col)), shape=(num_nodes, num_nodes))
    # Symmetrise: wherever adj.T is larger than adj, take the adj.T entry.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    return adj

def img_adj_by_newcosine(img_feature, threshold, random_state=None):
    """Build a cosine-similarity adjacency matrix from image features.

    Each sample is flattened to a vector, reduced with a Gaussian random
    projection (scikit-learn defaults) and the projected vectors are handed
    to ``adj_by_newcosine``.

    Args:
        img_feature: Array whose first axis indexes samples; all remaining
            axes are flattened.
        threshold: Similarity cut-off forwarded to ``adj_by_newcosine``.
        random_state: Optional seed/``RandomState`` for the projection.
            ``None`` (default) keeps the previous unseeded behaviour, so
            results differ between calls.

    Returns:
        The sparse adjacency matrix returned by ``adj_by_newcosine``.
    """
    features = np.asarray(img_feature)
    # One row per sample. Unlike a squeeze, this keeps the sample axis even
    # when there is a single sample or a single feature.
    flat_features = features.reshape(features.shape[0], -1)

    transformer = random_projection.GaussianRandomProjection(random_state=random_state)
    projected = transformer.fit_transform(flat_features)
    del transformer, flat_features

    return adj_by_newcosine(projected, threshold)

# true if idx is in batch training list
def sample_mask(idx, l):
    """Return a boolean mask of length ``l`` that is True at positions ``idx``."""
    selected = np.zeros(l, dtype=bool)
    selected[idx] = True
    return selected

def normalize_adj(adj, symmetric=True):
    """Degree-normalise a sparse adjacency matrix.

    Args:
        adj: ``scipy.sparse`` adjacency matrix of shape ``(n, n)``.
        symmetric: If True compute ``(adj @ D).T @ D`` with
            ``D = diag(deg ** -0.5)`` (equal to ``D @ adj @ D`` for a
            symmetric ``adj``); otherwise compute the row normalisation
            ``diag(deg ** -1) @ adj``. ``deg`` is the row sum of ``adj``.

    Returns:
        The normalised matrix in CSR format.
    """
    # Cast to float first: NumPy refuses negative integer powers of integer
    # arrays, which broke the row-normalised branch for integer adjacency.
    degrees = np.asarray(adj.sum(1), dtype=float).flatten()
    exponent = -0.5 if symmetric else -1.0
    with np.errstate(divide="ignore"):
        scale = np.power(degrees, exponent)
    # Zero-degree nodes would otherwise contribute inf scale factors.
    scale[np.isinf(scale)] = 0.0
    d = sp.diags(scale, 0)

    if symmetric:
        return adj.dot(d).transpose().dot(d).tocsr()
    return d.dot(adj).tocsr()

def preprocess_adj(adj, symmetric=True):
    """Add self-loops to ``adj`` and degree-normalise the result.

    Args:
        adj: Sparse adjacency matrix of shape ``(n, n)``.
        symmetric: Forwarded to ``normalize_adj`` to choose the
            normalisation variant.

    Returns:
        Whatever ``normalize_adj`` returns for ``adj + I``.
    """
    with_self_loops = adj + sp.eye(adj.shape[0])
    return normalize_adj(with_self_loops, symmetric)

def myLossFunc(y_true, y_pred):
    """Multi-label negative log-likelihood loss.

    For each sample the negative log-probabilities of all positive labels
    are summed; the result is averaged over the batch.

    Args:
        y_true: Multi-hot label tensor, cast to float32 internally.
        y_pred: Predicted probabilities with the same shape as ``y_true``.

    Returns:
        Scalar loss tensor.
    """
    # Floor the probabilities: log(0) = -inf, and inf * 0 (for labels that
    # are not set) would turn the whole loss into NaN.
    safe_pred = K.maximum(y_pred, K.epsilon())
    probs_log = -K.log(safe_pred)
    loss = K.mean(K.sum(probs_log * tf.cast(y_true, tf.float32), axis=-1))
    return loss

def evaluator(y_true, y_pred, top_K):
    """Compute top-K accuracy, precision, recall and F1 for multi-label output.

    Args:
        y_true: 2-D multi-hot array of shape ``(n_samples, n_labels)``.
        y_pred: 2-D score array of the same shape; higher means more likely.
        top_K: Number of highest-scoring labels taken as the prediction.

    Returns:
        Tuple ``(acc_K, precision, recall, mean_f1, f1)`` where ``acc_K`` is
        the fraction of samples with at least one hit in the top K,
        ``precision``/``recall`` are per-sample means, ``mean_f1`` is the
        mean of the per-sample F1 scores and ``f1`` is the harmonic mean of
        the mean precision and mean recall. All five are 0.0 for an empty
        batch.

    Raises:
        ValueError: If ``top_K`` is not positive.
    """
    if top_K <= 0:
        # `[-0:]` would silently select every label and precision would
        # divide by zero.
        raise ValueError("top_K must be a positive integer, got %r" % (top_K,))

    num_samples = y_pred.shape[0]
    if num_samples == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    acc_count = 0
    precision_K = []
    recall_K = []
    f1_K = []

    for i, scores in enumerate(y_pred):
        top_indices = scores.argsort()[-top_K:]
        hits = np.sum(y_true[i, top_indices])
        num_relevant = np.sum(y_true[i, :])

        if hits >= 1:
            acc_count += 1
        p = hits / top_K
        # A sample without any true label has recall 0 rather than 0/0 = NaN.
        r = hits / num_relevant if num_relevant > 0 else 0.0

        precision_K.append(p)
        recall_K.append(r)
        if p != 0 or r != 0:
            f1_K.append((2 * (p * r)) / (p + r))
        else:
            f1_K.append(0)

    acc_K = acc_count * 1.0 / num_samples
    mp = np.mean(np.array(precision_K))
    mr = np.mean(np.array(recall_K))
    f1 = 2 * mp * mr / (mp + mr) if mp + mr != 0 else 0

    return acc_K, mp, mr, np.mean(np.array(f1_K)), f1
def zero_padding(X, seq_length):
    """Truncate or right-pad every sequence in ``X`` to ``seq_length`` items.

    Args:
        X: Iterable of sequences of numbers.
        seq_length: Target length; longer sequences are cut, shorter ones
            are padded with zeros at the end.

    Returns:
        Integer ``np.ndarray`` with one row per input sequence.
    """
    def _fit_to_length(seq):
        kept = list(seq)[:seq_length]
        filler = [0] * max(seq_length - len(seq), 0)
        return np.array(kept + filler) * 1.0

    return np.array([_fit_to_length(seq) for seq in X]).astype(int)


def get_batch_feature(batch_train_id, text_id_total):
    """Load the per-post features of one batch from the preprocessed HDF5 file.

    Reads ``Preprocess_data/dataset_<today>.h5`` (``today`` is the
    module-level date tag). The file is closed before returning; the
    previous version leaked the handle and additionally opened a second
    HDF5 file that was never read.

    Args:
        batch_train_id: Iterable of post indices to load.
        text_id_total: Indexable collection mapping a post index to its
            text-id sequence.

    Returns:
        Tuple of seven ``np.ndarray`` objects, in this order: image
        features, BERT text features, user-post features, other-post
        features, user-tag features, other-tag features, text ids.
    """
    dataset_names = (
        'img_feature',
        'bert_feature',
        'u_post_feature',
        'o_post_feature',
        'u_tag_feature',
        'o_tag_feature',
    )
    collected = {name: [] for name in dataset_names}
    text_ids = []

    with h5py.File("Preprocess_data/dataset_%s.h5"%(today), "r") as h5f:
        # Resolve each dataset handle once instead of once per post.
        datasets = {name: h5f[name] for name in dataset_names}
        for post_id in batch_train_id:
            for name, dataset in datasets.items():
                collected[name].append(dataset[post_id])
            text_ids.append(text_id_total[post_id])

    features = tuple(np.array(collected[name]) for name in dataset_names)
    return features + (np.array(text_ids),)

### Models

#### Layer

In [None]:
class BatchMemory(Layer):
    """Key/value memory layer applied to every proposal vector of a batch.

    Each input vector addresses ``num_slot`` learned keys by cosine
    similarity, builds its own erased-and-added copy of the learned value
    memory, and reads that copy back with a second cosine-similarity
    addressing step.

    Args:
        num_proposals: Number of vectors per sample (second input axis).
        input_dim: Size of each input vector. It is multiplied with the
            ``(memory_size, num_slot)`` key matrix, so it must equal
            ``memory_size``.
        num_slot: Number of memory slots.
        memory_size: Dimension of each memory slot.
        n_heads: Stored for reference only; not used by ``call``.

    Input shape: ``(num_post_per_batch, num_proposals, input_dim)`` - the
    batch size is pinned to the module-level ``num_post_per_batch``.
    Output shape: ``(num_post_per_batch, num_proposals, memory_size)``.
    """

    def __init__(self, num_proposals, input_dim, num_slot, memory_size, n_heads, **kwargs):
        super(BatchMemory, self).__init__(**kwargs)
        self.num_proposals = num_proposals
        self.input_dim = input_dim
        self.num_slot = num_slot
        self.memory_size = memory_size
        self.n_heads = n_heads
        self.attention_dim = input_dim

        self.denseErase = Dense(self.memory_size, activation="tanh", use_bias=True)
        self.denseRead = Dense(self.memory_size, activation="tanh", use_bias=True)
        self.denseAdd = Dense(self.memory_size, activation="tanh", use_bias=True)
        # NOTE(review): this sub-layer is never called in `call`; it is kept
        # only so the layer's attributes stay as they were.
        self.multi_head_attention = MultiHeadAttention(num_heads=5, key_dim=64)

    def build(self, input_shape):
        self.key_matrix = self.add_weight(
            name="key_matrix",
            initializer="RandomNormal",
            shape=(self.memory_size, self.num_slot),  # (d, K)
            trainable=True,
        )
        self.mem_matrix = self.add_weight(
            name="mem_matrix",
            initializer="RandomNormal",
            shape=(self.num_slot, self.memory_size),  # (K, d)
            trainable=True,
        )
        super(BatchMemory, self).build(input_shape)

    def call(self, inputs):
        # N below stands for num_post_per_batch * num_proposals.
        # The static batch size is needed for the final reshape.
        inputs.set_shape([num_post_per_batch, self.num_proposals, self.input_dim])

        c_vectors = tf.reshape(inputs, (-1, self.input_dim))  # (N, input_dim)

        # --- addressing: cosine similarity between inputs and keys ---------
        inner_product = tf.matmul(c_vectors, self.key_matrix)  # (N, K)
        filter_length = tf.sqrt(tf.reduce_sum(tf.square(c_vectors), axis=1, keepdims=True))  # (N, 1)
        key_length = tf.sqrt(tf.reduce_sum(tf.square(self.key_matrix), axis=0, keepdims=True))  # (1, K)
        # divide_no_nan yields 0 instead of NaN for all-zero input vectors.
        cosine_similarity = tf.math.divide_no_nan(inner_product, filter_length * key_length)  # (N, K)

        # NOTE(review): the softmax runs over axis 0, i.e. across all N
        # vectors of the batch, not across the K slots - confirm intent.
        cosine_similarity_mul = cosine_similarity * 100
        correlation_weight = tf.nn.softmax(cosine_similarity_mul, axis=0)
        correlation_weight_mask = tf.where(
            correlation_weight < 1e-10,
            tf.zeros_like(correlation_weight),
            correlation_weight,
        )
        correlation_weight_final = tf.clip_by_value(
            correlation_weight_mask * cosine_similarity,
            clip_value_min=0,
            clip_value_max=tf.float32.max,
        )
        correlation_weight_final = tf.expand_dims(correlation_weight_final, axis=2)  # (N, K, 1)

        # --- write: erase then add -----------------------------------------
        # NOTE(review): denseErase/denseAdd already apply tanh, so these are
        # sigmoid(tanh(.)) and tanh(tanh(.)) - confirm intent.
        erase_vector = self.denseErase(c_vectors)  # (N, d)
        erase_vector = tf.sigmoid(erase_vector)
        erase_vector = tf.expand_dims(erase_vector, axis=1)  # (N, 1, d)

        add_vector = self.denseAdd(c_vectors)  # (N, d)
        add_vector = tf.tanh(add_vector)
        add_vector = tf.expand_dims(add_vector, axis=1)  # (N, 1, d)

        erase_mul = tf.matmul(correlation_weight_final, erase_vector)  # (N, K, d)
        # The (K, d) memory broadcasts against (N, K, d); this gives the same
        # values as tiling the memory N times without materialising the copy.
        erase = self.mem_matrix * (1 - erase_mul)  # (N, K, d)
        add = tf.matmul(correlation_weight_final, add_vector)  # (N, K, d)
        updated_value = erase + add  # (N, K, d)

        # --- read ----------------------------------------------------------
        read_key = self.denseRead(c_vectors)  # (N, d)
        read_key = tf.expand_dims(read_key, axis=1)  # (N, 1, d)

        inner_product = tf.matmul(read_key, self.key_matrix)  # (N, 1, K)
        filter_length = tf.sqrt(tf.reduce_sum(tf.square(read_key), axis=-1, keepdims=True))  # (N, 1, 1)
        cosine_similarity = tf.math.divide_no_nan(inner_product, filter_length * key_length)  # (N, 1, K)

        memory_embedding = tf.matmul(cosine_similarity, updated_value)  # (N, 1, d)
        # NOTE(review): normalisation is along axis 0 (across the N vectors),
        # not along the feature axis - confirm intent.
        memory_embedding = tf.math.l2_normalize(memory_embedding, axis=0)

        # Back to (batch, num_proposals, memory_size).
        new_inputs = tf.reshape(memory_embedding, (inputs.shape[0], inputs.shape[1], self.memory_size))

        return new_inputs

    def get_config(self):
        # Include the constructor arguments so the layer can be re-created
        # from its config.
        config = super(BatchMemory, self).get_config()
        config.update({
            "num_proposals": self.num_proposals,
            "input_dim": self.input_dim,
            "num_slot": self.num_slot,
            "memory_size": self.memory_size,
            "n_heads": self.n_heads,
        })
        return config


In [None]:
class proposal_Attention(Layer):
    """Joint attention over image proposals and text positions.

    Called on ``[img_emb, text_emb]`` with shapes
    ``(batch, num_proposal, img_emb_size)`` and
    ``(batch, num_proposal, emb_size)``. Two attention maps of shape
    ``(batch, emb_size, num_proposal)`` are returned: one built from the
    weights ``Wu``/``Wl``/``Wr`` and one from ``Wu2``/``Wl2``/``Wr2``.

    The second branch previously reused ``Wu``/``Wl``/``Wr``, which left the
    ``*2`` weights untrained and made both outputs identical.
    """

    def __init__(self, **kwargs):
        super(proposal_Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        if not isinstance(input_shape, list):
            raise ValueError('A Co-Attention_para layer should be called '
                             'on a list of inputs.')
        if len(input_shape) != 2:
            raise ValueError('A Co-Attention_para layer should be called on a list of 2 inputs.'
                             'Got '+str(len(input_shape))+'inputs.')
        self.img_emb_size = input_shape[0][-1]
        self.emb_size = input_shape[1][-1]
        self.num_proposal = input_shape[1][1]

        def _weight(name, shape):
            return self.add_weight(name=name,
                                   initializer="random_normal",
                                   shape=shape,
                                   trainable=True)

        # Creation order is kept so that the weight list order is unchanged.
        self.Wu = _weight("Wu", (self.emb_size, self.img_emb_size))
        self.Wl = _weight("Wl", (self.emb_size, self.emb_size))
        self.Wr = _weight("Wr", (self.emb_size, self.emb_size))
        self.Wu2 = _weight("Wu2", (self.emb_size, self.img_emb_size))
        self.Wl2 = _weight("Wl2", (self.emb_size, self.emb_size))
        self.Wr2 = _weight("Wr2", (self.emb_size, self.emb_size))

        super(proposal_Attention, self).build(input_shape)

    @staticmethod
    def _attention_map(img_emb_t, text_emb_t, w_img, w_text, w_out):
        """Return the squared softmax attention map for one weight set."""
        R = K.tanh(K.dot(w_img, img_emb_t) + K.dot(w_text, text_emb_t))  # (emb_size, batch, num_proposal)
        R_t = K.permute_dimensions(R, (1, 0, 2))  # (batch, emb_size, num_proposal)
        # Softmax over the last axis, i.e. over the proposals.
        softmax_pi = K.softmax(K.permute_dimensions(K.dot(w_out, R_t), (1, 0, 2)))  # (batch, emb_size, num_proposal)
        # NOTE(review): the map is multiplied with itself rather than with
        # the embeddings - kept as in the original, confirm intent.
        return softmax_pi * softmax_pi

    def call(self, inputs, mask=None):
        img_emb = inputs[0]   # (batch, num_proposal, img_emb_size)
        text_emb = inputs[1]  # (batch, num_proposal, emb_size)

        img_emb_t = K.permute_dimensions(img_emb, (0, 2, 1))    # (batch, img_emb_size, num_proposal)
        text_emb_t = K.permute_dimensions(text_emb, (0, 2, 1))  # (batch, emb_size, num_proposal)

        img_output = self._attention_map(img_emb_t, text_emb_t, self.Wu, self.Wl, self.Wr)
        text_output = self._attention_map(img_emb_t, text_emb_t, self.Wu2, self.Wl2, self.Wr2)

        return img_output, text_output  # each (batch, emb_size, num_proposal)

    def get_config(self):
        return super(proposal_Attention, self).get_config()

    def compute_mask(self, inputs, mask=None):
        return None

    def compute_output_shape(self, input_shape):
        # Two outputs, both (batch, emb_size, num_proposal), matching `call`.
        single = (input_shape[1][0], input_shape[1][-1], input_shape[1][1])
        return [single, single]

In [None]:
class my_coAttention_para(Layer):
    """Parallel co-attention between two equally shaped feature sequences.

    Called on ``[tFeature, iFeature]``. Both inputs are projected to
    ``dim_k`` dimensions and summed, so they must share sequence length and
    embedding size. Each input is pooled with its own attention weights and
    the two pooled vectors are added.

    Args:
        dim_k: Size of the internal projection.

    Output shape: ``(batch, embedding_size)``.
    """

    def __init__(self, dim_k, **kwargs):
        super(my_coAttention_para, self).__init__(**kwargs)
        self.dim_k = dim_k  # internal tensor dimension
        self.supports_masking = True

    def build(self, input_shape):
        if not isinstance(input_shape, list):
            raise ValueError('A Co-Attention_para layer should be called '
                             'on a list of inputs.')
        if len(input_shape) != 2:
            raise ValueError('A Co-Attention_para layer should be called on a list of 2 inputs.'
                             'Got '+str(len(input_shape))+'inputs.')
        self.embedding_size = input_shape[0][-1]
        self.num_region = input_shape[1][1]
        self.seq_len = input_shape[0][1]

        def _weight(name, shape):
            return self.add_weight(name=name,
                                   initializer="random_normal",
                                   shape=shape,
                                   trainable=True)

        # Variable names follow the VQA co-attention paper. Creation order is
        # kept so that the weight list order is unchanged.
        self.Wi = _weight("Wi", (self.embedding_size, self.dim_k))
        self.Wt = _weight("Wt", (self.embedding_size, self.dim_k))
        self.Wpi = _weight("Wpi", (self.dim_k, 1))
        self.Wi2 = _weight("Wi2", (self.embedding_size, self.dim_k))
        self.Wt2 = _weight("Wt2", (self.embedding_size, self.dim_k))
        self.Wpt = _weight("Wpt", (self.dim_k, 1))

        super(my_coAttention_para, self).build(input_shape)

    def _attention_pool(self, joint, projection, values):
        """Pool ``values`` over the sequence axis with weights from ``joint``."""
        hidden = K.tanh(joint)
        weights = K.softmax(K.squeeze(K.dot(hidden, projection), axis=-1))  # (batch, seq)
        # Tile the weights over the embedding axis -> (batch, seq, embedding).
        weights = K.permute_dimensions(K.repeat(weights, self.embedding_size), (0, 2, 1))
        return K.sum(weights * values, axis=1)  # (batch, embedding)

    def call(self, inputs, mask=None):
        tFeature = inputs[0]
        iFeature = inputs[1]

        joint_t = K.dot(iFeature, self.Wi2) + K.dot(tFeature, self.Wt2)
        tfeature = self._attention_pool(joint_t, self.Wpt, tFeature)

        joint_i = K.dot(iFeature, self.Wi) + K.dot(tFeature, self.Wt)
        ifeature = self._attention_pool(joint_i, self.Wpi, iFeature)

        return tfeature + ifeature

    def get_config(self):
        # dim_k is a required constructor argument; without it the layer
        # cannot be re-created from its config.
        config = super(my_coAttention_para, self).get_config()
        config["dim_k"] = self.dim_k
        return config

    def compute_mask(self, inputs, mask=None):
        return None

    def compute_output_shape(self, input_shape):
        output_shape = (input_shape[0][0], input_shape[0][-1])
        return output_shape



In [None]:
class text_user_Attention(Layer):
    """Parallel co-attention between user features and text features.

    Called on ``[uFeature, tFeature]``. Both inputs are projected to
    ``dim_k`` dimensions and summed, so they must share sequence length and
    embedding size. Each input is pooled with its own attention weights and
    the two pooled vectors are added.

    Args:
        dim_k: Size of the internal projection.

    Output shape: ``(batch, embedding_size)``.
    """

    def __init__(self, dim_k, **kwargs):
        super(text_user_Attention, self).__init__(**kwargs)
        self.dim_k = dim_k  # internal tensor dimension
        self.supports_masking = True

    def build(self, input_shape):
        if not isinstance(input_shape, list):
            raise ValueError('A Co-Attention_para layer should be called '
                             'on a list of inputs.')
        if len(input_shape) != 2:
            raise ValueError('A Co-Attention_para layer should be called on a list of 2 inputs.'
                             'Got '+str(len(input_shape))+'inputs.')
        self.embedding_size = input_shape[0][-1]
        self.num_region = input_shape[1][1]
        self.seq_len = input_shape[0][1]

        def _weight(name, shape):
            return self.add_weight(name=name,
                                   initializer="random_normal",
                                   shape=shape,
                                   trainable=True)

        # Variable names follow the VQA co-attention paper. Creation order is
        # kept so that the weight list order is unchanged.
        self.Wu = _weight("Wu", (self.embedding_size, self.dim_k))
        self.Wt = _weight("Wt", (self.embedding_size, self.dim_k))
        self.Wpt = _weight("Wpt", (self.dim_k, 1))
        self.Wu2 = _weight("Wu2", (self.embedding_size, self.dim_k))
        self.Wt2 = _weight("Wt2", (self.embedding_size, self.dim_k))
        self.Wpu = _weight("Wpu", (self.dim_k, 1))

        super(text_user_Attention, self).build(input_shape)

    def _attention_pool(self, joint, projection, values):
        """Pool ``values`` over the sequence axis with weights from ``joint``."""
        hidden = K.tanh(joint)
        weights = K.softmax(K.squeeze(K.dot(hidden, projection), axis=-1))  # (batch, seq)
        # Tile the weights over the embedding axis -> (batch, seq, embedding).
        weights = K.permute_dimensions(K.repeat(weights, self.embedding_size), (0, 2, 1))
        return K.sum(weights * values, axis=1)  # (batch, embedding)

    def call(self, inputs, mask=None):
        tFeature = inputs[1]
        uFeature = inputs[0]

        joint_t = K.dot(uFeature, self.Wu) + K.dot(tFeature, self.Wt)
        tfeature = self._attention_pool(joint_t, self.Wpt, tFeature)

        joint_u = K.dot(uFeature, self.Wu2) + K.dot(tFeature, self.Wt2)
        ufeature = self._attention_pool(joint_u, self.Wpu, uFeature)

        return tfeature + ufeature

    def get_config(self):
        # dim_k is a required constructor argument; without it the layer
        # cannot be re-created from its config.
        config = super(text_user_Attention, self).get_config()
        config["dim_k"] = self.dim_k
        return config

    def compute_mask(self, inputs, mask=None):
        return None

    def compute_output_shape(self, input_shape):
        output_shape = (input_shape[0][0], input_shape[0][-1])
        return output_shape

In [None]:
class post_tag_Attention(Layer):
    """Attention that pools post embeddings guided by tag embeddings.

    Called on ``[posts, tags]`` with shapes ``(batch, post_num, emb)`` and
    ``(batch, tag_num, emb)``; returns one ``(batch, emb)`` vector per
    sample, the score-weighted sum of the post embeddings.
    """

    def __init__(self, **kwargs):
        super(post_tag_Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        if not isinstance(input_shape, list):
            raise ValueError('A Co-Attention_para layer should be called '
                             'on a list of inputs.')
        if len(input_shape) != 2:
            raise ValueError('A Co-Attention_para layer should be called on a list of 2 inputs.'
                             'Got '+str(len(input_shape))+'inputs.')

        post_shape, tag_shape = input_shape[0], input_shape[1]
        self.embedding_size = post_shape[-1]  # embedding width of the posts
        self.num_region = tag_shape[1]        # number of tag rows
        self.seq_len = post_shape[1]          # number of posts

        def _new_weight(name, shape):
            return self.add_weight(name=name,
                                   initializer="random_normal",
                                   shape=shape,
                                   trainable=True)

        square = (self.embedding_size, self.embedding_size)
        self.Wh = _new_weight("Wh", square)
        self.We = _new_weight("We", square)
        self.Wh2 = _new_weight("Wh2", square)
        self.Wf = _new_weight("Wf", (1, self.embedding_size))

        super(post_tag_Attention, self).build(input_shape)

    def call(self, inputs, mask=None):
        posts = inputs[0]  # (batch, post_num, emb)
        tags = inputs[1]   # (batch, tag_num, emb)

        posts_t = K.permute_dimensions(posts, (0, 2, 1))  # (batch, emb, post_num)
        tags_t = K.permute_dimensions(tags, (0, 2, 1))    # (batch, emb, tag_num)

        # Post/tag affinity matrix.
        affinity = K.tanh(K.batch_dot(K.dot(posts, self.Wh), tags_t))  # (batch, post_num, tag_num)
        affinity_t = K.permute_dimensions(affinity, (0, 2, 1))         # (batch, tag_num, post_num)

        # K.dot(2-D weight, 3-D tensor) puts the batch axis second; swap it
        # back to the front.
        post_proj = K.permute_dimensions(K.dot(self.We, posts_t), (1, 0, 2))  # (batch, emb, post_num)
        tag_proj = K.permute_dimensions(K.dot(self.Wh2, tags_t), (1, 0, 2))   # (batch, emb, tag_num)
        tag_on_posts = K.batch_dot(tag_proj, affinity_t)                      # (batch, emb, post_num)
        fused = K.tanh(post_proj + tag_on_posts)                              # (batch, emb, post_num)

        # One score per post (no softmax is applied to the scores).
        scores = K.squeeze(
            K.permute_dimensions(K.dot(self.Wf, fused), (1, 0, 2)),  # (batch, 1, post_num)
            axis=1,
        )  # (batch, post_num)

        tiled_scores = K.repeat(scores, self.embedding_size)  # (batch, emb, post_num)
        return K.sum(tiled_scores * posts_t, axis=-1)         # (batch, emb)

    def get_config(self):
        return super(post_tag_Attention, self).get_config()

    def compute_mask(self, inputs, mask=None):
        return None

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][-1])

In [None]:
def positional_encoding(seq_len, d_model):
    """Return the sinusoidal positional-encoding table as a float32 tensor.

    Even feature columns hold ``sin(pos * w_k)`` and odd columns hold
    ``cos(pos * w_k)`` with ``w_k = 10000 ** (-2k / d_model)``.

    Args:
        seq_len: Number of positions.
        d_model: Number of feature columns; odd values are supported.

    Returns:
        ``tf.Tensor`` of shape ``(1, seq_len, d_model)`` and dtype float32.
    """
    position = np.arange(seq_len)[:, np.newaxis]
    div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))
    angles = position * div_term  # (seq_len, ceil(d_model / 2))

    pos_enc = np.zeros((seq_len, d_model))
    pos_enc[:, 0::2] = np.sin(angles)
    # For odd d_model there is one odd column fewer than even columns, so the
    # cosine part drops the last frequency to keep the shapes aligned.
    pos_enc[:, 1::2] = np.cos(angles[:, : d_model // 2])
    pos_enc = pos_enc[np.newaxis, ...]  # add batch dimension

    return tf.convert_to_tensor(pos_enc, dtype=tf.float32)

#### Model

In [None]:
def modelDef(lr):
    """Build and compile the full multi-modal hashtag recommendation model.

    Image regions, text tokens and user/other post-tag histories are each
    turned into (49, feature_size) region features, fused pairwise by six
    co-attention layers, summed, and passed through two GraphSage branches
    (one per adjacency input) whose softmax outputs are blended 0.95 / 0.05.

    Args:
        lr: Learning rate for the SGD optimizer.

    Returns:
        A compiled Keras ``Model`` taking
        ``[img, text, user_post, other_post, user_tag, other_tag, G, G2]``
        and producing one ``num_tags``-wide score vector per node.

    NOTE(review): ``pos_encoding_repeated`` is read below but is not defined
    in the part of the file visible here - it must exist at module level
    before this function is called.
    """

    # define input data shape (batch dimension omitted)
    inputs_img = Input(shape=(7, 7, 512)) # (batch, 7, 7, 512)
    inputs_text = Input(shape=(49)) # (batch, 49) - fed to the Embedding layer; NOTE(review): `(49)` is the int 49, not a tuple
    inputs_user_post = Input(shape=(top_u_post_num, 768)) # (batch, 5, 768)
    inputs_other_post = Input(shape=(top_o_post_num, 768)) # (batch, 10, 768)
    inputs_user_tag = Input(shape=(top_gamma_tag, num_tags)) # (batch, 10, 3896)
    inputs_other_tag = Input(shape=(top_gamma_tag, num_tags)) # (batch, 10, 3896)

    G = [Input(batch_input_shape=(None, None), sparse=True)] # sparse adjacency matrix of the first relation
    G2 = [Input(batch_input_shape=(None, None), sparse=True)] # sparse adjacency matrix of the second relation

    reshapeImg = Reshape(target_shape=(7*7, 512)) # (7, 7, 512) -> (49, 512)
    memoryImg = BatchMemory(49 , feature_size, 49, feature_size, 5)
    denseImg = Dense(feature_size, activation="tanh", use_bias=True) # (49, 512) -> (49, 300)

    textEmbeddings = Embedding(input_dim=212003, output_dim=150, mask_zero=True, input_length=49)
    BiLSTM = Bidirectional(LSTM(units=150, return_sequences=True)) # presumably (49, 150) -> (49, 300) via Bidirectional's default concat merge
    # NOTE(review): denseText is created but never applied below.
    denseText = Dense(feature_size, activation="tanh", use_bias=True)
    #memoryText = BatchMemory(49 , feature_size, 49, feature_size, 5)

    TagEmbeddings = Dense(768) # (10, 3896) -> (10, 768); shared by user and other tags

    # build attention model
    user_Att_layer = post_tag_Attention()
    other_Att_layer = post_tag_Attention()

    # build multiple feature co-attention (one layer per modality pair)
    it_Att_layer = my_coAttention_para(dim_k=dim_k)
    tu_Att_layer = my_coAttention_para(dim_k=dim_k)
    iu_Att_layer = my_coAttention_para(dim_k=dim_k)
    io_Att_layer = my_coAttention_para(dim_k=dim_k)
    ou_Att_layer = my_coAttention_para(dim_k=dim_k)
    to_Att_layer = my_coAttention_para(dim_k=dim_k)

    # build dense layer
    densePost = Dense(49*feature_size, activation="tanh", use_bias=True) # (768,) -> (49*300,)
    denseOPost = Dense(49*feature_size, activation="tanh", use_bias=True)
    reshapePost = Reshape(target_shape=(49, feature_size)) # (49*300,) -> (49, 300)

    # Ablation
    # NOTE(review): reshapeAdded and denseAdded are created but never applied below.
    reshapeAdded = Reshape(target_shape=(1,49*feature_size))
    denseAdded = Dense(feature_size, activation="tanh", use_bias=True)

    # image branch: (7, 7, 512) -> (49, 512) -> (49, 300) -> memory read-out
    iFeature = reshapeImg(inputs_img)
    iFeature = denseImg(iFeature)
    iFeature = memoryImg(iFeature)

    # text branch: token ids -> embeddings (+ positional encoding) -> BiLSTM
    text_embeddings = textEmbeddings(inputs_text)
    tFeature = BiLSTM(text_embeddings+pos_encoding_repeated)

    # encode post-tag data
    user_EHu = TagEmbeddings(inputs_user_tag)
    user_EMu = inputs_user_post

    other_EMu = inputs_other_post
    other_EHu = TagEmbeddings(inputs_other_tag)

    # post-tag attention: one 768-d vector per sample for each history
    uFeature = user_Att_layer([user_EMu, user_EHu])
    oFeature = other_Att_layer([other_EMu, other_EHu])

    #uFeature = Reshape(target_shape=(1,top_u_post_num*768))(inputs_user_post)
    # expand the pooled user vector to (49, 300) so it can be co-attended with the other modalities
    uFeature = densePost(uFeature)
    uFeature = reshapePost(uFeature)

    #oFeature = Reshape(target_shape=(1,top_o_post_num*768))(inputs_other_post)
    oFeature = denseOPost(oFeature)
    oFeature = reshapePost(oFeature)

    # multiple attention (less important feat., more important feat.)

    it_vector = it_Att_layer([tFeature, iFeature])
    tu_vector = tu_Att_layer([uFeature, tFeature])
    iu_vector = iu_Att_layer([uFeature, iFeature])
    io_vector = io_Att_layer([oFeature, iFeature])
    to_vector = to_Att_layer([oFeature, tFeature])
    uo_vector = ou_Att_layer([oFeature, uFeature])
    added = Add()([it_vector, tu_vector, iu_vector, io_vector, to_vector, uo_vector])

    # SAGE model for existing relation G
    H_G = Dropout(0.5)(added)
    H_G = GraphSageConv(64, name='GraphSage_G_1', aggregate='mean', activation='relu', kernel_regularizer=l2(5e-4))([H_G]+G)
    H_G = Dropout(0.5)(H_G)
    Y_G = GraphSageConv(num_tags, name='GraphSage_G_2', aggregate='mean', activation='softmax')([H_G]+G)

    # SAGE model for the second relation G2.
    # NOTE(review): this branch consumes H_G (the dropped-out 64-d hidden
    # state of the G branch), not `added` - confirm this is intended.
    H_G2 = GraphSageConv(64, name='GraphSage_G2_1', aggregate='mean', activation='relu', kernel_regularizer=l2(5e-4))([H_G]+G2)
    H_G2 = Dropout(0.5)(H_G2)
    Y_G2 = GraphSageConv(num_tags, name='GraphSage_G2_2', aggregate='mean', activation='softmax')([H_G2]+G2)

    # Combine the outputs of both relations with a fixed 0.95 / 0.05 blend;
    # change this line to combine them differently.
    final_output = Y_G*0.95 + Y_G2*0.05


    model = Model(inputs=[inputs_img, inputs_text, inputs_user_post, inputs_other_post, inputs_user_tag, inputs_other_tag]+G+G2, outputs=[final_output])
    sgd =  tf.keras.optimizers.SGD(learning_rate=lr, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss=myLossFunc, optimizer=sgd)

    return model

In [None]:
def modelDef_wo(lr):
    """Build and compile the ablation model that bypasses pairwise co-attention.

    Each modality (image, text, user posts, other users' posts) is brought to
    a ``(49, feature_size)`` map. The four maps are summed element-wise,
    flattened and projected to ``feature_size``; that vector feeds two
    two-layer GraphSage branches, one per adjacency input, whose softmax
    outputs are blended 0.95 / 0.05.

    Earlier revisions of this function also evaluated six co-attention
    combinations and summed them, but that sum was immediately overwritten by
    the plain feature sum, so it never reached the output. Those dead
    computations (and the layers only they used) have been removed; the
    resulting model graph is the same.

    Args:
        lr: Learning rate passed to the SGD optimizer.

    Returns:
        A compiled Keras ``Model`` whose inputs are
        ``[img, text_ids, user_post, other_post, user_tag, other_tag, G, G2]``
        and whose single output has ``num_tags`` scores per node.
    """
    # ---- inputs ---------------------------------------------------------
    inputs_img = Input(shape=(7, 7, 512))
    # `shape` must be a tuple; the former `(49)` was just the int 49.
    inputs_text = Input(shape=(49,))  # token ids, consumed by the Embedding below
    inputs_user_post = Input(shape=(top_u_post_num, 768))
    inputs_other_post = Input(shape=(top_o_post_num, 768))
    inputs_user_tag = Input(shape=(top_gamma_tag, num_tags))
    inputs_other_tag = Input(shape=(top_gamma_tag, num_tags))

    # One sparse adjacency matrix per relation, each wrapped in a list so it
    # can be concatenated with the node features when calling GraphSageConv.
    G = [Input(batch_input_shape=(None, None), sparse=True)]
    G2 = [Input(batch_input_shape=(None, None), sparse=True)]

    # ---- layers ---------------------------------------------------------
    reshapeImg = Reshape(target_shape=(7*7, 512))  # (7, 7, 512) -> (49, 512)
    denseImg = Dense(feature_size, activation="tanh", use_bias=True)  # 512 -> feature_size
    memoryImg = BatchMemory(49 , feature_size, 49, feature_size, 5)

    textEmbeddings = Embedding(input_dim=212003, output_dim=150, mask_zero=True, input_length=49)
    # Forward + backward LSTM, 150 units each, one output per time step.
    BiLSTM = Bidirectional(LSTM(units=150, return_sequences=True))

    TagEmbeddings = Dense(768)  # num_tags -> 768, shared by user and other tags

    # post-tag attention (separate weights for user and other posts)
    user_Att_layer = post_tag_Attention()
    other_Att_layer = post_tag_Attention()

    # projection of the attended post features to a (49, feature_size) map
    densePost = Dense(49*feature_size, activation="tanh", use_bias=True)
    denseOPost = Dense(49*feature_size, activation="tanh", use_bias=True)
    reshapePost = Reshape(target_shape=(49, feature_size))

    # ---- graph ----------------------------------------------------------
    # image branch
    iFeature = reshapeImg(inputs_img)
    iFeature = denseImg(iFeature)
    iFeature = memoryImg(iFeature)

    # text branch
    text_embeddings = textEmbeddings(inputs_text)
    tFeature = BiLSTM(text_embeddings)

    # post / tag encodings
    user_EHu = TagEmbeddings(inputs_user_tag)
    user_EMu = inputs_user_post

    other_EMu = inputs_other_post
    other_EHu = TagEmbeddings(inputs_other_tag)

    # post-tag attention
    uFeature = user_Att_layer([user_EMu, user_EHu])
    oFeature = other_Att_layer([other_EMu, other_EHu])

    uFeature = densePost(uFeature)
    uFeature = reshapePost(uFeature)

    oFeature = denseOPost(oFeature)
    oFeature = reshapePost(oFeature)

    # Ablation: fuse the four feature maps by plain addition instead of
    # pairwise co-attention.
    added = Add()([iFeature,tFeature,uFeature,oFeature])
    # Flatten (49, feature_size) to one vector per node; a single Reshape
    # replaces the former Reshape-to-(1, N) followed by a backend squeeze.
    added = Reshape(target_shape=(49*feature_size,))(added)
    added = Dense(feature_size, activation="tanh", use_bias=True)(added)

    # SAGE model for existing relation G
    H_G = Dropout(0.5)(added)
    H_G = GraphSageConv(64, name='GraphSage_G_1', aggregate='mean', activation='relu', kernel_regularizer=l2(5e-4))([H_G]+G)
    H_G = Dropout(0.5)(H_G)
    Y_G = GraphSageConv(num_tags, name='GraphSage_G_2', aggregate='mean', activation='softmax')([H_G]+G)

    # SAGE model for relation G2.
    # NOTE(review): this branch consumes H_G (the dropped-out hidden state of
    # the G branch), not `added`. Kept exactly as before -- confirm the
    # chaining is intended.
    H_G2 = GraphSageConv(64, name='GraphSage_G2_1', aggregate='mean', activation='relu', kernel_regularizer=l2(5e-4))([H_G]+G2)
    H_G2 = Dropout(0.5)(H_G2)
    Y_G2 = GraphSageConv(num_tags, name='GraphSage_G2_2', aggregate='mean', activation='softmax')([H_G2]+G2)

    # Weighted blend of the two relation-specific predictions.
    final_output =  Y_G2*0.05 + Y_G*0.95

    model = Model(inputs=[inputs_img, inputs_text, inputs_user_post, inputs_other_post, inputs_user_tag, inputs_other_tag]+G+G2, outputs=final_output)
    sgd =  tf.keras.optimizers.SGD(learning_rate=lr, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss=myLossFunc, optimizer=sgd)

    return model

### Run

### training

In [None]:
# Build the network once for this notebook session; the training cell below
# reads this `model` global. 0.05 is presumably the learning rate (modelDef's
# body passes `lr` to the SGD optimizer) -- confirm against its signature.
model = modelDef(0.05)

In [None]:
if __name__ == "__main__":
    # Training / evaluation driver.
    #
    # Relies on names created by earlier notebook cells: `model`, `ids_total`,
    # `tags_total`, `text_total_pad`, and the helpers `get_batch_feature`,
    # `sample_mask`, `img_adj_by_newcosine`, `adj_by_newcosine`,
    # `preprocess_adj`, `batch_train_label`, `batch_test_label`, `evaluator`.
    #
    # Every epoch trains on each batch-graph, then predicts on the held-out
    # tail of each batch, logs the metrics and checkpoints on the best F1.

    #with open("Preprocess_data/organized_np_data_%s.pkl"%(today), "rb") as f:
    #    text_total, tags_total, ids_total, user_total, batch_total= pickle.load(f)
    num = ids_total.shape[0]  # post number

    for top_K in [9]:

        # Each batch is one graph of `batch_size` consecutive posts: the first
        # `num_train` are supervised, the last `num_test` are held out.
        batch_size = 500
        num_test = 50
        num_train = batch_size - num_test
        num_in_graph_per_step = [list(range(j, j + batch_size)) for j in range(0, batch_size*num_batch, batch_size)]
        train_num_array_per_step = [list(range(j, j + num_train)) for j in range(0, batch_size*num_batch, batch_size)]
        test_num_array_per_step = [list(range(j, j + num_test)) for j in range(num_train, batch_size*num_batch, batch_size)]

        num_in_graph_per_step = np.array(num_in_graph_per_step)
        train_num_array_per_step = np.array(train_num_array_per_step)
        test_num_array_per_step = np.array(test_num_array_per_step)

        print("num_in_graph_per_step: ", len(num_in_graph_per_step[0]))
        print("train_num_array_per_step: ", len(train_num_array_per_step[0]))
        print("test_num_array_per_step: ", len(test_num_array_per_step[0]))
        print("Start Training. Total steps: ", num_batch)

        F = 0.0 # best F1 score
        wait = 0 # early stoping waiting round (var.)
        best_val_loss = 99999 # best loss value
        PATIENCE = 100 # early stoping waiting round number (para.)

        # Adjacency matrices are built once (during the first epoch) and then
        # reused; index i of both lists always refers to batch i.
        adj_all = []
        t_adj_all = []
        # Batches whose adjacency matrices could not be built. This must
        # outlive the epoch loop: the matrices are never rebuilt, so a batch
        # that failed in the first epoch has to stay skipped in later ones.
        skip_list = []

        # The checkpoint and the metrics log are written here every epoch;
        # make sure the directory exists before the first epoch is spent.
        os.makedirs("Model_data", exist_ok=True)

        for epoch in range(num_epoch):

            batch_total_loss = 0.0

            for i in range(num_batch):

                # skip batches which being on skip_list
                if i in skip_list:
                    continue

                batch_idx_train = range(0, len(train_num_array_per_step[i])) # numbers of training data per batch
                batch_train_mask = sample_mask(batch_idx_train, len(num_in_graph_per_step[i])) # mask training list (true/false)

                batch_img, batch_text, batch_u_post, batch_o_post, batch_u_tag, batch_o_tag, batch_text_id = get_batch_feature(num_in_graph_per_step[i], text_total_pad)
                if len(adj_all) != num_batch:
                    # First pass over this batch: build and cache both graphs.
                    try:
                        # caculate img cosine similarity and get adjacency matrix
                        adj = img_adj_by_newcosine(batch_img,0.6)
                        adj_ = preprocess_adj(adj, True)
                        t_adj = adj_by_newcosine(batch_text,0.95)
                        t_adj_ = preprocess_adj(t_adj, True)
                    except Exception as err:
                        # Cannot get an adjacency matrix for this batch. Put a
                        # placeholder in BOTH caches so later indices stay
                        # aligned, and remember to skip the batch from now on.
                        skip_list.append(i)
                        adj_all.append([])
                        t_adj_all.append([])
                        print("Skipping batch %d: failed to build adjacency matrices: %r" % (i, err))
                        continue
                    adj_all.append(adj_)
                    t_adj_all.append(t_adj_)

                batch_graph = [batch_img, batch_text_id, batch_u_post, batch_o_post, batch_u_tag, batch_o_tag, adj_all[i], t_adj_all[i]]

                # training data answer & testing data zero padding
                batch_train_labels = batch_train_label(tags_total, train_num_array_per_step[i], len(test_num_array_per_step[i]))

                # The sample-weight mask restricts the loss to the training nodes.
                history  = model.train_on_batch(batch_graph, batch_train_labels, sample_weight=batch_train_mask)

                batch_total_loss += float(history)
                del batch_idx_train, batch_train_mask, batch_img, batch_text, batch_u_post, batch_o_post, batch_u_tag, batch_o_tag #,batch_train_labels

            if epoch == 0:
                # Report graph density once; placeholders of skipped batches
                # have no `.toarray()` and are left out.
                cnt = sum(np.count_nonzero(sparr.toarray())
                          for i, sparr in enumerate(adj_all) if i not in skip_list)
                print("Total img edge num: ",cnt, "         ")

                cnt = sum(np.count_nonzero(sparr.toarray())
                          for i, sparr in enumerate(t_adj_all) if i not in skip_list)
                print("Total text edge num: ",cnt, "         ")

            # testing
            y_pred_all = []
            y_test_all = []

            for i in range(num_batch):

                # skip batches which being on skip_list
                if i in skip_list:
                    continue
                batch_img, batch_text, batch_u_post, batch_o_post, batch_u_tag, batch_o_tag, batch_text_id = get_batch_feature(num_in_graph_per_step[i],text_total_pad)

                batch_graph = [batch_img, batch_text_id, batch_u_post, batch_o_post,batch_u_tag, batch_o_tag, adj_all[i],t_adj_all[i]]
                y_pred = model.predict_on_batch(batch_graph)
                # Only the rows after the training nodes are test predictions.
                y_pred_all.append(y_pred[len(train_num_array_per_step[i]):])
                batch_test_labels = batch_test_label(tags_total, test_num_array_per_step[i])
                y_test_all.append(batch_test_labels)

                del batch_img, batch_text, batch_u_post, batch_o_post, batch_u_tag, batch_o_tag, batch_graph,batch_text_id

            y_pred_all = np.concatenate(y_pred_all)
            y_test_all = np.concatenate(y_test_all)

            # print the testing result
            acc, precision, recall, f1, f1_mean = evaluator(y_test_all, y_pred_all, top_K)
            print("Top %d, Epoch: %d, Loss: %.4f, accuracy: %.4f, precision: %.4f, recall: %.4f, f1: %.4f, f1_m: %.4f" %
                    (top_K, epoch, batch_total_loss, acc, float(precision), float(recall), float(f1),float(f1_mean)))

            # if getting the best f1 score than before, save model weights
            is_best = f1_mean >= F
            if is_best:
                model.save_weights("Model_data/model_best_%s_2layer.h5"%(date_num))
                F = f1_mean

            # Append this epoch's metrics to the log; best-so-far epochs are
            # marked with a leading "*".
            string = ("*" if is_best else "") + "Top %d, Epoch: %d,accuracy: %.4f, precision: %.4f, recall: %.4f, f1: %.4f \n" % (
                top_K, epoch, acc, float(precision), float(recall), float(f1_mean))
            with open("Model_data/record_gcn_%s_2layer.txt"%(date_num), "a") as res_file:
                res_file.write(string)

            # Early stopping. Despite its name, `best_val_loss` tracks the
            # summed *training* loss of the epoch. `wait` is checked before it
            # is incremented, so training stops on the (PATIENCE + 1)-th
            # consecutive epoch without improvement.
            if batch_total_loss < best_val_loss:
                best_val_loss = batch_total_loss
                wait = 0
            else:
                if wait >= PATIENCE:
                    print('Epoch {}: early stopping'.format(epoch))
                    break
                wait += 1

    print("Training Process Completed.")

