In [5]:
import torch
from torch_geometric.data import Data
from torch.utils.data import Dataset
import json
from gensim.models.keyedvectors import KeyedVectors
from torch import nn
import numpy as np
from torch.utils.data import DataLoader
from data import collate
from ABGCN import *

from data import *
# --- Evaluation setup: embeddings, vocabulary, test loader and label maps ---
# Single place to change the dataset location; every file below lives under it.
DATA_DIR = '../dataset/semeval2017-task8/'

# Pre-trained GloVe Twitter vectors stored in the word2vec text format.
word2vec = KeyedVectors.load_word2vec_format(
    '../dataset/glove/glove.twitter.27B.200d.gensim.txt',
    binary=False
)

# Register the special tokens with random vectors. A seeded generator keeps the
# notebook reproducible under "Restart & Run All".
# NOTE(review): presumably the checkpoint loaded in the next cell overwrites the
# embedding weights, in which case these values are irrelevant — confirm.
vectorSize = word2vec.vector_size
rng = np.random.default_rng(0)
word2vec.add_vectors(
    ["<start>", "<end>", "<unk>"],
    rng.standard_normal((3, vectorSize))
)

# Vocabulary: the special tokens first, followed by the keys of wordList.json.
with open(DATA_DIR + 'wordList.json', 'r', encoding='utf-8') as f:
    wordList = ["<unk>", "<start>", "<end>"] + list(json.load(f).keys())

# Map every word that has an embedding to an integer id. Ids start at 1 because
# 0 is used as the padding value when node texts are batched.
# NOTE(review): the assignment order is kept exactly as before; it presumably has
# to match the vocabulary used when the checkpoint was trained — do not reorder.
word2index = {}
index = 1
for word in wordList:
    if word in word2vec:
        word2index[word] = index
        index += 1

# Only the test split is needed for evaluation.
dataset = semEval2017Dataset(
    dataPath=DATA_DIR,
    type='test'
)
# Evaluation does not benefit from shuffling; a fixed order keeps runs comparable.
loader = DataLoader(dataset, shuffle=False, num_workers=4, collate_fn=collate)

# The label -> index maps are read from the training-set file. dict() takes a
# shallow copy so the maps are independent of rawDataset.
with open(DATA_DIR + 'trainSet.json', 'r', encoding='utf-8') as f:
    rawDataset = json.load(f)
label2IndexRumor = dict(rawDataset['label2IndexRumor'])
label2IndexStance = dict(rawDataset['label2IndexStance'])

In [6]:
# --- Build the model, restore the trained weights and create the loss ---
# Prefer the GPU but fall back to the CPU so the notebook does not crash on
# machines without CUDA.
# NOTE(review): model.load() is project code; if it restores CUDA tensors without
# remapping them, the CPU fallback may still need a map_location there — confirm.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ABGCN(
    word2vec = word2vec,
    word2index = word2index,
    s2vDim = 256,
    gcnHiddenDim = 512,
    rumorFeatureDim = 512,
    # One output unit per label of each task.
    numRumorTag = len(label2IndexRumor),
    numStanceTag = len(label2IndexStance),
    dropout = 0.2)
model = model.set_device(device)
model.load('./model/model.pt')

# The same criterion is applied to both the rumor and the stance predictions.
loss_func = torch.nn.CrossEntropyLoss(reduction='mean').to(device)
print(model)

ABGCN(
  (embed): Embedding(7696, 200, padding_idx=0)
  (wordAttention): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=200, out_features=200, bias=True)
  )
  (s2vFc): Linear(in_features=200, out_features=256, bias=True)
  (biGCN): BiGCN(
    (TDGCN): GCN(
      (conv1): GCNConv(512, 512)
      (conv2): GCNConv(1024, 512)
    )
    (BUGCN): GCN(
      (conv1): GCNConv(512, 512)
      (conv2): GCNConv(1024, 512)
    )
    (fc): Linear(in_features=2048, out_features=3, bias=True)
  )
  (RumorFc): Linear(in_features=2048, out_features=3, bias=True)
  (stanceAttention): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
  )
  (stanceFc): Linear(in_features=256, out_features=4, bias=True)
)


In [7]:
from torch.nn.functional import softmax
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import f1_score

# --- Evaluate the restored model on the test loader ---
model.eval()
rumorTrue = []
rumorPre = []
stanceTrue = []
stancePre = []
totalLoss = 0.
unkIndex = word2index['<unk>']

# Inference only: without no_grad every batch would keep its autograd graph
# alive (and summing the loss tensors would chain those graphs together).
with torch.no_grad():
    for thread in loader:
        rumorTag = thread['rumorTag'].to(device)
        rumorTrue += thread['rumorTag'].tolist()
        stanceTag = thread['stanceTag'].to(device)
        stanceTrue += thread['stanceTag'].tolist()

        # Turn each node's words into vocabulary ids: known words keep their id,
        # unknown non-empty words map to <unk>, unknown empty strings are dropped.
        encodedNodes = []
        for words in thread['nodeText']:
            indexList = [
                word2index.get(word, unkIndex)
                for word in words
                if word in word2index or word != ''
            ]
            encodedNodes.append(
                torch.tensor(indexList, dtype=torch.int32, device=device)
            )
        # Pad to a rectangular batch; 0 is the padding id.
        thread['nodeText'] = pad_sequence(encodedNodes, padding_value=0, batch_first=True)

        # Call the module itself rather than .forward() so hooks are honoured.
        rumorPredict, stancePredict = model(thread)
        loss = loss_func(rumorPredict, rumorTag) + loss_func(stancePredict, stanceTag)
        # .item() stores a plain float instead of a tensor.
        totalLoss += loss.item()

        rumorPre += softmax(rumorPredict, dim=1).argmax(dim=1).tolist()
        stancePre += softmax(stancePredict, dim=1).argmax(dim=1).tolist()


def _scores(yTrue, yPred, labels):
    """Return (accuracy, micro-F1, macro-F1) for one task."""
    accuracy = (np.array(yTrue) == np.array(yPred)).sum() / len(yPred)
    micro = f1_score(yTrue, yPred, labels=labels, average='micro')
    macro = f1_score(yTrue, yPred, labels=labels, average='macro')
    return accuracy, micro, macro


# Label ids are 0..n-1, with n taken from the same label maps that size the
# model's output layers.
rumorLabels = list(range(len(label2IndexRumor)))
stanceLabels = list(range(len(label2IndexStance)))
accRumor, microF1Rumor, macroF1Rumor = _scores(rumorTrue, rumorPre, rumorLabels)
accStance, microF1Stance, macroF1Stance = _scores(stanceTrue, stancePre, stanceLabels)

# The loss is averaged over the number of batches produced by the loader.
print('average joint-loss: {:f}'.format(totalLoss / len(loader)))
print('rumor detection:')
print('accuracy: {:f}, micro-f1: {:f}, macro-f1: {:f}'.format(
    accRumor,
    microF1Rumor,
    macroF1Rumor
))
print('stance analyze:')
print('accuracy: {:f}, micro-f1: {:f}, macro-f1: {:f}'.format(
    accStance,
    microF1Stance,
    macroF1Stance
))

average joint-loss: 44.079887
rumor detection:
accuracy: 0.392857, micro-f1: 0.392857, macro-f1: 0.390324
stance analyze:
accuracy: 0.553861, micro-f1: 0.553861, macro-f1: 0.300267
