In [1]:
import torch
from torch_geometric.data import Data
from torch.utils.data import Dataset
import json
from gensim.models.keyedvectors import KeyedVectors
from torch import nn
import numpy as np

In [2]:
from data import *
dataset = semEval2017Dataset(
    dataPath='../dataset/semeval2017-task8/', 
    type='train'
)
glove25d = KeyedVectors.load_word2vec_format(
    '../dataset/glove/glove.twitter.27B.25d.gensim.txt',
    binary=False
)

In [3]:
vectorSize = glove25d.vector_size
glove25d.add_vectors(["<start>", "<end>", "<unk>"] ,np.random.randn(3, vectorSize))
with open('../dataset/semeval2017-task8/wordList.json', 'r') as f:
    content = f.read()
wordList = ["<unk>", "<start>", "<end>"]
wordList += (json.loads(content)).keys()
word2index = {}
index = 1
for word in wordList:
    if word in glove25d:
        word2index[word] = index
        index += 1

In [4]:
from torch.utils.data import DataLoader
from data import collate
loader = DataLoader(
    dataset,
    shuffle = True,
    num_workers = 4,
    collate_fn = collate
)

In [14]:
from ABGCN import *
from torch import optim
model = ABGCN(
    word2vec = glove25d,
    word2index = word2index,
    s2vDim = 64, # 使用的句嵌入的维度
    gcnHiddenDim = 64, # GCN隐藏层的维度（GCNconv1的输出维度）
    rumorFeatureDim = 64, # GCN输出层的维度
    numRumorTag = 3, # 谣言标签种类数
    numStanceTag = 4, # 立场标签种类数
    numHeads = 5
)
device = torch.device('cpu')
model = model.set_device(device)
loss_func = torch.nn.CrossEntropyLoss(reduction='mean').to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [16]:

from tqdm import tqdm
from torch.nn.functional import softmax
from sklearn.metrics import f1_score

task = 2
true = []
pre = []
totalLoss = 0.
for epoch in range(1, 2):
    model.train()
    for thread in loader:
        if task == 1:
            tag = thread['rumorTag'].to(device)
            true += thread['rumorTag'].tolist()
        else:
            tag = thread['stanceTag'].to(device)
            true += thread['stanceTag'].tolist()
        
        
        nodeText = thread['nodeText']
        for i in range(len(nodeText)):
            indexList = []
            for word in nodeText[i]:
                if word in word2index:
                    indexList.append(word2index[word])
                elif word != '':
                    indexList.append(word2index['<unk>'])
            nodeText[i] = torch.IntTensor(indexList).to(device)
        nodeText = pad_sequence(nodeText, padding_value=0, batch_first=True)
        thread['nodeText'] = nodeText

        optimizer.zero_grad()
        predict = model.forward(thread, task)
        loss = loss_func(predict, tag)
        totalLoss += loss
        loss.backward()
        optimizer.step()
        
        predict = softmax(predict, dim=1)
        pre += predict.max(dim=1)[1].tolist()
    macroF1 = f1_score(true, pre, labels=[0,1,2,3], average='macro')
    acc = (np.array(true) == np.array(pre)).sum() / len(true)
    print(totalLoss / len(loader), acc, macroF1)


tensor(0.9258, grad_fn=<DivBackward0>) 0.6873525247758376 0.32475975889635295
