In [1]:
import torch
from torch_geometric.data import Data
from torch.utils.data import Dataset
import json
from gensim.models.keyedvectors import KeyedVectors
from torch import nn
import numpy as np

In [2]:
from data import *
# Training split of the SemEval-2017 data, read from the project dataset
# directory. `semEval2017Dataset` is presumably provided by the star import
# from `data` -- confirm.
dataset = semEval2017Dataset(dataPath='../dataset/semevalRemake/', type='train')

# Pre-trained GloVe Twitter vectors (25-d, judging by the file name) stored
# in text word2vec format, hence binary=False.
glove25d = KeyedVectors.load_word2vec_format(
    '../dataset/glove/glove.twitter.27B.25d.gensim.txt', binary=False
)

In [3]:
# Register the special tokens in the embedding table with random vectors so
# that the `word in glove25d` membership test below accepts them.
# NOTE(review): the random vectors are unseeded, so they differ between runs.
vectorSize = glove25d.vector_size
glove25d.add_vectors(["<start>", "<end>", "<unk>"], np.random.randn(3, vectorSize))

# Decode the word list explicitly as UTF-8: without `encoding=` Python falls
# back to the platform locale, which can fail or garble non-ASCII tokens.
with open('../dataset/semevalRemake/wordList.json', 'r', encoding='utf-8') as f:
    content = f.read()

# Special tokens first, followed by every key of the JSON object.
wordList = ["<unk>", "<start>", "<end>"]
wordList += json.loads(content).keys()

# Map each word that has an embedding to a dense index starting at 1;
# index 0 is left unassigned (the training loop pads sequences with 0).
word2index = {}
index = 1
for word in wordList:
    # Skip words that already have an index (e.g. a special token that also
    # appears in the JSON) so the index range stays contiguous.
    if word in glove25d and word not in word2index:
        word2index[word] = index
        index += 1

In [4]:
from torch.utils.data import DataLoader
from data import collate
# Shuffled loader over `dataset`. Batches are assembled by the project's
# `collate` function in a single worker subprocess. `batch_size` is not
# passed, so DataLoader's default (1) applies.
loader = DataLoader(dataset, collate_fn=collate, num_workers=1, shuffle=True)

In [5]:
from MSABiGCN import *
from torch import optim
# Build the model on top of the GloVe vectors and the word -> index map.
model = MSABiGCN(
    word2vec=glove25d,
    word2index=word2index,
    s2vDim=64,           # dimension of the sentence embedding
    gcnHiddenDim=64,     # GCN hidden-layer dimension (output dim of GCNconv1)
    rumorFeatureDim=64,  # GCN output-layer dimension
    numStanceTag=4,
    numRumorTag=3,       # number of rumor label classes
    edgeDropRate=0.2,
)

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.set_device(device)  # project-defined helper; returns the model

# The same mean-reduced cross-entropy criterion is applied to both the rumor
# and the stance predictions in the training loop.
loss_func = torch.nn.CrossEntropyLoss(reduction='mean').to(device)
optimizer = optim.AdamW(model.parameters(), lr=3e-5)

In [6]:

from tqdm import tqdm
from torch.nn.functional import softmax
from sklearn.metrics import f1_score

# Accumulators kept at notebook scope so they can be inspected after training.
rumorTrue = []   # gold rumor labels seen so far
rumorPre = []    # not filled in this cell
stanceTag = []   # NOTE(review): rebound to the per-batch label tensor below
stanceTrue = []  # gold stance labels seen so far
totalLoss = 0.   # running sum of per-batch losses (plain Python float)

# Fallback index for non-empty words that have no entry in the vocabulary.
unkIndex = word2index['<unk>']

for epoch in range(1, 2):
    model.train()
    for thread in tqdm(
        loader,
        desc="[epoch: {:d}] ".format(epoch),
        leave=False,
        ncols=100
    ):
        # Labels: tensors on the device for the loss, plain lists for bookkeeping.
        rumorTag = thread['rumorTag'].to(device)
        rumorTrue += thread['rumorTag'].tolist()
        stanceTag = thread['stanceTag'].to(device)
        stanceTrue += thread['stanceTag'].tolist()

        # Encode every node's words as vocabulary indices. Known words map to
        # their own index, unknown non-empty words map to <unk>, and empty
        # strings are dropped. A new list is built rather than overwriting the
        # batch's list element by element.
        encoded = []
        for words in thread['nodeText']:
            indexList = [
                word2index.get(word, unkIndex)
                for word in words
                if word in word2index or word != ''
            ]
            encoded.append(torch.tensor(indexList, dtype=torch.int32, device=device))

        # Right-pad to a rectangular (nodes, max_len) tensor; 0 is the padding index.
        # NOTE(review): `pad_sequence` is not imported explicitly in this
        # notebook; it is presumably re-exported by one of the star imports
        # (`data` / `MSABiGCN`) -- confirm.
        thread['nodeText'] = pad_sequence(encoded, padding_value=0, batch_first=True)

        optimizer.zero_grad()
        # Call the module itself rather than `.forward` so registered hooks run.
        rumorPredict, stancePredict = model(thread)
        loss = loss_func(rumorPredict, rumorTag) + loss_func(stancePredict, stanceTag)
        loss.backward()
        optimizer.step()

        # Accumulate the scalar value only; adding the tensor itself would keep
        # autograd history attached to the running total on every step.
        totalLoss += loss.item()


                                                                                                    