In [1]:
import torch
from torch_geometric.data import Data
from torch.utils.data import Dataset
import json
from gensim.models.keyedvectors import KeyedVectors
import numpy as np
from utils import flattenStructure

In [2]:
from data import *
dataset = semeval2017Dataset(dataPath='../dataset/semeval2017-task8/', 
                             type='train',
                             w2vPath='../dataset/glove/',
                             w2vDim=25)

In [3]:
from torch.utils.data import DataLoader
loader = DataLoader(dataset, shuffle=True)

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from biGCN import *


class GCN(torch.nn.Module):
    def __init__(self, inputDim, hiddenDim, outDim, dropout=0.):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(inputDim, hiddenDim)
        self.conv2 = GCNConv(hiddenDim + inputDim, outDim)
        self.dropout = dropout

    def forward(self, data):
        posts, edge_index, rootIndex = data.x, data.edgeIndex, data.rootIndex # posts(n, inputDim), edgeIndex(2, |E|)
        
        conv1Out = self.conv1(posts, edge_index)
        postRoot = torch.clone(posts[rootIndex])
        postRoot = postRoot.repeat(posts.shape[0], 1)
        conv1Root = conv1Out[rootIndex]

        conv2In = torch.cat([conv1Out, postRoot], dim=1)
        conv2In = F.relu(conv2In)
        conv2In = F.dropout(conv2In, training=self.training) # BiGCN对于dropout的实现，一次卷积之后随机舍弃一些点
        conv2Out = self.conv2(conv2In, edge_index)
        conv2Out = F.relu(conv2Out)

        conv1Root = conv1Root.repeat(posts.shape[0], 1)
        feature = torch.cat([conv1Root, conv2Out], dim=1)
        # 使用均值计算，把所有节点的特征聚合成为图的特征
        feature = torch.mean(feature, dim=0).view(1, -1)
        return feature
    
    # 更换计算设备
    def set_device(self, device: torch.device) -> torch.nn.Module:
        _model = self.to(device)
        _model.device = device
        return _model
    # 保存模型
    def save(self, path: str):
        torch.save(self.state_dict(), path)
    # 加载模型
    def load(self, path: str):
        self.load_state_dict(torch.load(path))

class BiGCN(torch.nn.Module):
    def __init__(self, inputDim, hiddenDim, convOutDim, NumRumorTag):
        super(BiGCN, self).__init__()
        self.TDGCN = GCN(inputDim, hiddenDim, convOutDim)
        self.BUGCN = GCN(inputDim, hiddenDim, convOutDim)
        self.fc=torch.nn.Linear((convOutDim + hiddenDim) * 2, NumRumorTag)

    def forward(self, dataTD, dataBU):
        TDOut = self.TDGCN(dataTD)
        BUOut = self.BUGCN(dataBU)
        feature = torch.cat((TDOut, BUOut), dim=1)
        p = self.fc(feature)
        return p

    # 更换计算设备
    def set_device(self, device: torch.device) -> torch.nn.Module:
        _model = self.to(device)
        _model.device = device
        return _model
    # 保存模型
    def save(self, path: str):
        torch.save(self.state_dict(), path)
    # 加载模型
    def load(self, path: str):
        self.load_state_dict(torch.load(path))

class ABGCN(nn.Module):
    def __init__(self,
                 w2vDim: int, # 使用的词嵌入的维度
                 s2vDim: int, # 使用的句嵌入的维度
                 gcnHiddenDim: int, # GCN隐藏层的维度（GCNconv1的输出维度）
                 rumorFeatureDim: int, # GCN输出层的维度
                 numRumorTag: int, # 谣言标签种类数
                 numStanceTag: int, # 立场标签种类数
                 s2vMethon = 'a', # 获取据嵌入的方法（l:lstm; a:attention）
                 numLstmLayer = 1 # lstm层数，仅在s2vMethod == 'l'时有效
                ):
        super().__init__()

        self.w2vDim = w2vDim
        self.s2vDim = s2vDim
        self.gcnHiddenDim = gcnHiddenDim
        self.rumorFeatureDim = rumorFeatureDim
        self.s2vMethon = s2vMethon
        self.batchSize = 1 # 实际上，由于不会写支持batch化的GCN，模型时不支持batch化训练的
        self.numRumorTag = numRumorTag
        self.numStanceTag = numStanceTag
        self.device = 'cpu'

        # ==使用biLSTM获取post的向量表示==
        if self.s2vMethon == 'l':
            self.numLstmLayer = numLstmLayer
            self.lstm = nn.LSTM(input_size = self.w2vDim,
                                hidden_size = self.s2vDim,
                                num_layers = self.numLstmLayer,
                                bidirectional=True)
            self.h0 = nn.Parameter(torch.randn((self.numLstmLayer * 2, self.batchSize, self.s2vDim)))
            self.c0 = nn.Parameter(torch.randn((self.numLstmLayer * 2, self.batchSize, self.s2vDim)))
            self.s2vDim *= 2 # 由于使用BiDirect所以s2vDim的维度会扩大1倍
        # ==使用Attention==
        else:
            pass
        
        self.biGCN = BiGCN(self.s2vDim, self.gcnHiddenDim, self.rumorFeatureDim, self.numRumorTag)

    def forwardRumor(self, data):
        # 各节点的特征表示，此时是word2vec的形式
        nodeFeature = data['nodeFeature']

        # 把w2v转化成s2v作为节点的特征
        s2v = []
        if self.s2vMethon == 'l':
            for w2v in nodeFeature:
                w2v = w2v.view((len(w2v), 1, -1)).to(self.device)
                sentenceHidden, _ = self.lstm(w2v, (self.h0, self.c0))
                # 仅取出最后一层的隐状态作为s2v
                s2v.append(sentenceHidden[-1].view(1, len(w2v[-1]), -1))
            s2v = torch.cat(s2v, dim=0)
        else:
            pass

        # GCN处理
        s2v = s2v.view(s2v.shape[0], -1)
        dataTD = Data(x = s2v.to(self.device), 
                      edgeIndex = data['edgeIndexTD'].to(self.device), 
                      rootIndex = data['threadIndex'])
        dataBU = Data(x = s2v.to(self.device), 
                      edgeIndex = data['edgeIndexBU'].to(self.device), 
                      rootIndex = data['threadIndex'])
        p = self.biGCN(dataTD, dataBU).view(self.batchSize, -1) # p.shape = (1, *)
        
        return p
        

    # 更换计算设备
    def set_device(self, device: torch.device) -> torch.nn.Module:
        _model = self.to(device)
        _model.device = device
        return _model
    # 保存模型
    def save(self, path: str):
        torch.save(self.state_dict(), path)
    # 加载模型
    def load(self, path: str):
        self.load_state_dict(torch.load(path))

In [5]:
# from ABGCN import *
from torch import optim
model = ABGCN(w2vDim = 25,
              s2vDim = 128,
              gcnHiddenDim = 128,
              rumorFeatureDim = 128,
              numRumorTag = 3,
              numStanceTag = 4,
              s2vMethon = 'l')
device = torch.device('cuda')
model = model.set_device(device)
loss_func = torch.nn.CrossEntropyLoss(reduction='mean').to(device)
optimizer = optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-2, momentum=0.9)
        

In [51]:
from tqdm import tqdm
from torch.nn.functional import softmax

totalLoss = 0.
testdata = dataset[0]
for data in tqdm(iter(loader), 
                 desc="[epoch {:d}, rumor]".format(1),
                 leave=False, 
                 ncols=100):
    # 抹除dataloader生成batch时对数据的升维
    data['threadId'] = data['threadId'][0]
    data['threadIndex'] = data['threadIndex'][0]
    for i in range(len(data['nodeFeature'])):
        data['nodeFeature'][i] = data['nodeFeature'][i].view((data['nodeFeature'][i].shape[1], data['nodeFeature'][i].shape[2]))
    data['edgeIndexTD'] = data['edgeIndexTD'].view((data['edgeIndexTD'].shape[1], data['edgeIndexTD'].shape[2]))
    data['edgeIndexBU'] = data['edgeIndexBU'].view((data['edgeIndexBU'].shape[1], data['edgeIndexBU'].shape[2]))
    data['rumorTag'] = data['rumorTag'].view((data['rumorTag'].shape[1]))
    data['stanceTag'] = data['stanceTag'].view((data['stanceTag'].shape[1]))
    rumorTag = data['rumorTag'].to(device)

    optimizer.zero_grad()
    p = model.forwardRumor(data)
    loss = loss_func(p, rumorTag)
    totalLoss += loss
    loss.backward()
    optimizer.step()

    p = softmax(p, dim=1)


                                                                                                    

In [52]:
print(totalLoss)

tensor(280.0627, device='cuda:0', grad_fn=<AddBackward0>)


In [54]:
print(model.biGCN.feature)

AttributeError: 'BiGCN' object has no attribute 'feature'

In [48]:
print(loss)

tensor(0.9175, device='cuda:0', grad_fn=<NllLossBackward0>)
