In [1]:
import json
# Directory that holds the SemEval-2017 Task 8 files, relative to this notebook.
dataSetPath = '../dataset/semeval2017-task8/'
# Decode explicitly as UTF-8 so reading the post text does not depend on the
# platform's default locale encoding.
with open(dataSetPath + 'trainSet.json', 'r', encoding='utf-8') as f:
    content = f.read()
# Parse the JSON text into the training-set dictionary.
trainSet = json.loads(content)

In [2]:
# Display the top-level keys of the loaded training-set dictionary.
trainSet.keys()

dict_keys(['postIds', 'stanceTag', 'threadIds', 'rumorTag', 'structures', 'posts', 'label2IndexRumor', 'label2IndexStance', 'index2LabelRumor', 'index2LabelStance'])

In [3]:
import torch
from math import log
from utils import *
from  sklearn.feature_extraction.text import TfidfVectorizer
# Total number of posts in the training set.
textNum = len(trainSet['posts'])
allPosts = trainSet['posts']

threads = []        # one float tensor of TF-IDF rows per thread (filled at the end)
rumorTags = []      # one LongTensor holding the rumor label index per thread
stanceTags = []     # one LongTensor of stance label indices per thread
threadPosts = []    # per thread: time-ordered list of (time key, text) pairs

for threadId in trainSet['threadIds']:
    sourcePost = allPosts[threadId]
    # Key the records by post id rather than by timestamp, so that two posts
    # sharing the same time do not overwrite each other. Each record is
    # (time key, text, stance index or None). The source post is inserted
    # first without a stance label; it receives one only if
    # flattenStructure also yields its id.
    records = {threadId: (str(sourcePost['time']), sourcePost['text'], None)}
    for postId in flattenStructure(trainSet['structures'][threadId]):
        if postId in allPosts:
            post = allPosts[postId]
            stanceIndex = trainSet['label2IndexStance'][trainSet['stanceTag'][postId]]
            records[postId] = (str(post['time']), post['text'], stanceIndex)

    # Order posts by time. Text and stance label travel together in one record,
    # so the labels stay aligned with the rows fed to the model after sorting.
    # NOTE(review): the key is str(time), i.e. a lexicographic comparison;
    # confirm the stored time format sorts chronologically as a string.
    ordered = sorted(records.values(), key=lambda record: record[0])

    threadPosts.append([(timeKey, text) for timeKey, text, _ in ordered])
    stanceTags.append(torch.LongTensor(
        [stance for _, _, stance in ordered if stance is not None]))
    rumorTags.append(torch.LongTensor(
        [trainSet['label2IndexRumor'][trainSet['rumorTag'][threadId]]]))

# Flat corpus of every post text, thread by thread, in the order used above.
cropus = [text for thread in threadPosts for _, text in thread]
tfidf_vec = TfidfVectorizer()
tfidf_matrix = tfidf_vec.fit_transform(cropus).toarray()

# The corpus was built thread by thread, so each thread owns a contiguous run
# of rows in the TF-IDF matrix; slice that run out instead of copying row by row.
start = 0
for thread in threadPosts:
    stop = start + len(thread)
    threads.append(torch.Tensor(tfidf_matrix[start:stop]))
    start = stop


In [4]:
import torch
import torch.nn as nn
from torch.nn.modules.module import Module

class MTUS(nn.Module):
    """Multi-task model with one GRU shared by a rumor task and a stance task.

    Every row of the input matrix is the feature vector of one post (the
    notebook feeds TF-IDF vectors). A task-specific linear layer maps each row
    to an embedding, the shared GRU encodes the sequence of embeddings, and
    task-specific linear heads map GRU outputs to class scores.

    Args:
        embeddingDim: size of the embedding produced by the linear embedding layers.
        hiddenDim: hidden size of the shared GRU (per direction).
        inputDim: size of each input feature vector.
        numGRULayer: number of stacked GRU layers.
        numRumorClass: number of rumor classes.
        numStanceClass: number of stance classes.
        batchSize: batch dimension used when reshaping the inputs and for the
            learned initial hidden state.
        bidirectional: whether the shared GRU runs in both directions.
    """

    def __init__(self, embeddingDim: int, hiddenDim: int, inputDim: int, 
                 numGRULayer: int, numRumorClass: int, numStanceClass: int,
                 batchSize = 1, bidirectional = False):
        super().__init__()  # initialise nn.Module
        # Records the preferred device (CUDA when available). The parameters
        # themselves are only moved when set_device() is called.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.embeddingDim = embeddingDim
        self.hiddenDim = hiddenDim
        self.batchSize = batchSize
        self.bidirectional = bidirectional
        self.numDirections = 2 if bidirectional else 1

        # Linear layers that turn an input feature vector into a post embedding,
        # one per task.
        self.embeddingRumor = nn.Linear(inputDim, embeddingDim)
        self.embeddingStance = nn.Linear(inputDim, embeddingDim)

        # GRU shared by both tasks, with a learned initial hidden state.
        # nn.GRU expects h0 of shape (numLayers * numDirections, batch, hiddenDim).
        self.shareGRU = nn.GRU(embeddingDim, hiddenDim, numGRULayer, bidirectional = self.bidirectional)
        self.h0 = nn.Parameter(torch.randn((numGRULayer * self.numDirections, self.batchSize, hiddenDim)))

        # Heads mapping GRU outputs to class scores. The GRU output width is
        # hiddenDim per direction.
        gruOutDim = hiddenDim * self.numDirections
        self.vRumor = nn.Linear(gruOutDim, numRumorClass)
        self.v1Stance = nn.Linear(gruOutDim, numStanceClass)
        self.vStance = nn.Linear(gruOutDim, numStanceClass)

    def forwardRumor(self, sentences: torch.Tensor):
        """Forward pass of the rumor task.

        Args:
            sentences: tensor whose first dimension is the sequence length and
                which can be reshaped to (seqLen, batchSize, embeddingDim)
                after the embedding layer.

        Returns:
            Unnormalised class scores of shape (batchSize, numRumorClass),
            computed from the GRU output at the last time step.
        """
        seqLen = sentences.size()[0]
        # Reshape to the (seqLen, batch, feature) layout nn.GRU expects.
        embeddings = self.embeddingRumor(sentences).view(seqLen, self.batchSize, self.embeddingDim)
        # gruOut: (seqLen, batch, numDirections * hiddenDim)
        gruOut, _ = self.shareGRU(embeddings, self.h0)
        # Output of the top GRU layer at the last time step.
        ht = gruOut[-1].view(self.batchSize, self.hiddenDim * self.numDirections)
        return self.vRumor(ht)

    def forwardStance(self, sentences: torch.Tensor):
        """Forward pass of the stance task.

        Args:
            sentences: tensor whose first dimension is the sequence length and
                which can be reshaped to (seqLen, batchSize, embeddingDim)
                after the embedding layer.

        Returns:
            Unnormalised class scores of shape (seqLen, batchSize,
            numStanceClass), one row of scores per time step.
        """
        seqLen = sentences.size()[0]
        # Use the stance-specific embedding; only the GRU is shared between tasks.
        embeddings = self.embeddingStance(sentences).view(seqLen, self.batchSize, self.embeddingDim)
        # hs: (seqLen, batch, numDirections * hiddenDim)
        hs, _ = self.shareGRU(embeddings, self.h0)
        # Each step's score is a projection of that step's output plus a
        # projection of the first step's output (broadcast over the sequence).
        ps = self.v1Stance(hs[0]) + self.vStance(hs)
        return ps

    def set_device(self, device: torch.device) -> torch.nn.Module:
        """Move the model to ``device``, record it on the model and return the model."""
        _model = self.to(device)
        _model.device = device
        return _model

    def save(self, path: str):
        """Write the model parameters (its state_dict) to ``path``."""
        torch.save(self.state_dict(), path)

    def load(self, path: str):
        """Load parameters written by save() from ``path`` into this model.

        torch.load unpickles the file, so only load files from a trusted source.
        """
        # Load onto the CPU first; load_state_dict copies the values into the
        # existing parameters on whatever device they currently live on.
        self.load_state_dict(torch.load(path, map_location='cpu'))
    
# Instantiate the model. The input width is the number of columns of the first
# thread's feature tensor.
mtus = MTUS(
    inputDim=threads[0].size(1),
    embeddingDim=100,
    hiddenDim=100,
    numGRULayer=2,
    numRumorClass=3,
    numStanceClass=4,
)

In [18]:
from torch import optim
from torch.nn.functional import softmax
# Adagrad over all model parameters with a learning rate of 0.01.
optimizer = optim.Adagrad(params=mtus.parameters(), lr=0.01)
# Cross-entropy criterion applied to the raw class scores returned by the model.
loss_func = torch.nn.CrossEntropyLoss()

In [20]:
# One optimisation step of the rumor task on the first thread.
x = threads[0]
rumorTag = rumorTags[0]
stanceTag = stanceTags[0]
# Gradients accumulate across backward() calls, so clear whatever an earlier
# run of this cell left behind before computing new ones.
optimizer.zero_grad()
p = mtus.forwardRumor(x)
print(p, rumorTag, stanceTag)
loss = loss_func(p, rumorTag)
loss.backward()
optimizer.step()
# p holds the scores computed before the parameter update; normalise them over
# the class dimension for display.
p = softmax(p, 1)
print(p)

tensor([[ 1.8539, -0.2012, -1.0214]], grad_fn=<AddmmBackward>) tensor([0]) tensor([0, 3, 3, 3, 0])
tensor([[0.8442, 0.1081, 0.0476]], grad_fn=<SoftmaxBackward>)
