In [1]:
import json
import torch
import argparse
import numpy as np
from data import *
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import f1_score
from torch import optim
from torch.nn.functional import softmax
from torch.utils.data import DataLoader
from model import *
from utils import *
from random import randint, shuffle
from tqdm import tqdm

In [2]:
# Root folder of the PHEME corpus, relative to this notebook.
dataPath = '../dataset/PHEME/'

# getTfidfAndLabel returns the TF-IDF vectorizer (used later via .transform)
# together with the rumor and stance label lookups.
tfidf_vec, label2IndexRumor, label2IndexStance = getTfidfAndLabel(dataPath)
# The vectorizer's max_features setting is reused as the model's input width.
inputDim = tfidf_vec.max_features

# Build the two PHEME splits, train first and then test.
# To experiment with SemEval-2017 instead, swap PHEMEDataset for
# semEval2017Dataset; it was previously called with the same
# dataPath/type keyword arguments.
trainSet, testSet = [
    PHEMEDataset(dataPath=dataPath, type=split) for split in ('train', 'test')
]

# Both loaders use the project's `collate` function and shuffle their data.
# No batch_size is passed, so DataLoader's default applies.
# NOTE(review): the test loader is shuffled as well; evaluation normally does
# not need that -- confirm it is intentional.
trainLoader = DataLoader(
    trainSet,
    shuffle=True,
    collate_fn=collate,
)
testLoader = DataLoader(
    testSet,
    shuffle=True,
    collate_fn=collate,
)

In [5]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first .to(device).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# MTUS is the project model; the training cells call its forwardRumor and
# forwardStance methods. inputDim comes from the TF-IDF vectorizer.
model = MTUS(
    embeddingDim=100,
    hiddenDim=100,
    inputDim=inputDim,
    numGRULayer=1,
    numRumorClass=len(label2IndexRumor),
    # When no stance label map is available (None), default to 4 stance classes.
    numStanceClass=len(label2IndexStance) if label2IndexStance is not None else 4,
)
model.set_device(device)

# `optim` and `softmax` are already imported in the notebook's import cell,
# so they are not re-imported here.
loss_func = torch.nn.CrossEntropyLoss(reduction='mean').to(device)
optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=5e-4)

In [6]:
# Train the rumor head for one epoch over trainLoader, collecting the gold and
# predicted rumor labels so metrics can be computed afterwards.
start = 1
for epoch in range(start, start + 1):
    model.train()
    # Running sum of the per-batch losses, kept as a plain Python float.
    totalLoss = 0.
    rumorTrue = []
    rumorPre = []
    for thread in trainLoader:
        # TF-IDF features of the thread's posts, densified and moved to the device.
        posts = torch.Tensor(tfidf_vec.transform(thread['thread']).toarray()).to(device)
        rumorTrue += thread['rumorTag'].tolist()
        rumorTag = thread['rumorTag'].to(device)
        optimizer.zero_grad()

        p = model.forwardRumor(posts)
        loss = loss_func(p, rumorTag)
        # Accumulate the scalar value only: adding the loss tensor itself would
        # keep every batch's autograd graph alive for the whole epoch.
        totalLoss += loss.item()
        loss.backward()
        optimizer.step()
        # Predicted class = index of the highest probability in each row.
        p = softmax(p, 1)
        rumorPre += p.argmax(dim=1).tolist()

In [5]:
# Stance-head training loop over trainLoader.
# NOTE(review): the loop stops after the first batch and only prints that
# batch's predictions, so stancePre stays empty. This looks like a debugging
# smoke test; re-enable the accumulation line and drop the print/break to
# train for a full epoch.
start = 1
for epoch in range(start, start + 1):
    model.train()
    # Running sum of the per-batch losses, kept as a plain Python float.
    totalLoss = 0.
    stanceTrue = []
    stancePre = []
    for thread in trainLoader:
        # TF-IDF features of the thread's posts, densified and moved to the device.
        posts = torch.Tensor(tfidf_vec.transform(thread['thread']).toarray()).to(device)
        stanceTrue += thread['stanceTag'].tolist()
        stanceTag = thread['stanceTag'].to(device)
        optimizer.zero_grad()

        p = model.forwardStance(posts)
        loss = loss_func(p, stanceTag)
        # Accumulate the scalar value only: adding the loss tensor itself would
        # keep the batch's autograd graph alive.
        totalLoss += loss.item()
        loss.backward()
        optimizer.step()
        # Predicted class = index of the highest probability in each row.
        p = softmax(p, 1)
        #stancePre += p.argmax(dim=1).tolist()
        print(p.argmax(dim=1).tolist())
        break

torch.Size([55, 5000])
tensor([[-0.1444, -0.3136,  0.0930,  0.3046],
        [-0.0190, -0.1767,  0.0787,  0.3317],
        [ 0.0487, -0.1463,  0.0595,  0.2973],
        [ 0.0882, -0.1504,  0.0474,  0.2594],
        [ 0.1061, -0.1638,  0.0460,  0.2342],
        [ 0.1186, -0.1774,  0.0469,  0.2159],
        [ 0.1179, -0.1863,  0.0481,  0.2091],
        [ 0.1203, -0.1917,  0.0523,  0.2046],
        [ 0.1203, -0.1964,  0.0522,  0.2015],
        [ 0.1175, -0.2037,  0.0528,  0.1959],
        [ 0.1174, -0.2035,  0.0542,  0.1970],
        [ 0.1170, -0.2083,  0.0538,  0.1917],
        [ 0.1160, -0.2087,  0.0535,  0.1913],
        [ 0.1146, -0.2134,  0.0544,  0.1877],
        [ 0.1137, -0.2115,  0.0547,  0.1895],
        [ 0.1146, -0.2174,  0.0568,  0.1822],
        [ 0.1149, -0.2158,  0.0558,  0.1880],
        [ 0.1137, -0.2152,  0.0547,  0.1870],
        [ 0.1132, -0.2146,  0.0554,  0.1862],
        [ 0.1124, -0.2156,  0.0547,  0.1873],
        [ 0.1094, -0.2136,  0.0557,  0.1910],
        [ 0

In [20]:
# Scratch check: count the positions where the predictions equal the labels.
# numpy is already imported in the notebook's import cell, and the unused
# f1_score re-import was dropped.
y_true = np.array([0, 1, 2, 2, 1, 0])
y_pred = np.array([0, 0, 0, 1, 0, 1])
(y_true == y_pred).sum()

1