In [1]:
import json
import pickle
import os; import sys; import logging
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import scipy.sparse as spwords
import time
from datetime import datetime
import random
import copy; import pdb
from sklearn.metrics import roc_auc_score
from tqdm import tqdm

In [2]:
logger = logging.getLogger()
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s <%(levelname)s> %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

In [3]:
# ============================
# Data File Path
# ============================

# New Path(server) (20.08.11)
TRAIN_DATA_FILE_PATH = "./parsed_data/node_edge_info_train_withid.json"
VALIDATION_DATA_FILE_PATH = "./parsed_data/node_edge_info_valid_withid.json"
TEST_DATA_FILE_PATH = "./parsed_data/node_edge_info_test_withid.json"
PRE_TRAINED_WORD_EMBEDDING_FILE_PATH = "./parsed_data/glove.840B.300d.txt"
GENRE_DICT = "./parsed_data/genre_dict.pickle"
tv_train_path = './tvtropes_data/train_parsed.pickle'
tv_test_path = './tvtropes_data/test_parsed.pickle'
tv_train = pickle.load(open(tv_train_path, 'rb'))
tv_test = pickle.load(open(tv_test_path, 'rb'))

# PRE_TRAINED_WORD_EMBEDDING_FILE_PATH = "./parsed_data/glove.6B.300d.txt"

# ============================
# Model Hyper Parameter
# ============================
EMBEDDING_DIM = 300
GENRE_EMBEDDING_DIM = 50
HIDDEN_STATES = [100, 100]
NUM_FILTERS = 50
FILTER_SIZES = [2,3]
NUM_HEADS = 3
LEAKY_ALPHA = 0.2

# ============================
# Training Hyper Parameter
# ============================
EPOCHS = 100
LEARNING_RATE = 0.001
BATCH_SIZE = 1024
WEIGHT_DECAY = 1e-5
DROPOUT_RATE = 0.5
RANDOM_SEED = 26

# ============================
# Set Random Seed
# ============================
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [4]:
# ============================
# Data Pre-Processing
# ============================
def load_data(train_file_path, validation_file_path, test_file_path, genre_dict_file):
    with open(train_file_path) as f:
        train = json.load(f)
    with open(validation_file_path) as f:
        validation = json.load(f)
    with open(test_file_path) as f:
        test = json.load(f)
    with open(genre_dict_file, "rb") as f:
        genre_dict = pickle.load(f)

    return train, validation, test, genre_dict

# ============================
# Data Pre Processing
# ============================
logger.info("Load Data...")
start = time.time()
train, validation, test, genre_dict = load_data(TRAIN_DATA_FILE_PATH,
                                                VALIDATION_DATA_FILE_PATH,
                                                TEST_DATA_FILE_PATH,
                                                GENRE_DICT)

2020-09-25 10:40:18,137 - root <INFO> Load Data...


In [5]:
def check(review):
    for line in review:
        a = len(line.split("\t")[0].split())
        if a>50:
            return False
    return True

def make_dictionary(_train, _validation, _test, genre_dict):
    train, validation, test = [], [], []
    for line in tqdm(_train):
        if check(line["text_info"]):
            train += line["text_info"]
    for line in tqdm(_validation):
        if check(line["text_info"]):
            validation += line["text_info"]
    for line in tqdm(_test):
        if check(line["text_info"]):
            test += line["text_info"]

    data = []
    data += train
    data += validation
    data += test

    global maximum_length
    # maximum_length = 50
    maximum_length = max([len(line.split("\t")[0].split()) for line in data])
    global maximum_genre_length
    maximum_genre_length = max([len(value) for _, value in genre_dict.items()])

    word2id = {"<PAD>":0}
    id2word = ["<PAD>"]
    edge2id = {"<NONE>":0, "<SELF>": 1}
    id2edge = ["<NONE>", "<SELF>"]
    genre2id = {"<PAD>":0}
    id2genre = ["<PAD>"]

    count = 0
    b = 0

    for line in tqdm(data):
        tokens = line.split("\t")
        b += len(tokens[0].split())
        if len(tokens[0].split()) > 50:
            count+=1
        for word in tokens[0].split():
            if word not in word2id:
                word2id[word] = len(word2id)
                id2word.append(word)
        for edges in tokens[3:]:
            _tokens = edges.split(":")
            if len(_tokens) != 3:
                start, end = _tokens[0], _tokens[1]
                edge = ":".join(_tokens[2:])
            else:
                start, end, edge = _tokens
            if edge not in edge2id:
                edge2id[edge] = len(edge2id)
                id2edge.append(edge)
            del _tokens
        del tokens
    del data, _train, _validation, _test

    book2genre = {}
    for key, value in genre_dict.items():
        for genre in value:
            if genre not in genre2id:
                genre2id[genre] = len(genre2id)
                id2genre.append(genre)
        book2genre[key] = [genre2id[genre] for genre in value]

    num_edges = len(edge2id)
    for i in range(num_edges):
        key = id2edge[i]
        if key != "<NONE>" and key != "<SELF>":
            opposite = key+"'"
            edge2id[opposite] = edge2id[key]+num_edges-2

    print('count, b: {}, {}'.format(count, b))
    return train, validation, test, word2id, id2word, edge2id, id2edge, genre2id, id2genre, book2genre, maximum_length, maximum_genre_length

logger.info("Make Dictionary...")
_train, _validation, _test, word2id, id2word, edge2id, id2edge, genre2id, id2genre, book2genre, maximum_length, maximum_genre_length = make_dictionary(train, validation, test, genre_dict)
book2genre[24711433] = []

print('train: {}, valid: {}, test: {}, overall: {}, max_len: {}'.format(len(_train), len(_validation), len(_test), len(_train) + len(_validation) + len(_test), maximum_length))

2020-09-25 10:41:12,363 - root <INFO> Make Dictionary...
100%|██████████| 1091952/1091952 [00:25<00:00, 43101.02it/s]
100%|██████████| 10000/10000 [00:00<00:00, 42997.82it/s]
100%|██████████| 276081/276081 [00:06<00:00, 44009.41it/s]
100%|██████████| 12911731/12911731 [02:43<00:00, 79039.56it/s]


count, b: 0, 219536978
train: 10230700, valid: 93500, test: 2587531, overall: 12911731, max_len: 50


In [6]:
def make_input_data_as_index(_data, word2id, edge2id):
    data = []
    for line in tqdm(_data):
        tokens = line.split("\t")
        sentence, label, book = tokens[0], int(tokens[1]), tokens[2]
        _edges = []
        for edges in tokens[3:]:
            _tokens = edges.split(":")
            if len(_tokens) != 3:
                start, end = _tokens[0], _tokens[1]
                edge = ":".join(_tokens[2:])
            else:
                start, end, edge = _tokens
            _edges.append(":".join([start, end, str(edge2id[edge])]))
            del _tokens
        data.append([sentence, label, " ".join(_edges), book])
        del tokens
    return data

logger.info("Make Input as Index...")
start = time.time()
_train = make_input_data_as_index(_train, word2id, edge2id)
_validation = make_input_data_as_index(_validation, word2id, edge2id)
_test = make_input_data_as_index(_test, word2id, edge2id)
# ['<ROOT> This series is so much fun !', 0, '0:6:2 2:1:6 5:4:13 6:2:5 6:7:4 6:5:10 6:3:9', '9595620']

2020-09-25 10:44:53,900 - root <INFO> Make Input as Index...
100%|██████████| 10230700/10230700 [02:54<00:00, 58731.59it/s]
100%|██████████| 93500/93500 [00:01<00:00, 64417.66it/s]
100%|██████████| 2587531/2587531 [00:44<00:00, 58605.07it/s]


In [7]:
def make_input_adjacency_matrix(line):
    sentence, label, edges, book = line[0], float(line[1]), line[2].split(), line[3]
    edges = np.asarray([edge.split(":") for edge in edges])
    adjacency_matrix = matrix_to_torch_sparse_tensor(edges, maximum_length)

    return [sentence, adjacency_matrix, label, book]

def matrix_to_torch_sparse_tensor(edges, maximum_length):
    indices = torch.from_numpy(
        np.vstack((edges[:, 0], edges[:, 1])).astype(np.int64))
    values = torch.from_numpy(edges[:, 2].astype(np.int64))
    shape = torch.Size((maximum_length, maximum_length))

    return torch.sparse.FloatTensor(indices, values, shape)

logger.info("Make Adjacency Matrix...")
train_path = './adj_mats/train_adj.pickle'
val_path = './adj_mats/val_adj.pickle'
test_path = './adj_mats/test_adj.pickle'

if not os.path.exists(train_path):
    _train = [make_input_adjacency_matrix(line) for line in tqdm(_train)]
    _validation = [make_input_adjacency_matrix(line) for line in tqdm(_validation)]
    _test = [make_input_adjacency_matrix(line) for line in tqdm(_test)]
else:
    _train = pickle.load(open(train_path, 'rb'))
    _validation = pickle.load(open(val_path, 'rb'))
    _test = pickle.load(open(test_path, 'rb'))

2020-09-25 10:48:33,880 - root <INFO> Make Adjacency Matrix...
100%|██████████| 10230700/10230700 [19:11<00:00, 8882.77it/s] 
100%|██████████| 93500/93500 [00:10<00:00, 9137.02it/s]
100%|██████████| 2587531/2587531 [04:57<00:00, 8711.89it/s] 


In [8]:
def load_pre_trained_word_embedding(word_embedding_file_path, word2id):
    lines = [line.strip() for line in open(word_embedding_file_path).readlines()]
    pre_trained_word_embedding = {}
    for line in tqdm(lines):
        tokens = line.split()
        if len(tokens) != 301:
            continue
        pre_trained_word_embedding[tokens[0]] = np.asarray(tokens[1:]).astype(np.float32)

    word_embedding = np.random.uniform(size=(len(word2id), EMBEDDING_DIM))
    for key in tqdm(word2id.keys()):
        if key in pre_trained_word_embedding:
            word_embedding[word2id[key]] = pre_trained_word_embedding[key]

    word_embedding[0] = np.zeros(EMBEDDING_DIM)
    return torch.from_numpy(word_embedding)

logger.info("Load Pre-trained Word Embedding...")
word_embedding = load_pre_trained_word_embedding(PRE_TRAINED_WORD_EMBEDDING_FILE_PATH, word2id)

2020-09-25 11:12:55,259 - root <INFO> Load Pre-trained Word Embedding...
100%|██████████| 2196017/2196017 [06:36<00:00, 5545.26it/s]
100%|██████████| 622950/622950 [00:00<00:00, 798108.40it/s]


In [9]:
def make_batch(data, batch_size, word2id, book2genre, is_train=True):
    indices = np.arange(len(data))
    if is_train:
        random.shuffle(indices)

    if len(data) % batch_size == 0:
        batch_num = int(len(data)/batch_size)
    else:
        batch_num = int(len(data)/batch_size) + 1

    for i in range(batch_num):
        left = i*batch_size
        right = min((i+1)*batch_size, len(data))

        sentences = []
        adjacency_matrics = []
        labels = []
        genres = []

        for j in indices[left:right]:
            sentence = [word2id[word] for word in data[j][0].split()]
            sentence += [0]*(maximum_length - len(sentence))
            sentences.append(sentence)
            adjacency_matrics.append(data[j][1])
            labels.append(data[j][2])
            _genres = book2genre[int(data[j][3])]
            _genres += [0]*(maximum_genre_length - len(_genres))
            genres.append(_genres)

        yield sentences, adjacency_matrics, labels, genres

In [10]:
# ============================
# Model
# ============================
class GCNLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GCNLayer, self).__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim

        self.weight = nn.Parameter(
            torch.randn(self.input_dim, self.output_dim))
        nn.init.xavier_normal_(self.weight)

        self.bias = nn.Parameter(torch.zeros(self.output_dim))

    def forward(self, x, attention_weight):
        x = x*attention_weight.unsqueeze(3)
        x = x.sum(2)
        output = torch.matmul(x, self.weight)
        output = output + self.bias

        return output

class GenreEncoder(nn.Module):
    def __init__(self, num_filters, filter_sizes, genre_embedding_dim, maximum_genre_length):
        super(GenreEncoder, self).__init__()

        self.num_filters = num_filters
        self.filter_sizes = filter_sizes
        self.genre_embedding_dim = genre_embedding_dim
        self.maximum_genre_length = maximum_genre_length

        # ==============================
        # 1D CNN
        # ==============================
        self.cnn = nn.ModuleList([nn.Sequential(
            nn.Conv1d(self.genre_embedding_dim, self.num_filters, size),
            nn.ReLU(),
            nn.MaxPool1d(self.maximum_genre_length - size + 1)
        ) for size in self.filter_sizes])


    def forward(self, genres):
        genres = genres.transpose(1,2)
        convs = [conv(genres).squeeze() for conv in self.cnn]

        return torch.cat(convs, dim=1)


class Attention(nn.Module):
    def __init__(self, alpha, input_dim, output_dim, num_edges, maximum_length):
        super(Attention, self).__init__()

        self.maximum_length = maximum_length
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_edges = num_edges
        self.alpha = alpha

        # =============================================
        # Data Preparation
        # =============================================
        self.weight = nn.Parameter(
            torch.randn(self.input_dim, self.output_dim))
        nn.init.xavier_normal_(self.weight)

        self.edge_embedding \
        = nn.Embedding(self.num_edges, self.output_dim, padding_idx = 0)
        nn.init.xavier_normal_(self.edge_embedding.weight)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, x, adjacency_matrics):
        # ==================================
        # x: (B, N, H)
        # adjacency_matrics: (B, N, N)
        hidden = torch.matmul(x, self.weight) # (B, N, H')
        hidden = hidden.unsqueeze(1) # (B, 1, N, H')
        hidden = hidden.expand(hidden.size(0),
                               self.maximum_length,
                               self.maximum_length,
                               self.output_dim) # (B, N, N, H')

        edges = self.edge_embedding(adjacency_matrics)

        attention_weight = hidden*edges # (B, N, N, H')
        attention_weight = torch.sum(attention_weight, dim=3) # (B, N, N)
        attention_weight = self.leakyrelu(attention_weight)

        zero_vec = -9e15*torch.ones_like(attention_weight)
        attention_weight = torch.where(adjacency_matrics > 0, attention_weight, zero_vec)
        attention_weight = torch.softmax(attention_weight, dim=2)

        return attention_weight

class Model(nn.Module):
    def __init__(self,
                 num_words,
                 num_edges,
                 num_genres,
                 alpha,
                 embedding_dim,
                 genre_embedding_dim,
                 hidden_dim,
                 num_filters,
                 filter_sizes,
                 maximum_length,
                 maximum_genre_length,
                 pre_trained,
                 dropout_rate):
        super(Model, self).__init__()

        self.num_words = num_words
        self.num_edges = num_edges
        self.num_genres = num_genres
        self.embedding_dim = embedding_dim
        self.genre_embedding_dim = genre_embedding_dim
        self.hidden_dim = hidden_dim
        self.num_filters = num_filters
        self.filter_sizes = filter_sizes
        self.maximum_length = maximum_length
        self.maximum_genre_length = maximum_genre_length
        self.dropout_rate = dropout_rate
        self.alpha = alpha

        # =============================================
        # Data Preparation
        # =============================================
        self.word_embedding \
        = nn.Embedding.from_pretrained(pre_trained, freeze=False) # padding_idx = 0,
        self.genre_embedding \
        = nn.Embedding(self.num_genres, self.genre_embedding_dim, padding_idx = 0)

        self.attention_1 = Attention(self.alpha,
                                     2*self.hidden_dim[0],
                                     self.hidden_dim[0],
                                     self.num_edges,
                                     self.maximum_length)

        self.attention_2 = Attention(self.alpha,
                                     self.hidden_dim[0],
                                     self.hidden_dim[1],
                                     self.num_edges,
                                     self.maximum_length)

        self.lstm = nn.LSTM(self.embedding_dim, self.hidden_dim[0], bidirectional=True, batch_first=True)

        self.gcn_layer_1 \
        = GCNLayer(2*self.hidden_dim[0], self.hidden_dim[0])
        self.gcn_layer_2 \
        = GCNLayer(self.hidden_dim[0], self.hidden_dim[1])

        self.genre_encoder = GenreEncoder(self.num_filters,
                                          self.filter_sizes,
                                          self.genre_embedding_dim,
                                          self.maximum_genre_length)

        self.output_layer = nn.Sequential(
            nn.Dropout(self.dropout_rate),
            nn.Linear(self.hidden_dim[1], self.hidden_dim[1]),
            nn.ReLU(),
            nn.Dropout(self.dropout_rate),
            nn.Linear(self.hidden_dim[1], 2)
        )

        self.dropout = nn.Dropout(self.dropout_rate)

    def forward(self, sentences, adjacency_matrics, genres):
        zero_vec = torch.zeros_like(adjacency_matrics)
        adjacency_matrics_t = torch.where(adjacency_matrics > 0,
                                          adjacency_matrics + int((self.num_edges-2)/2),
                                          zero_vec)
        adjacency_matrics_t = adjacency_matrics_t.transpose(1,2)
        eye = torch.eye(adjacency_matrics.size(1), dtype=torch.long).cuda()
        # eye = torch.eye(adjacency_matrics.size(1), dtype=torch.long)
        eye = eye.unsqueeze(0).expand(sentences.size(0),
                                      self.maximum_length,
                                      self.maximum_length)
        adjacency_matrics = adjacency_matrics \
                          + adjacency_matrics_t \
                          + eye # (B, N, N)

        embedded_words = self.word_embedding(sentences) # (B, N, D)
        h0 = torch.zeros(2, sentences.size(0), self.hidden_dim[0]).cuda() # 2 for bidirection
        c0 = torch.zeros(2, sentences.size(0), self.hidden_dim[0]).cuda()
        self.lstm.flatten_parameters()
        lstm = self.lstm(embedded_words, (h0, c0))[0] # (B, N, 2H)
        attention_weight_1 = self.attention_1(lstm, adjacency_matrics)
        lstm = lstm.unsqueeze(1)
        lstm = lstm.expand(lstm.size(0),
                          self.maximum_length,
                          self.maximum_length,
                          2*self.hidden_dim[0])

        gcn_1 = self.gcn_layer_1(lstm, attention_weight_1)
        gcn_1 = torch.relu(gcn_1) # B X N X H
        gcn_1 = self.dropout(gcn_1)

        attention_weight_2 = self.attention_2(gcn_1, adjacency_matrics)
        gcn_1 = gcn_1.unsqueeze(1)
        gcn_1 = gcn_1.expand(gcn_1.size(0),
                          self.maximum_length,
                          self.maximum_length,
                          self.hidden_dim[0])
        gcn_2 = self.gcn_layer_2(gcn_1, attention_weight_2)
        gcn_2 = torch.relu(gcn_2) # (B, N, H')

        genres = self.genre_embedding(genres) # (B, N', G)
        genre_features = self.genre_encoder(genres) # (B, H')
        attention_weight_3 = (gcn_2*genre_features.unsqueeze(1)).sum(2) # (B, N)
        zero_vec = -9e15*torch.ones_like(attention_weight_3)
        attention_weight_3 = torch.where(sentences > 0, attention_weight_3, zero_vec)
        attention_weight_3 = torch.softmax(attention_weight_3, dim=1) # (B, N)

        sentence_representations = (gcn_2*attention_weight_3.unsqueeze(2)).sum(1) # (B, H')

        output = self.output_layer(sentence_representations)

        return output

In [11]:
# =============================================
# Model Initialize
# =============================================
logger.info("Model Initializing..")
weight = torch.ones(2).cuda()
# weight = torch.ones(2)
weight[0] = 0.05
criterion = nn.CrossEntropyLoss(weight=weight)

model = Model(len(word2id), len(edge2id), len(genre2id),
              LEAKY_ALPHA, EMBEDDING_DIM, GENRE_EMBEDDING_DIM, HIDDEN_STATES,
              NUM_FILTERS, FILTER_SIZES, maximum_length, maximum_genre_length,
              word_embedding.type("torch.FloatTensor"), DROPOUT_RATE)
model = nn.DataParallel(model).cuda()

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

2020-09-25 11:19:42,797 - root <INFO> Model Initializing..


In [12]:
# del model
# del batches
# del input_sentences
# del input_adjacency_matrics
# del input_labels
# del input_genres
# del logits
# del long_logits
# del entire_labels
# del entire_logits

In [None]:
# =============================================
# Model Training
# =============================================
now = datetime.now().strftime('%m-%d_%H:%M')
result_path = './results/{}_results.txt'.format(now)

best_model = {}
best = np.zeros(4)
logger.info("\nModel Training..")
for i in range(EPOCHS):
    model.train()
    start = time.time()

    if len(_train) % BATCH_SIZE == 0:
        batch_num = int(len(_train)/BATCH_SIZE)
    else:
        batch_num = int(len(_train)/BATCH_SIZE) + 1

    loss = .0
    batches = make_batch(_train, BATCH_SIZE, word2id, book2genre)
    step = 0

    for batch in batches:
        count = 0
        correct = 0
        positive_answer = 0
        positive_actual = 0

        sentences, adjacency_matrics, labels, genres = batch
        input_sentences = torch.tensor(sentences, dtype = torch.long).cuda()
        input_adjacency_matrics = torch.stack([matrix.to_dense() for matrix in adjacency_matrics], dim=0).cuda()
        input_labels = torch.tensor(labels, dtype=torch.long).cuda()
        input_genres = torch.tensor(genres, dtype=torch.long).cuda()

        optimizer.zero_grad()
        logits = model(input_sentences, input_adjacency_matrics, input_genres)
        _loss = criterion(logits, input_labels).sum()
        _loss.backward()
        optimizer.step()
        loss += _loss.item()

        step+=1

        sys.stdout.flush()
        sys.stdout.write("\r" + "Epoch: [{}/{}] Train Batch: [{}/{}]".format(i+1, EPOCHS, step, batch_num))
#         result.write("Epoch: [{}/{}] Train Batch: [{}/{}]\n".format(i+1, EPOCHS, step, batch_num))
        if step % 1000 == 0:
            long_logits = torch.argmax(logits, dim=1)
            positive_answer += long_logits.sum().item()
            positive_actual += (input_labels == 1.0).float().sum().item()
            correct+=(long_logits*input_labels).sum().item()
            count+= (long_logits==input_labels).sum().item()

            accuracy = 100*float(count) / len(sentences)
            if positive_answer == 0:
                precision = 0.0
            else:
                precision = float(correct) / positive_answer
            recall = float(correct) / positive_actual
            if (precision + recall) == 0.0:
                f1 = 0.0
            else:
                f1 = 2*precision*recall / (precision + recall)
            auroc = roc_auc_score(input_labels.cpu().detach().numpy(), logits[:, 1].cpu().detach().numpy())

            print(" [A/P/R/F/ROC]: [{:.4f}/{:.4f}/{:.4f}/{:.4f}/{:.4f}] Loss: {:.4f}".format(accuracy, precision, recall, f1, auroc, loss/1000))
            result = open(result_path, 'a')
            result.write("Epoch: [{}/{}] Train Batch: [{}/{}] [A/P/R/F/ROC]: [{:.4f}/{:.4f}/{:.4f}/{:.4f}/{:.4f}] Loss: {:.4f}\n".format(i+1, EPOCHS, step, batch_num, accuracy, precision, recall, f1, auroc, loss/1000))
            result.close()
            loss = .0

    if (i+1) % 1 == 0:
        model.eval()

        batches = make_batch(_validation, BATCH_SIZE, word2id, book2genre, False)
        if len(_validation) % BATCH_SIZE == 0:
            batch_num = int(len(_validation)/BATCH_SIZE)
        else:
            batch_num = int(len(_validation)/BATCH_SIZE) + 1

        step = 0
        count = 0
        correct = 0
        positive_answer = 0
        positive_actual = 0
        for batch in batches:
            sentences, adjacency_matrics, labels, genres = batch
            input_sentences = torch.tensor(sentences, dtype = torch.long).cuda()
            input_adjacency_matrics = torch.stack([matrix.to_dense() for matrix in adjacency_matrics], dim=0).cuda()
            input_labels = torch.tensor(labels, dtype=torch.long).cuda()
            input_genres = torch.tensor(genres, dtype=torch.long).cuda()
            logits = model(input_sentences, input_adjacency_matrics, input_genres)
            long_logits = torch.argmax(logits, dim=1)

            if step == 0:
                entire_labels = input_labels.cpu().detach()
                entire_logits = logits.cpu().detach()
            else:
                entire_labels = torch.cat([entire_labels, input_labels.cpu().detach()], dim=0)
                entire_logits = torch.cat([entire_logits, logits.cpu().detach()], dim=0)
            positive_answer += long_logits.sum().item()
            positive_actual += (input_labels == 1.0).float().sum().item()
            correct+=(long_logits*input_labels).sum().item()
            count+= (long_logits==input_labels).sum().item()
            step+=1

            sys.stdout.flush()
            sys.stdout.write("\r" + "Epoch: [{}/{}] Eval Batch: [{}/{}]".format(i+1, EPOCHS, step, batch_num))
#             result.write("Epoch: [{}/{}] Eval Batch: [{}/{}]\n".format(i+1, EPOCHS, step, batch_num))

        accuracy = 100*float(count)/len(_validation)
        if positive_answer == 0:
            precision = 0.0
        else:
            precision = float(correct)/positive_answer
        recall = float(correct)/positive_actual
        if (precision+recall) == 0.0:
            f1 = 0.0
        else:
            f1 = 2*precision*recall/(precision+recall)
        auroc = roc_auc_score(entire_labels.numpy(), entire_logits[:, 1].numpy())
        del entire_labels
        del entire_logits
        print(" [A/P/R/F/ROC]: [{:.4f}/{:.4f}/{:.4f}/{:.4f}/{:.4f}] Loss: {:.4f}".format(accuracy, precision, recall, f1, auroc, loss))
        result = open(result_path, 'a')
        result.write("Epoch: [{}/{}] Eval Batch: [{}/{}] [A/P/R/F/ROC]: [{:.4f}/{:.4f}/{:.4f}/{:.4f}/{:.4f}] Loss: {:.4f}\n".format(i+1, EPOCHS, step, batch_num, accuracy, precision, recall, f1, auroc, loss))
        result.close()
        
        if auroc > best[3]:
            past = best[3]
            best = precision, recall, f1, auroc
            best_model = copy.deepcopy(model.state_dict())
            if past != 0:
                print("Current Best:)")
                result = open(result_path, 'a')
                result.write("Current Best:)\n")
                result.close()
                modelname = '{}_only_attention{}'.format(now, i+1)
                torch.save(best_model, './models/' + modelname + '.pt')
                
                ## Start Testing
                batches = make_batch(_test, BATCH_SIZE, word2id, book2genre, False)
                if len(_test) % BATCH_SIZE == 0:
                    batch_num = int(len(_test)/BATCH_SIZE)
                else:
                    batch_num = int(len(_test)/BATCH_SIZE) + 1

                step = 0
                count = 0
                correct = 0
                positive_answer = 0
                positive_actual = 0
                for batch in batches:
                    sentences, adjacency_matrics, labels, genres = batch
                    input_sentences = torch.tensor(sentences, dtype = torch.long).cuda()
                    input_adjacency_matrics = torch.stack([matrix.to_dense() for matrix in adjacency_matrics], dim=0).cuda()
                    input_labels = torch.tensor(labels, dtype=torch.long).cuda()
                    input_genres = torch.tensor(genres, dtype=torch.long).cuda()
                    logits = model(input_sentences, input_adjacency_matrics, input_genres)
                    long_logits = torch.argmax(logits, dim=1)

                    if step == 0:
                        entire_labels = input_labels.cpu().detach()
                        entire_logits = logits.cpu().detach()
                    else:
                        entire_labels = torch.cat([entire_labels, input_labels.cpu().detach()], dim=0)
                        entire_logits = torch.cat([entire_logits, logits.cpu().detach()], dim=0)
                    positive_answer += long_logits.sum().item()
                    positive_actual += (input_labels == 1.0).float().sum().item()
                    correct+=(long_logits*input_labels).sum().item()
                    count+= (long_logits==input_labels).sum().item()
                    step+=1

                    sys.stdout.flush()
                    sys.stdout.write("\r" + "Test Batch: [{}/{}]".format(step, batch_num))

                accuracy = 100*float(count)/len(_test)
                if positive_answer == 0:
                    precision = 0.0
                else:
                    precision = float(correct)/positive_answer
                recall = float(correct)/positive_actual
                if (precision+recall) == 0.0:
                    f1 = 0.0
                else:
                    f1 = 2*precision*recall/(precision+recall)
                auroc = roc_auc_score(entire_labels.numpy(), entire_logits[:, 1].numpy())

                print("{} [A/P/R/F/ROC]: [{:.4f}/{:.4f}/{:.4f}/{:.4f}/{:.4f}]".format(modelname, accuracy, precision, recall, f1, auroc))
                result = open(result_path, 'a')
                result.write("{} [A/P/R/F/ROC]: [{:.4f}/{:.4f}/{:.4f}/{:.4f}/{:.4f}]\n".format(modelname, accuracy, precision, recall, f1, auroc))
                result.close()

2020-09-25 11:19:44,951 - root <INFO> 
Model Training..


Epoch: [1/100] Train Batch: [1000/9991] [A/P/R/F/ROC]: [77.5391/0.0798/0.6333/0.1418/0.7721] Loss: 0.5601
Epoch: [1/100] Train Batch: [2000/9991] [A/P/R/F/ROC]: [81.0547/0.0606/0.6000/0.1101/0.7820] Loss: 0.5300
Epoch: [1/100] Train Batch: [3000/9991] [A/P/R/F/ROC]: [86.2305/0.1333/0.6452/0.2210/0.8256] Loss: 0.5228
Epoch: [1/100] Train Batch: [4000/9991] [A/P/R/F/ROC]: [88.1836/0.1290/0.5517/0.2092/0.8176] Loss: 0.5206
Epoch: [1/100] Train Batch: [5000/9991] [A/P/R/F/ROC]: [86.8164/0.0963/0.5000/0.1615/0.8608] Loss: 0.5177
Epoch: [1/100] Train Batch: [6000/9991] [A/P/R/F/ROC]: [85.3516/0.1083/0.6296/0.1848/0.7728] Loss: 0.5111
Epoch: [1/100] Train Batch: [7000/9991] [A/P/R/F/ROC]: [84.4727/0.1104/0.5625/0.1846/0.7794] Loss: 0.5097
Epoch: [1/100] Train Batch: [8000/9991] [A/P/R/F/ROC]: [84.9609/0.0789/0.4615/0.1348/0.7215] Loss: 0.5084
Epoch: [1/100] Train Batch: [9000/9991] [A/P/R/F/ROC]: [87.0117/0.1538/0.6471/0.2486/0.9038] Loss: 0.5066
Epoch: [1/100] Eval Batch: [92/92] [A/P/R/F/RO

Epoch: [8/100] Train Batch: [6000/9991] [A/P/R/F/ROC]: [87.3047/0.1212/0.5333/0.1975/0.8339] Loss: 0.4576
Epoch: [8/100] Train Batch: [7000/9991] [A/P/R/F/ROC]: [85.2539/0.0784/0.5455/0.1371/0.8037] Loss: 0.4578
Epoch: [8/100] Train Batch: [8000/9991] [A/P/R/F/ROC]: [83.2031/0.0955/0.6071/0.1650/0.8174] Loss: 0.4578
Epoch: [8/100] Train Batch: [9000/9991] [A/P/R/F/ROC]: [88.5742/0.1280/0.6667/0.2148/0.9003] Loss: 0.4618
Epoch: [8/100] Eval Batch: [92/92] [A/P/R/F/ROC]: [86.1540/0.1031/0.5665/0.1745/0.8163] Loss: 457.9049
Current Best:)
Test Batch: [2527/2527]09-25_11:19_only_attention8 [A/P/R/F/ROC]: [86.3096/0.1058/0.5505/0.1775/0.8146]
Epoch: [9/100] Train Batch: [1000/9991] [A/P/R/F/ROC]: [87.5977/0.1397/0.6552/0.2303/0.8532] Loss: 0.4367
Epoch: [9/100] Train Batch: [2000/9991] [A/P/R/F/ROC]: [89.7461/0.1364/0.6000/0.2222/0.8503] Loss: 0.4429
Epoch: [9/100] Train Batch: [3000/9991] [A/P/R/F/ROC]: [88.9648/0.1176/0.6364/0.1986/0.8505] Loss: 0.4477
Epoch: [9/100] Train Batch: [4000/99

Epoch: [15/100] Eval Batch: [92/92] [A/P/R/F/ROC]: [88.2920/0.1089/0.4919/0.1783/0.8095] Loss: 446.5270
Epoch: [16/100] Train Batch: [1000/9991] [A/P/R/F/ROC]: [82.6172/0.0718/0.5652/0.1275/0.7745] Loss: 0.4152
Epoch: [16/100] Train Batch: [2000/9991] [A/P/R/F/ROC]: [86.9141/0.0846/0.4231/0.1410/0.8514] Loss: 0.4239
Epoch: [16/100] Train Batch: [3000/9991] [A/P/R/F/ROC]: [87.1094/0.1151/0.6400/0.1951/0.8894] Loss: 0.4301
Epoch: [16/100] Train Batch: [4000/9991] [A/P/R/F/ROC]: [84.2773/0.0994/0.7083/0.1744/0.8910] Loss: 0.4357
Epoch: [16/100] Train Batch: [5000/9991] [A/P/R/F/ROC]: [85.4492/0.1062/0.7391/0.1858/0.8915] Loss: 0.4385
Epoch: [16/100] Train Batch: [6000/9991] [A/P/R/F/ROC]: [86.7188/0.1477/0.7097/0.2444/0.8961] Loss: 0.4422
Epoch: [16/100] Train Batch: [7000/9991] [A/P/R/F/ROC]: [86.6211/0.1429/0.6562/0.2346/0.8938] Loss: 0.4437
Epoch: [16/100] Train Batch: [8000/9991] [A/P/R/F/ROC]: [88.2812/0.1077/0.7778/0.1892/0.9219] Loss: 0.4428
Epoch: [16/100] Train Batch: [9000/9991]

Epoch: [23/100] Train Batch: [7000/9991] [A/P/R/F/ROC]: [85.0586/0.1019/0.5714/0.1730/0.8139] Loss: 0.4336
Epoch: [23/100] Train Batch: [8000/9991] [A/P/R/F/ROC]: [85.7422/0.1290/0.6452/0.2151/0.8389] Loss: 0.4351
Epoch: [23/100] Train Batch: [9000/9991] [A/P/R/F/ROC]: [85.8398/0.0909/0.7368/0.1618/0.8914] Loss: 0.4400
Epoch: [23/100] Eval Batch: [92/92] [A/P/R/F/ROC]: [86.9636/0.1025/0.5217/0.1713/0.8065] Loss: 437.2518
Epoch: [24/100] Train Batch: [1000/9991] [A/P/R/F/ROC]: [86.1328/0.1032/0.8421/0.1839/0.9265] Loss: 0.3964
Epoch: [24/100] Train Batch: [2000/9991] [A/P/R/F/ROC]: [86.6211/0.1241/0.6429/0.2081/0.9063] Loss: 0.4092
Epoch: [24/100] Train Batch: [3000/9991] [A/P/R/F/ROC]: [88.7695/0.1587/0.6897/0.2581/0.8732] Loss: 0.4156
Epoch: [24/100] Train Batch: [4000/9991] [A/P/R/F/ROC]: [86.6211/0.1533/0.6970/0.2514/0.8986] Loss: 0.4213
Epoch: [24/100] Train Batch: [5000/9991] [A/P/R/F/ROC]: [88.9648/0.1600/0.7143/0.2614/0.8862] Loss: 0.4253
Epoch: [24/100] Train Batch: [6000/9991]

Epoch: [31/100] Train Batch: [4000/9991] [A/P/R/F/ROC]: [87.2070/0.1324/0.5806/0.2156/0.8801] Loss: 0.4100
Epoch: [31/100] Train Batch: [5000/9991] [A/P/R/F/ROC]: [86.3281/0.1625/0.8125/0.2708/0.9235] Loss: 0.4148
Epoch: [31/100] Train Batch: [6000/9991] [A/P/R/F/ROC]: [89.0625/0.1694/0.7000/0.2727/0.8736] Loss: 0.4194
Epoch: [31/100] Train Batch: [7000/9991] [A/P/R/F/ROC]: [86.8164/0.1103/0.7273/0.1916/0.9032] Loss: 0.4270
Epoch: [31/100] Train Batch: [8000/9991] [A/P/R/F/ROC]: [84.6680/0.1310/0.6667/0.2189/0.8516] Loss: 0.4263
Epoch: [31/100] Train Batch: [9000/9991] [A/P/R/F/ROC]: [82.0312/0.0632/0.6667/0.1154/0.8327] Loss: 0.4314
Epoch: [31/100] Eval Batch: [92/92] [A/P/R/F/ROC]: [87.2781/0.1046/0.5193/0.1741/0.8095] Loss: 429.1055
Epoch: [32/100] Train Batch: [1000/9991] [A/P/R/F/ROC]: [87.0117/0.1765/0.7941/0.2888/0.9177] Loss: 0.3789
Epoch: [32/100] Train Batch: [2000/9991] [A/P/R/F/ROC]: [88.3789/0.1550/0.6667/0.2516/0.9011] Loss: 0.3906
Epoch: [32/100] Train Batch: [3000/9991]