In [1]:
import json
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import scipy.sparse as spwords
import time
import random
from concurrent.futures import ProcessPoolExecutor as prpExecutor
import sys
import copy
import scipy.sparse as sp
from tqdm import tqdm_notebook, tqdm

import warnings
warnings.filterwarnings(action='once')

In [2]:
# ============================
# Data File Path
# ============================
# Earlier absolute locations of the same files, kept commented out for reference.
# TRAIN_DATA_FILE_PATH = "/hdd1/Spoiler_Detection/ACL/ACL_GENRE/node_edge_info_train_withid.json"
# VALIDATION_DATA_FILE_PATH = "/hdd1/Spoiler_Detection/ACL/ACL_GENRE/node_edge_info_valid_withid.json"
# TEST_DATA_FILE_PATH = "/hdd1/Spoiler_Detection/ACL/ACL_GENRE/node_edge_info_test_withid.json"
# PRE_TRAINED_WORD_EMBEDDING_FILE_PATH = "/hdd1/Spoiler_Detection/TVTropes/glove.840B.300d.txt"
# GENRE_DICT = "/hdd1/Spoiler_Detection/ACL/ACL_GENRE/genre_dict.pickle"
TRAIN_DATA_FILE_PATH = "./parsed_data/node_edge_info_train_withid.json"
VALIDATION_DATA_FILE_PATH = "./parsed_data/node_edge_info_valid_withid.json"
TEST_DATA_FILE_PATH = "./parsed_data/node_edge_info_test_withid.json"
PRE_TRAINED_WORD_EMBEDDING_FILE_PATH = "./parsed_data/glove.840B.300d.txt"
GENRE_DICT = "./parsed_data/genre_dict.pickle"

# ============================
# Model Hyper Parameter
# ============================
EMBEDDING_DIM = 300          # width of the pre-trained word vectors
GENRE_EMBEDDING_DIM = 50     # width of the learned genre embedding
# HIDDEN_STATES[0]: per-direction LSTM size and output width of the first graph layer.
# HIDDEN_STATES[1]: output width of the second graph layer and input width of the classifier.
HIDDEN_STATES = [100, 100]
# One Conv1d branch of NUM_FILTERS filters per kernel size in FILTER_SIZES.
# Model.forward multiplies the genre features element-wise with the node states,
# so NUM_FILTERS * len(FILTER_SIZES) has to equal HIDDEN_STATES[1].
NUM_FILTERS = 50
FILTER_SIZES = [2,3]
NUM_HEADS = 3                # NOTE(review): not referenced by the code visible in this notebook
LEAKY_ALPHA = 0.2            # negative slope of the LeakyReLU used for attention scores

# ============================
# Training Hyper Parameter
# ============================
EPOCHS = 300
LEARNING_RATE = 0.001
BATCH_SIZE = 512
WEIGHT_DECAY = 1e-5          # passed to Adam as weight_decay
DROPOUT_RATE = 0.5
RANDOM_SEED = 26

# ============================
# Set Random Seed
# ============================
# make_batch shuffles with the standard-library `random` module, so it has to be
# seeded as well; seeding only torch and numpy leaves the batch order different
# on every run.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)

In [3]:
# ============================
# Data Pre-Processing
# ============================
def load_data(train_file_path, validation_file_path, test_file_path, genre_dict_file):
    """Read the three JSON data splits and the pickled genre dictionary.

    Args:
        train_file_path: path of the training split (JSON).
        validation_file_path: path of the validation split (JSON).
        test_file_path: path of the test split (JSON).
        genre_dict_file: path of the pickled genre dictionary.

    Returns:
        Tuple ``(train, validation, test, genre_dict)`` holding the decoded
        objects in that order.
    """
    def _read_json(path):
        with open(path) as handle:
            return json.load(handle)

    splits = [_read_json(path)
              for path in (train_file_path, validation_file_path, test_file_path)]

    # pickle executes arbitrary code while loading; only use trusted files here.
    with open(genre_dict_file, "rb") as handle:
        genre_dict = pickle.load(handle)

    return splits[0], splits[1], splits[2], genre_dict

# ============================
# Load the Data Splits
# ============================
print("Load Data...")
start = time.time()
# Decode the three splits and the genre dictionary from the configured paths.
train, validation, test, genre_dict = load_data(
    TRAIN_DATA_FILE_PATH, VALIDATION_DATA_FILE_PATH, TEST_DATA_FILE_PATH, GENRE_DICT,
)
# Elapsed wall-clock time in whole seconds.
print(int(time.time() - start))

Load Data...
202


In [4]:
def check(review):
    """Tell whether every sentence of a review has at most 50 tokens.

    Args:
        review: iterable of tab-separated lines; the first field of each line
            is the whitespace-tokenised sentence.

    Returns:
        ``True`` when no sentence exceeds 50 tokens (also for an empty
        review), ``False`` otherwise.
    """
    return all(len(entry.split("\t")[0].split()) <= 50 for entry in review)

def make_dictionary(_train, _validation, _test, genre_dict):
    """Flatten the splits into sentence lines and build every vocabulary.

    Reviews are kept only when ``check`` accepts their ``"text_info"`` lines.
    Each kept line is a tab-separated record: the sentence, two further fields,
    then any number of ``start:end:label`` edge fields (the label itself may
    contain ``:``).

    Side effects:
        Sets the module-level ``maximum_length`` and ``maximum_genre_length``.

    Args:
        _train, _validation, _test: iterables of review dicts.
        genre_dict: mapping from a book key to its list of genre names.

    Returns:
        ``(train, validation, test, word2id, id2word, edge2id, id2edge,
        genre2id, id2genre, book2genre, maximum_length, maximum_genre_length)``.
        ``edge2id`` additionally maps ``label + "'"`` (the reversed edge) to
        ``edge2id[label] + number_of_real_labels``; ``id2edge`` is not extended.
    """
    global maximum_length, maximum_genre_length

    def _keep_short_reviews(reviews):
        # Concatenate the lines of every review that passes the length check.
        kept = []
        for review in tqdm_notebook(reviews):
            sentences = review["text_info"]
            if check(sentences):
                kept.extend(sentences)
        return kept

    train = _keep_short_reviews(_train)
    validation = _keep_short_reviews(_validation)
    test = _keep_short_reviews(_test)
    corpus = train + validation + test

    maximum_length = max(len(row.split("\t")[0].split()) for row in corpus)
    maximum_genre_length = max(len(names) for names in genre_dict.values())

    # Index 0 is reserved for padding / "no edge"; edge index 1 is the self loop.
    id2word = ["<PAD>"]
    word2id = {"<PAD>": 0}
    id2edge = ["<NONE>", "<SELF>"]
    edge2id = {"<NONE>": 0, "<SELF>": 1}
    id2genre = ["<PAD>"]
    genre2id = {"<PAD>": 0}

    for row in tqdm_notebook(corpus):
        fields = row.split("\t")
        for token in fields[0].split():
            if token not in word2id:
                word2id[token] = len(word2id)
                id2word.append(token)
        for edge_field in fields[3:]:
            parts = edge_field.split(":")
            if len(parts) == 3:
                _head, _tail, relation = parts
            else:
                # The label contains ':' itself; glue its pieces back together.
                _head, _tail = parts[0], parts[1]
                relation = ":".join(parts[2:])
            if relation not in edge2id:
                edge2id[relation] = len(edge2id)
                id2edge.append(relation)

    book2genre = {}
    for book, names in genre_dict.items():
        for name in names:
            if name not in genre2id:
                genre2id[name] = len(genre2id)
                id2genre.append(name)
        book2genre[book] = [genre2id[name] for name in names]

    # Give every real label a reversed twin whose id is shifted past the
    # forward ids (num_edges - 2 == number of real labels).
    num_edges = len(edge2id)
    for position in range(num_edges):
        label = id2edge[position]
        if label not in ("<NONE>", "<SELF>"):
            edge2id[label + "'"] = edge2id[label] + num_edges - 2

    return (train, validation, test,
            word2id, id2word,
            edge2id, id2edge,
            genre2id, id2genre,
            book2genre, maximum_length, maximum_genre_length)

print("Make Dictionary...")
start = time.time()
# Flatten the splits into sentence lines and build the word / edge / genre vocabularies.
(_train, _validation, _test,
 word2id, id2word,
 edge2id, id2edge,
 genre2id, id2genre,
 book2genre, maximum_length, maximum_genre_length) = make_dictionary(train, validation, test, genre_dict)
split_sizes = (len(_train), len(_validation), len(_test))
print('train: {}, valid: {}, test: {}, overall: {}, max_len: {}'.format(*split_sizes, sum(split_sizes), maximum_length))

# Register this book id with an empty genre list.
# NOTE(review): presumably the id occurs in the data but not in genre_dict - confirm.
book2genre[24711433] = []

Make Dictionary...


HBox(children=(IntProgress(value=0, max=1091952), HTML(value='')))




HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=276081), HTML(value='')))




HBox(children=(IntProgress(value=0, max=12911731), HTML(value='')))


train: 10230700, valid: 93500, test: 2587531, overall: 12911731, max_len: 50


In [5]:
def make_input_data_as_index(_data, word2id, edge2id):
    """Parse raw sentence lines and replace edge labels by their ids.

    Args:
        _data: iterable of tab-separated lines
            ``sentence \\t label \\t book \\t edge \\t edge ...`` where each edge
            is ``start:end:label`` (the label may contain ``:``).
        word2id: unused here; kept for interface compatibility.
        edge2id: mapping from edge label to integer id.

    Returns:
        List of ``[sentence, int(label), edges, book]`` records in which
        ``edges`` is the space-joined string of ``start:end:edge_id`` items.
    """
    def _encode(edge_field):
        # Swap the textual label of one edge for its id.
        parts = edge_field.split(":")
        if len(parts) == 3:
            head, tail, relation = parts
        else:
            head, tail = parts[0], parts[1]
            relation = ":".join(parts[2:])
        return ":".join([head, tail, str(edge2id[relation])])

    indexed = []
    for record in tqdm_notebook(_data):
        fields = record.split("\t")
        sentence, label, book = fields[0], int(fields[1]), fields[2]
        encoded_edges = " ".join(_encode(field) for field in fields[3:])
        indexed.append([sentence, label, encoded_edges, book])
    return indexed

print("Make Input as Index...")
start = time.time()
# Each split is rebound in place: the raw tab-separated lines are replaced by
# [sentence, label, edges, book] records, and the raw list is released before
# the next split is processed.
# NOTE(review): because the names are rebound, this cell cannot be re-run on
# its own output (the function calls .split on each element) - re-run the
# dictionary cell first.
_train = make_input_data_as_index(_train, word2id, edge2id)
_validation = make_input_data_as_index(_validation, word2id, edge2id)
_test = make_input_data_as_index(_test, word2id, edge2id)
# Elapsed wall-clock time in whole seconds.
print(int(time.time() - start))

Make Input as Index...


HBox(children=(IntProgress(value=0, max=10230700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=93500), HTML(value='')))




HBox(children=(IntProgress(value=0, max=2587531), HTML(value='')))


406


In [6]:
def make_input_adjacency_matrix(line):
    """Turn one indexed record into ``[sentence, adjacency, label, book]``.

    Args:
        line: ``[sentence, label, edges, book]`` where ``edges`` is a
            space-separated string of ``start:end:edge_id`` items.

    Returns:
        A list with the sentence, the sparse adjacency tensor built by
        ``matrix_to_torch_sparse_tensor`` (square, side = the module-level
        ``maximum_length``), the label as ``float`` and the book field.
    """
    sentence = line[0]
    label = float(line[1])
    book = line[3]
    triples = np.asarray([item.split(":") for item in line[2].split()])
    return [sentence, matrix_to_torch_sparse_tensor(triples, maximum_length), label, book]

def matrix_to_torch_sparse_tensor(edges, maximum_length):
    """Build a sparse ``maximum_length x maximum_length`` matrix of edge ids.

    Uses ``torch.sparse_coo_tensor`` instead of the deprecated legacy
    ``torch.sparse.FloatTensor`` constructor, and accepts an empty edge list
    (which then yields an all-zero matrix instead of raising ``IndexError``).

    Args:
        edges: array-like of ``(start, end, edge_id)`` rows; the entries may be
            numeric strings. An empty array is allowed.
        maximum_length: side length of the square matrix.

    Returns:
        An int64 sparse COO tensor holding ``edge_id`` at ``[start, end]``.
        NOTE(review): duplicate ``(start, end)`` pairs are summed when the
        tensor is densified - presumably the data never contains them.
    """
    # reshape(-1, 3) turns the shape-(0,) array of an empty edge list into (0, 3).
    triples = np.asarray(edges).reshape(-1, 3).astype(np.int64)
    indices = torch.from_numpy(np.ascontiguousarray(triples[:, :2].T))
    values = torch.from_numpy(np.ascontiguousarray(triples[:, 2]))
    shape = torch.Size((maximum_length, maximum_length))

    return torch.sparse_coo_tensor(indices, values, shape)

print("Make Adjacency Matrix...")
start = time.time()
# The conversion runs serially. The process pool that used to be created here
# was never given any work and was dropped without shutdown(), so it is gone.
# Each split is rebound in place so the string records can be released as soon
# as their tensors exist.
_train = [make_input_adjacency_matrix(line) for line in tqdm_notebook(_train)]
_validation = [make_input_adjacency_matrix(line) for line in tqdm_notebook(_validation)]
_test = [make_input_adjacency_matrix(line) for line in tqdm_notebook(_test)]
# Elapsed wall-clock time in whole seconds.
print(int(time.time() - start))

Make Adjacency Matrix...


HBox(children=(IntProgress(value=0, max=10230700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=93500), HTML(value='')))




HBox(children=(IntProgress(value=0, max=2587531), HTML(value='')))


2568


In [7]:
def load_pre_trained_word_embedding(word_embedding_file_path, word2id):
    """Create the initial word-embedding matrix from a text embedding file.

    The file is streamed line by line (and closed afterwards) instead of being
    read into memory as a whole, and only vectors of words present in
    ``word2id`` are parsed and kept.

    Args:
        word_embedding_file_path: UTF-8 text file with one
            ``word v1 ... vEMBEDDING_DIM`` entry per line. Lines with any other
            number of whitespace-separated fields are skipped.
        word2id: mapping from word to row index; index 0 is the padding row.

    Returns:
        ``torch`` tensor (float64) of shape ``(len(word2id), EMBEDDING_DIM)``.
        Words missing from the file keep a ``np.random.uniform`` row in
        ``[0, 1)``; row 0 is all zeros.
    """
    expected_fields = EMBEDDING_DIM + 1  # the word plus its vector
    word_embedding = np.random.uniform(size=(len(word2id), EMBEDDING_DIM))

    with open(word_embedding_file_path, encoding="utf-8") as embedding_file:
        for line in tqdm_notebook(embedding_file):
            tokens = line.split()
            if len(tokens) != expected_fields:
                continue
            index = word2id.get(tokens[0])
            if index is not None:
                # When a word occurs more than once, the last occurrence wins.
                word_embedding[index] = np.asarray(tokens[1:]).astype(np.float32)

    word_embedding[0] = np.zeros(EMBEDDING_DIM)
    return torch.from_numpy(word_embedding)

print("Load Pre-trained Word Embedding...")
# Initial embedding matrix with one row per entry of word2id.
word_embedding = load_pre_trained_word_embedding(
    PRE_TRAINED_WORD_EMBEDDING_FILE_PATH,
    word2id,
)

Load Pre-trained Word Embedding...


  


HBox(children=(IntProgress(value=0, max=2196017), HTML(value='')))




HBox(children=(IntProgress(value=0, max=622950), HTML(value='')))




In [8]:
def make_batch(data, batch_size, word2id, book2genre, is_train=True):
    """Yield padded mini-batches from ``data``.

    Reads the module-level ``maximum_length`` and ``maximum_genre_length`` for
    padding.

    Args:
        data: sequence of ``[sentence, adjacency_matrix, label, book]`` records.
        batch_size: maximum number of records per batch.
        word2id: mapping from word to id; id 0 is the padding id.
        book2genre: mapping from ``int`` book id to a list of genre ids. It is
            never modified; every row gets its own padded copy. A book without
            an entry is treated as having no genres (all-padding row).
        is_train: shuffle the record order (with the ``random`` module) when true.

    Yields:
        ``(sentences, adjacency_matrics, labels, genres)``: lists with one entry
        per record. Sentences are id lists padded with 0 to ``maximum_length``;
        genres are id lists padded with 0 to ``maximum_genre_length``.
    """
    indices = np.arange(len(data))
    if is_train:
        random.shuffle(indices)

    for left in range(0, len(data), batch_size):
        sentences = []
        adjacency_matrics = []
        labels = []
        genres = []

        for j in indices[left:left + batch_size]:
            record = data[j]

            sentence = [word2id[word] for word in record[0].split()]
            sentence += [0]*(maximum_length - len(sentence))
            sentences.append(sentence)

            adjacency_matrics.append(record[1])
            labels.append(record[2])

            # Build a new list: an in-place `+=` on the looked-up list would pad
            # the entry stored in book2genre and share one list between rows.
            book_genres = list(book2genre.get(int(record[3]), []))
            book_genres += [0]*(maximum_genre_length - len(book_genres))
            genres.append(book_genres)

        yield sentences, adjacency_matrics, labels, genres

In [9]:
# ============================
# Model
# ============================
class GCNLayer(nn.Module):
    """Attention-weighted neighbour aggregation followed by a linear map.

    Args:
        input_dim: width of the incoming node features.
        output_dim: width of the produced node features.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()

        self.input_dim, self.output_dim = input_dim, output_dim

        # Xavier-initialised projection and a zero bias.
        self.weight = nn.Parameter(torch.randn(input_dim, output_dim))
        nn.init.xavier_normal_(self.weight)
        self.bias = nn.Parameter(torch.zeros(output_dim))

    def forward(self, x, attention_weight):
        """Aggregate neighbours and project the result.

        Args:
            x: node features already expanded to one copy per target node; the
                third axis runs over the neighbours and the last over features.
            attention_weight: one weight per (target, neighbour) pair, i.e.
                ``x`` without its last axis.

        Returns:
            ``matmul(sum over neighbours of weight * feature, weight) + bias``.
        """
        aggregated = (x * attention_weight.unsqueeze(3)).sum(2)
        return torch.matmul(aggregated, self.weight) + self.bias
    
class GenreEncoder(nn.Module):
    """Encode a padded sequence of genre embeddings with parallel 1-D CNNs.

    One ``Conv1d -> ReLU -> MaxPool1d`` branch is built per entry of
    ``filter_sizes``; the pooling window spans the whole convolution output, so
    each branch yields ``num_filters`` values per example.

    Args:
        num_filters: output channels of every convolution.
        filter_sizes: kernel sizes, one branch each.
        genre_embedding_dim: width of the incoming genre embeddings.
        maximum_genre_length: length of the padded genre sequence.
    """

    def __init__(self, num_filters, filter_sizes, genre_embedding_dim, maximum_genre_length):
        super(GenreEncoder, self).__init__()

        self.num_filters = num_filters
        self.filter_sizes = filter_sizes
        self.genre_embedding_dim = genre_embedding_dim
        self.maximum_genre_length = maximum_genre_length

        # ==============================
        # 1D CNN
        # ==============================
        self.cnn = nn.ModuleList([nn.Sequential(
            nn.Conv1d(self.genre_embedding_dim, self.num_filters, size),
            nn.ReLU(),
            nn.MaxPool1d(self.maximum_genre_length - size + 1)
        ) for size in self.filter_sizes])

    def forward(self, genres):
        """Return genre features of shape ``(B, num_filters * len(filter_sizes))``.

        Args:
            genres: embedded genre sequence of shape
                ``(B, maximum_genre_length, genre_embedding_dim)``.
        """
        genres = genres.transpose(1, 2)  # Conv1d expects channels before length
        # Drop only the pooled length axis. A bare squeeze() would also remove
        # the batch axis for a single-example batch (or the channel axis when
        # num_filters == 1) and make the concatenation below fail.
        convs = [conv(genres).squeeze(2) for conv in self.cnn]

        return torch.cat(convs, dim=1)
        
    
class Attention(nn.Module):
    """Edge-aware attention over the neighbours of every node.

    The score of a (target, neighbour) pair is the dot product between the
    projected neighbour state and the embedding of the edge id stored in the
    adjacency matrix, passed through a LeakyReLU. Pairs whose edge id is 0 are
    masked out before the softmax over neighbours.

    Args:
        alpha: negative slope of the LeakyReLU.
        input_dim: width of the incoming node states.
        output_dim: width of the projection and of the edge embeddings.
        num_edges: number of distinct edge ids (rows of the edge embedding).
        maximum_length: number of nodes ``N`` per example.
    """

    def __init__(self, alpha, input_dim, output_dim, num_edges, maximum_length):
        super().__init__()

        self.alpha = alpha
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_edges = num_edges
        self.maximum_length = maximum_length

        # Node projection, Xavier-initialised.
        self.weight = nn.Parameter(torch.randn(input_dim, output_dim))
        nn.init.xavier_normal_(self.weight)

        # One vector per edge id; id 0 ("no edge") is the padding index.
        self.edge_embedding = nn.Embedding(num_edges, output_dim, padding_idx=0)
        nn.init.xavier_normal_(self.edge_embedding.weight)

        self.leakyrelu = nn.LeakyReLU(alpha)

    def forward(self, x, adjacency_matrics):
        """Compute attention weights.

        Args:
            x: node states, ``(B, N, H)``.
            adjacency_matrics: integer edge ids, ``(B, N, N)``.

        Returns:
            ``(B, N, N)`` weights, softmax-normalised over the last axis.
        """
        projected = torch.matmul(x, self.weight)                   # (B, N, H')
        per_target = projected.unsqueeze(1).expand(projected.size(0),
                                                   self.maximum_length,
                                                   self.maximum_length,
                                                   self.output_dim)  # (B, N, N, H')
        edge_vectors = self.edge_embedding(adjacency_matrics)      # (B, N, N, H')

        scores = self.leakyrelu((per_target * edge_vectors).sum(dim=3))  # (B, N, N)

        # A very negative score gives masked pairs zero weight after the softmax.
        masked = torch.where(adjacency_matrics > 0,
                             scores,
                             torch.full_like(scores, -9e15))
        return torch.softmax(masked, dim=2)
    
class Model(nn.Module):
    """BiLSTM + two attention-weighted graph layers + genre-aware pooling.

    Pipeline of ``forward``: word embeddings -> bidirectional LSTM -> two
    ``Attention``/``GCNLayer`` rounds over the edge-labelled graph -> pooling
    of the node states with attention scores derived from the genre features
    -> two-way classifier.

    Args:
        num_words: vocabulary size (stored only; the embedding shape comes
            from ``pre_trained``).
        num_edges: number of edge ids including ``<NONE>``, ``<SELF>`` and the
            reversed twin of every real label.
        num_genres: number of genre ids including padding.
        alpha: LeakyReLU slope used by the attention modules.
        embedding_dim: width of the word embeddings.
        genre_embedding_dim: width of the genre embeddings.
        hidden_dim: ``[h0, h1]``: LSTM size per direction / first graph layer
            width, and second graph layer width.
        num_filters, filter_sizes: configuration of the genre CNN;
            ``num_filters * len(filter_sizes)`` must equal ``hidden_dim[1]``
            because the genre features are multiplied element-wise with the
            node states.
        maximum_length: number of (padded) tokens per sentence.
        maximum_genre_length: number of (padded) genres per book.
        pre_trained: float tensor with the initial word embeddings.
        dropout_rate: dropout probability.
    """

    def __init__(self,
                 num_words,
                 num_edges,
                 num_genres,
                 alpha,
                 embedding_dim,
                 genre_embedding_dim,
                 hidden_dim,
                 num_filters,
                 filter_sizes,
                 maximum_length,
                 maximum_genre_length,
                 pre_trained,
                 dropout_rate):
        super(Model, self).__init__()

        self.num_words = num_words
        self.num_edges = num_edges
        self.num_genres = num_genres
        self.embedding_dim = embedding_dim
        self.genre_embedding_dim = genre_embedding_dim
        self.hidden_dim = hidden_dim
        self.num_filters = num_filters
        self.filter_sizes = filter_sizes
        self.maximum_length = maximum_length
        self.maximum_genre_length = maximum_genre_length
        self.dropout_rate = dropout_rate
        self.alpha = alpha

        # =============================================
        # Data Preparation
        # =============================================
        # Word embeddings start from the pre-trained matrix and are fine-tuned.
        self.word_embedding \
        = nn.Embedding.from_pretrained(pre_trained, freeze=False)
        self.genre_embedding \
        = nn.Embedding(self.num_genres, self.genre_embedding_dim, padding_idx = 0)

        self.attention_1 = Attention(self.alpha,
                                     2*self.hidden_dim[0],
                                     self.hidden_dim[0],
                                     self.num_edges,
                                     self.maximum_length)

        self.attention_2 = Attention(self.alpha,
                                     self.hidden_dim[0],
                                     self.hidden_dim[1],
                                     self.num_edges,
                                     self.maximum_length)

        self.lstm = nn.LSTM(self.embedding_dim, self.hidden_dim[0], bidirectional=True, batch_first=True)

        self.gcn_layer_1 \
        = GCNLayer(2*self.hidden_dim[0], self.hidden_dim[0])
        self.gcn_layer_2 \
        = GCNLayer(self.hidden_dim[0], self.hidden_dim[1])

        self.genre_encoder = GenreEncoder(self.num_filters,
                                          self.filter_sizes,
                                          self.genre_embedding_dim,
                                          self.maximum_genre_length)

        self.output_layer = nn.Sequential(
            nn.Dropout(self.dropout_rate),
            nn.Linear(self.hidden_dim[1], self.hidden_dim[1]),
            nn.ReLU(),
            nn.Dropout(self.dropout_rate),
            nn.Linear(self.hidden_dim[1], 2)
        )

        self.dropout = nn.Dropout(self.dropout_rate)

    def forward(self, sentences, adjacency_matrics, genres):
        """Classify a batch of sentences.

        All helper tensors are created on the device of the inputs, so the
        model runs wherever its inputs and parameters live (CPU or any GPU).

        Args:
            sentences: word ids, ``(B, N)`` with 0 as padding.
            adjacency_matrics: forward edge ids, ``(B, N, N)``, integer dtype.
            genres: genre ids, ``(B, maximum_genre_length)``.

        Returns:
            Logits of shape ``(B, 2)``.
        """
        device = sentences.device
        batch_size = sentences.size(0)

        # Reversed edges: transpose the matrix and shift every real edge id by
        # the number of real labels, which is where the "'" ids start.
        reverse_offset = (self.num_edges - 2) // 2
        adjacency_matrics_t = torch.where(adjacency_matrics > 0,
                                          adjacency_matrics + reverse_offset,
                                          torch.zeros_like(adjacency_matrics))
        adjacency_matrics_t = adjacency_matrics_t.transpose(1,2)
        # Self loops use edge id 1 (<SELF>).
        eye = torch.eye(adjacency_matrics.size(1), dtype=torch.long,
                        device=adjacency_matrics.device)
        # NOTE(review): the three parts are summed, so a pair connected in both
        # directions (or an edge on the diagonal) ends up with a combined id -
        # presumably the input graphs never contain such pairs; confirm.
        adjacency_matrics = adjacency_matrics \
                          + adjacency_matrics_t \
                          + eye.unsqueeze(0) # (B, N, N)

        embedded_words = self.word_embedding(sentences) # (B, N, D)
        h0 = torch.zeros(2, batch_size, self.hidden_dim[0], device=device) # 2 for bidirection
        c0 = torch.zeros(2, batch_size, self.hidden_dim[0], device=device)
        self.lstm.flatten_parameters()
        lstm = self.lstm(embedded_words, (h0, c0))[0] # (B, N, 2H)
        attention_weight_1 = self.attention_1(lstm, adjacency_matrics)
        lstm = lstm.unsqueeze(1)
        lstm = lstm.expand(lstm.size(0),
                          self.maximum_length,
                          self.maximum_length,
                          2*self.hidden_dim[0])

        gcn_1 = self.gcn_layer_1(lstm, attention_weight_1)
        gcn_1 = torch.relu(gcn_1) # B X N X H
        gcn_1 = self.dropout(gcn_1)

        attention_weight_2 = self.attention_2(gcn_1, adjacency_matrics)
        gcn_1 = gcn_1.unsqueeze(1)
        gcn_1 = gcn_1.expand(gcn_1.size(0),
                          self.maximum_length,
                          self.maximum_length,
                          self.hidden_dim[0])
        gcn_2 = self.gcn_layer_2(gcn_1, attention_weight_2)
        gcn_2 = torch.relu(gcn_2) # (B, N, H')

        genres = self.genre_embedding(genres) # (B, N', G)
        genre_features = self.genre_encoder(genres) # (B, H')
        # Score every token against the genre features; padding tokens are masked.
        attention_weight_3 = (gcn_2*genre_features.unsqueeze(1)).sum(2) # (B, N)
        zero_vec = -9e15*torch.ones_like(attention_weight_3)
        attention_weight_3 = torch.where(sentences > 0, attention_weight_3, zero_vec)
        attention_weight_3 = torch.softmax(attention_weight_3, dim=1) # (B, N)

        sentence_representations = (gcn_2*attention_weight_3.unsqueeze(2)).sum(1) # (B, H')

        output = self.output_layer(sentence_representations)

        return output

In [12]:
# =============================================
# Model Initialize
# =============================================
print("Model Initializing..")
# Per-class loss weights: class 0 counts 0.05, class 1 counts 1.0.
weight = torch.tensor([0.05, 1.0]).cuda()
criterion = nn.CrossEntropyLoss(weight=weight)

# Build the network from the vocabulary sizes and the configuration constants,
# then wrap it for multi-GPU data parallelism.
model = nn.DataParallel(
    Model(num_words=len(word2id),
          num_edges=len(edge2id),
          num_genres=len(genre2id),
          alpha=LEAKY_ALPHA,
          embedding_dim=EMBEDDING_DIM,
          genre_embedding_dim=GENRE_EMBEDDING_DIM,
          hidden_dim=HIDDEN_STATES,
          num_filters=NUM_FILTERS,
          filter_sizes=FILTER_SIZES,
          maximum_length=maximum_length,
          maximum_genre_length=maximum_genre_length,
          pre_trained=word_embedding.type("torch.FloatTensor"),
          dropout_rate=DROPOUT_RATE)
).cuda()
optimizer = optim.Adam(model.parameters(),
                       lr=LEARNING_RATE,
                       weight_decay=WEIGHT_DECAY)

Model Initializing..


In [None]:
# =============================================
# Model Training
# =============================================
def _batch_count(num_examples, batch_size):
    """Number of batches make_batch yields for ``num_examples`` records."""
    return (num_examples + batch_size - 1) // batch_size


def _batch_to_tensors(batch):
    """Move one make_batch result to the GPU as (sentences, adjacency, labels, genres)."""
    sentences, adjacency_matrics, labels, genres = batch
    input_sentences = torch.tensor(sentences, dtype=torch.long).cuda()
    # The adjacency matrices are stored sparse and densified per batch.
    input_adjacency_matrics = torch.stack(
        [matrix.to_dense() for matrix in adjacency_matrics], dim=0).cuda()
    input_labels = torch.tensor(labels, dtype=torch.long).cuda()
    input_genres = torch.tensor(genres, dtype=torch.long).cuda()
    return input_sentences, input_adjacency_matrics, input_labels, input_genres


def _evaluate(data, progress_prefix=""):
    """Score ``model`` on ``data`` and return (accuracy %, precision, recall, f1).

    Runs in eval mode under ``torch.no_grad()`` so that no autograd graph is
    kept for the forward passes. Precision, recall and F1 refer to class 1 and
    are reported as 0.0 whenever their denominator is zero.
    """
    model.eval()
    total_batches = _batch_count(len(data), BATCH_SIZE)

    done = 0
    count = 0            # predictions equal to the label
    correct = 0          # true positives
    positive_answer = 0  # predicted positives
    positive_actual = 0  # labelled positives
    with torch.no_grad():
        for batch in make_batch(data, BATCH_SIZE, word2id, book2genre, False):
            input_sentences, input_adjacency_matrics, input_labels, input_genres = _batch_to_tensors(batch)
            predictions = torch.argmax(
                model(input_sentences, input_adjacency_matrics, input_genres), dim=1)

            positive_answer += predictions.sum().item()
            positive_actual += (input_labels == 1).sum().item()
            correct += (predictions*input_labels).sum().item()
            count += (predictions == input_labels).sum().item()
            done += 1

            sys.stdout.write("\r" + "{}Batch: [{}/{}]".format(progress_prefix, done, total_batches))
            sys.stdout.flush()

    accuracy = 100*float(count)/len(data) if len(data) else 0.0
    precision = float(correct)/positive_answer if positive_answer else 0.0
    recall = float(correct)/positive_actual if positive_actual else 0.0
    if (precision+recall) == 0.0:
        f1 = 0.0
    else:
        f1 = 2*precision*recall/(precision+recall)
    return accuracy, precision, recall, f1


# Baseline: validation scores of the freshly initialised model.
accuracy, precision, recall, f1 = _evaluate(_validation)
sys.stdout.write("\r" + "Before Training [A/P/R/F]: [{:.4f}/{:.4f}/{:.4f}/{:.4f}]".format(accuracy, precision, recall, f1))
sys.stdout.flush()

best_model = {}
best = np.zeros(3)  # (precision, recall, f1) of the best epoch so far
print("\nModel Training..")
for i in range(EPOCHS):
    model.train()
    start = time.time()

    batch_num = _batch_count(len(_train), BATCH_SIZE)

    loss = .0  # sum of the per-batch losses of this epoch
    step = 0

    for batch in make_batch(_train, BATCH_SIZE, word2id, book2genre):
        input_sentences, input_adjacency_matrics, input_labels, input_genres = _batch_to_tensors(batch)
        optimizer.zero_grad()
        logits = model(input_sentences, input_adjacency_matrics, input_genres)
        _loss = criterion(logits, input_labels)
        _loss.backward()
        optimizer.step()
        loss += _loss.item()
        step+=1

        sys.stdout.write("\r" + "Epoch: [{}/{}] Batch: [{}/{}]".format(i+1, EPOCHS, step, batch_num))
        sys.stdout.flush()

    # Validate after every epoch and keep a copy of the best weights by F1.
    accuracy, precision, recall, f1 = _evaluate(
        _validation, "Epoch: [{}/{}] Eval ".format(i+1, EPOCHS))
    print(" [A/P/R/F]: [{:.4f}/{:.4f}/{:.4f}/{:.4f}] Loss: {:.4f}".format(accuracy, precision, recall, f1, loss))
    if f1 > best[2]:
        best = precision, recall, f1
        best_model = copy.deepcopy(model.state_dict())
        print(" Current Best:)")

Before Training [A/P/R/F]: [2.5829/0.0258/1.0000/0.0504]
Model Training..
Epoch: [1/300] Eval Batch: [183/183] [A/P/R/F]: [87.5316/0.1025/0.4932/0.1697] Loss: 10379.9422
 Current Best:)
Epoch: [2/300] Eval Batch: [183/183] [A/P/R/F]: [86.0086/0.0936/0.5089/0.1582] Loss: 9891.2980
Epoch: [3/300] Eval Batch: [183/183] [A/P/R/F]: [86.0396/0.0962/0.5246/0.1626] Loss: 9703.9916
Epoch: [4/300] Eval Batch: [183/183] [A/P/R/F]: [83.5027/0.0876/0.5718/0.1519] Loss: 9589.6648
Epoch: [5/300] Eval Batch: [183/183] [A/P/R/F]: [85.5112/0.0947/0.5387/0.1611] Loss: 9513.9335
Epoch: [6/300] Eval Batch: [183/183] [A/P/R/F]: [85.5743/0.0983/0.5611/0.1673] Loss: 9458.6429
Epoch: [7/300] Eval Batch: [183/183] [A/P/R/F]: [83.6460/0.0881/0.5702/0.1526] Loss: 9413.1096
Epoch: [8/300] Eval Batch: [183/183] [A/P/R/F]: [90.0695/0.1213/0.4555/0.1916] Loss: 9381.7152
 Current Best:)
Epoch: [9/300] Batch: [7828/19982]