In [1]:
! pip install transformers
! pip install nltk



In [2]:
import torch

import random
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim

from torch.optim.lr_scheduler import MultiStepLR
from torch.utils import data
from transformers import RobertaTokenizer, RobertaModel
from sklearn.metrics import f1_score, roc_auc_score

SEED = 412413

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [3]:
df = pd.read_csv("final_train_data.csv")

In [4]:
import math

df = df.sample(frac=1)

train_num = math.ceil(0.7 * len(df))
valid_num = math.ceil(0.9 * len(df))
train_data = df.iloc[:train_num, :].reset_index()
valid_data = df.iloc[train_num:valid_num, :].reset_index()
test_data = df.iloc[valid_num:, :].reset_index()

In [5]:
args = {
    'batch_size': 256,
    'lr': 3e-4,
    'hidden_dim': 128,
    'n_layers': 1,
    'bidirectional': True,
    'dropout': 0.2,
    'n_epochs': 20,
    'b1': 0.9,
    'b2': 0.999,
    'weight_decay': 0.01,
    'lr_decay': 0.7
}

num_positive = (df["sentiment"] == "positive").sum()
num_negative = (df["sentiment"] == "negative").sum()
num_neutral = (df["sentiment"] == "neutral").sum()

args["weight"] = torch.tensor([num_negative / len(df), num_neutral / len(df), num_positive / len(df)], dtype=torch.float32)

print(args["weight"])

tensor([0.1568, 0.4638, 0.3794])


In [6]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

In [7]:
tokenized_train = train_data['text'].apply((
    lambda x: tokenizer.encode(x, add_special_tokens=True)))
tokenized_valid = valid_data['text'].apply((
    lambda x: tokenizer.encode(x, add_special_tokens=True)))
tokenized_test = test_data['text'].apply((
    lambda x: tokenizer.encode(x, add_special_tokens=True)))

In [8]:
def get_max_len(tokenized):
    max_len = 0
    for i in tokenized.values:
        if len(i) > max_len:
            max_len = len(i)
    return max_len

In [9]:
max_len_train = get_max_len(tokenized_train)
print(max_len_train)
max_len_valid = get_max_len(tokenized_valid)
print(max_len_valid)
max_len_test = get_max_len(tokenized_test)
print(max_len_test)
max_len = max([max_len_train, max_len_valid, max_len_test])

52
59
45


In [10]:
padded_train = torch.tensor([i + [0] * (max_len - len(i)) 
                             for i in tokenized_train.values])
padded_valid = torch.tensor([i + [0] * (max_len - len(i)) 
                             for i in tokenized_valid.values])
padded_test = torch.tensor([i + [0] * (max_len - len(i)) 
                            for i in tokenized_test.values])

In [11]:
train_label = torch.tensor(train_data['sentiment'].replace(
    to_replace='positive', value=2).replace(
    to_replace='negative', value=0).replace(
    to_replace='neutral', value=1))
valid_label = torch.tensor(valid_data['sentiment'].replace(
    to_replace='positive', value=2).replace(
    to_replace='negative', value=0).replace(
    to_replace='neutral', value=1))
test_label = torch.tensor(test_data['sentiment'].replace(
    to_replace='positive', value=2).replace(
    to_replace='negative', value=0).replace(
    to_replace='neutral', value=1))

In [12]:
# Define the dataset and data iterators
class Dataset(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, x, labels):
        'Initialization'
        self.x = x
        self.labels = labels

    def __len__(self):
        'Denotes the total number of samples'
        return self.x.shape[0]

    def __getitem__(self, index):
        'Generates one sample of data'

        # Load data and get label
        x = self.x[index]
        y = self.labels[index]

        return x, y

In [13]:
trainset = Dataset(padded_train, train_label)
validset = Dataset(padded_valid, valid_label)
testset = Dataset(padded_test, test_label)

train_loader = torch.utils.data.DataLoader(trainset,
                                           batch_size=args['batch_size'],
                                           shuffle=True,
                                           drop_last=True)
valid_loader = torch.utils.data.DataLoader(validset,
                                           batch_size=args['batch_size'],
                                           shuffle=True,
                                           drop_last=True)
test_loader = torch.utils.data.DataLoader(testset,
                                           batch_size=args['batch_size'],
                                           shuffle=True,
                                           drop_last=True)

In [14]:
# torch.save(trainset, "trainset.pt")
# torch.save(validset, "validset.pt")
# torch.save(testset, "testset.pt")

In [15]:
class BERTGRUSentiment(nn.Module):
    def __init__(self,
                 bert,
                 hidden_dim,
                 output_dim,
                 n_layers,
                 bidirectional,
                 dropout):
        
        super().__init__()
        
        self.bert = bert
        
#         embedding_dim = bert.config.to_dict()['dim']
        embedding_dim = 768
    
        self.rnn = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers = n_layers,
                          bidirectional = bidirectional,
                          batch_first = True,
                          dropout = 0 if n_layers < 2 else dropout)
        
        self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        
        #text = [batch size, sent len]
        attention_mask = text.masked_fill(text != 0, 1)
                
        with torch.no_grad():
            embedded = self.bert(text, attention_mask=attention_mask)[0]
                
        #embedded = [batch size, sent len, emb dim]
        
        _, hidden = self.rnn(embedded)
        
        #hidden = [n layers * n directions, batch size, emb dim]
        
        if self.rnn.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
        else:
            hidden = self.dropout(hidden[-1,:,:])
                
        #hidden = [batch size, hid dim]
        
        output = self.out(hidden)
        
        #output = [batch size, out dim]
        
        return output

In [19]:
class BERTLSTMSentiment(nn.Module):
    def __init__(self,
                 bert,
                 hidden_dim,
                 output_dim,
                 n_layers,
                 bidirectional,
                 dropout):
        
        super().__init__()
        
        self.bert = bert
        
#         embedding_dim = bert.config.to_dict()['dim']
        embedding_dim = 768
    
        self.rnn = nn.LSTM(embedding_dim,
                          hidden_dim,
                          num_layers = n_layers,
                          bidirectional = bidirectional,
                          batch_first = True,
                          dropout = 0 if n_layers < 2 else dropout)
        
        self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.tanh = nn.Tanh()
        
    def forward(self, text):
        
        #text = [batch size, sent len]
        attention_mask = text.masked_fill(text != 0, 1)
                
        with torch.no_grad():
            embedded = self.bert(text, attention_mask=attention_mask)[0]
                
        #embedded = [batch size, sent len, emb dim]
        
        _, (hidden, _) = self.rnn(embedded)
        
        #hidden = [n layers * n directions, batch size, emb dim]
        
        if self.rnn.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
        else:
            hidden = self.dropout(hidden[-1,:,:])
                
        #hidden = [batch size, hid dim]
        
        output = self.out(hidden)
        
        # new activation
        sig_out = self.tanh(output)
        
        #output = [batch size, out dim]
        
        return sig_out

In [20]:
bert = RobertaModel.from_pretrained('roberta-base')

In [21]:
# model = BERTGRUSentiment(bert,
#                          args['hidden_dim'],
#                          3,
#                          args['n_layers'],
#                          args['bidirectional'],
#                          args['dropout']).to(device)
model = BERTLSTMSentiment(bert,
                         args['hidden_dim'],
                         3,
                         args['n_layers'],
                         args['bidirectional'],
                         args['dropout']).to(device)

In [22]:
for name, param in model.named_parameters():                
    if name.startswith('bert'):
        param.requires_grad = False

In [23]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 920,323 trainable parameters


In [24]:
for name, param in model.named_parameters():                
    if param.requires_grad:
        print(name)

rnn.weight_ih_l0
rnn.weight_hh_l0
rnn.bias_ih_l0
rnn.bias_hh_l0
rnn.weight_ih_l0_reverse
rnn.weight_hh_l0_reverse
rnn.bias_ih_l0_reverse
rnn.bias_hh_l0_reverse
out.weight
out.bias


In [25]:
# optimizer = optim.Adam(model.parameters(), 
#                        lr=args['lr'], 
#                        betas=(args["b1"], args["b2"]),
#                        weight_decay=args["weight_decay"])

optimizer = optim.AdamW(model.parameters(), 
                       lr=args['lr'], 
                       betas=(args["b1"], args["b2"]),
                       weight_decay=args["weight_decay"])

# optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=args["lr"])

scheduler = MultiStepLR(optimizer, milestones=[20, 40], gamma=args["lr_decay"])

criterion = nn.CrossEntropyLoss(weight=args['weight']).to(device)

In [26]:
def multi_acc(y_pred, y_label):
    softmax = nn.Softmax(dim=1)
    y_pred_softmax = softmax(y_pred)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)
#     print(y_pred_tags)

    # accu
    correct_pred = (y_pred_tags == y_label).float()
    acc = correct_pred.sum() / len(y_label)

    # roc-auc
    one_hot_label = nn.functional.one_hot(y_label)
#     roc_auc = roc_auc_score(one_hot_label.detach().cpu(), y_pred_softmax.detach().cpu(), average="macro")

    # f1
    f1 = f1_score(y_label.detach().cpu(), y_pred_tags.detach().cpu(), average='weighted')
    
    return acc, 1, f1

In [27]:
def train(model, data_loader, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    epoch_rocauc = 0
    epoch_f1 = 0
    
    model.train()
    
    for batch_idx, (data, target) in enumerate(data_loader):
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()

        predictions = model(data).squeeze(1)

        loss = criterion(predictions, target)
        
        acc, roc_auc, f1 = multi_acc(predictions, target)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        epoch_rocauc += roc_auc
        epoch_f1 += f1

        print("batch idx {}: | train loss: {} | train accu: {:.3f} | train roc: {:.3f} | train f1: {}".format(
            batch_idx, loss.item(), acc.item(), roc_auc, f1))
        
    return epoch_loss / len(data_loader), epoch_acc / len(data_loader), epoch_rocauc / len(data_loader), epoch_f1 / len(data_loader)

In [28]:
def evaluate(model, data_loader, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    epoch_rocauc = 0
    epoch_f1 = 0
    model.eval()
    
    with torch.no_grad():
    
        for batch_idx, (data, target) in enumerate(data_loader):
            data, target = data.to(device), target.to(device)
            
            predictions = model(data).squeeze(1)
            loss = criterion(predictions, target)
            
            acc, roc_auc, f1 = multi_acc(predictions, target)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
            epoch_rocauc += roc_auc
            epoch_f1 += f1
        
    return epoch_loss / len(data_loader), epoch_acc / len(data_loader), epoch_rocauc / len(data_loader), epoch_f1 / len(data_loader)

In [29]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [None]:
history = {
    "train_loss": [],
    "valid_loss": []
}

import time

best_valid_loss = float('inf')

for epoch in range(args['n_epochs']):
    
    start_time = time.time()
    
    train_loss, train_acc, train_rocauc, train_f1 = train(model, train_loader, optimizer, criterion)
    history["train_loss"].append(train_loss)
    valid_loss, valid_acc, valid_rocauc, valid_f1 = evaluate(model, valid_loader, criterion)
    history["valid_loss"].append(valid_loss)
    scheduler.step()
        
    end_time = time.time()
        
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'best_model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f} | Train rocauc: {train_rocauc} | Train f1: {train_f1}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f} | Val. rocauc: {valid_rocauc} | Val. f1: {valid_f1}%')

batch idx 0: | train loss: 1.127386450767517 | train accu: 0.211 | train roc: 1.000 | train f1: 0.23354078618250818
batch idx 1: | train loss: 1.0826241970062256 | train accu: 0.398 | train roc: 1.000 | train f1: 0.3911313657407407
batch idx 2: | train loss: 1.0369826555252075 | train accu: 0.484 | train roc: 1.000 | train f1: 0.435211879645169


  'precision', 'predicted', average, warn_for)


batch idx 3: | train loss: 1.010359764099121 | train accu: 0.457 | train roc: 1.000 | train f1: 0.40250075665859564
batch idx 4: | train loss: 0.9804185628890991 | train accu: 0.469 | train roc: 1.000 | train f1: 0.382580974544073
batch idx 5: | train loss: 0.9657912850379944 | train accu: 0.465 | train roc: 1.000 | train f1: 0.3555530954790823
batch idx 6: | train loss: 0.9570099115371704 | train accu: 0.426 | train roc: 1.000 | train f1: 0.2890475335249042
batch idx 7: | train loss: 0.9345581531524658 | train accu: 0.434 | train roc: 1.000 | train f1: 0.27606012040595723
batch idx 8: | train loss: 0.9311413764953613 | train accu: 0.461 | train roc: 1.000 | train f1: 0.3054148092473564
batch idx 9: | train loss: 0.9040330648422241 | train accu: 0.469 | train roc: 1.000 | train f1: 0.3057107528851438
batch idx 10: | train loss: 0.9125540852546692 | train accu: 0.465 | train roc: 1.000 | train f1: 0.2975
batch idx 11: | train loss: 0.8747515678405762 | train accu: 0.488 | train roc: 1.0

batch idx 73: | train loss: 0.8304031491279602 | train accu: 0.516 | train roc: 1.000 | train f1: 0.3508376288659794
batch idx 74: | train loss: 0.8363572359085083 | train accu: 0.480 | train roc: 1.000 | train f1: 0.31186098284960423
batch idx 75: | train loss: 0.8369020223617554 | train accu: 0.504 | train roc: 1.000 | train f1: 0.33768262987012987
batch idx 76: | train loss: 0.8463298082351685 | train accu: 0.516 | train roc: 1.000 | train f1: 0.3508376288659794
batch idx 77: | train loss: 0.83852219581604 | train accu: 0.492 | train roc: 1.000 | train f1: 0.3246891361256545
batch idx 78: | train loss: 0.8190856575965881 | train accu: 0.500 | train roc: 1.000 | train f1: 0.3333333333333333
batch idx 79: | train loss: 0.9012531042098999 | train accu: 0.387 | train roc: 1.000 | train f1: 0.21569102112676058
batch idx 80: | train loss: 0.8430414199829102 | train accu: 0.477 | train roc: 1.000 | train f1: 0.3076223544973545
batch idx 81: | train loss: 0.8541215658187866 | train accu: 0.

batch idx 21: | train loss: 0.8586851954460144 | train accu: 0.477 | train roc: 1.000 | train f1: 0.3887674307841457
batch idx 22: | train loss: 0.8371204137802124 | train accu: 0.520 | train roc: 1.000 | train f1: 0.442697874832664
batch idx 23: | train loss: 0.8343857526779175 | train accu: 0.484 | train roc: 1.000 | train f1: 0.41476969430410204
batch idx 24: | train loss: 0.8332566022872925 | train accu: 0.562 | train roc: 1.000 | train f1: 0.49880930573365084
batch idx 25: | train loss: 0.7935106158256531 | train accu: 0.535 | train roc: 1.000 | train f1: 0.4783159859572784
batch idx 26: | train loss: 0.8142514228820801 | train accu: 0.520 | train roc: 1.000 | train f1: 0.47117141812865493
batch idx 27: | train loss: 0.906700849533081 | train accu: 0.438 | train roc: 1.000 | train f1: 0.3898426120348827
batch idx 28: | train loss: 0.8478903770446777 | train accu: 0.543 | train roc: 1.000 | train f1: 0.4891915327288294
batch idx 29: | train loss: 0.8386271595954895 | train accu: 0.

batch idx 90: | train loss: 0.8380853533744812 | train accu: 0.559 | train roc: 1.000 | train f1: 0.5059982764876633
batch idx 91: | train loss: 0.7919902205467224 | train accu: 0.559 | train roc: 1.000 | train f1: 0.5123141104714912
batch idx 92: | train loss: 0.785679042339325 | train accu: 0.570 | train roc: 1.000 | train f1: 0.5241834055522915
batch idx 93: | train loss: 0.8284611105918884 | train accu: 0.590 | train roc: 1.000 | train f1: 0.5325112620003001
batch idx 94: | train loss: 0.7869013547897339 | train accu: 0.590 | train roc: 1.000 | train f1: 0.5424417325798668
batch idx 95: | train loss: 0.7997968196868896 | train accu: 0.547 | train roc: 1.000 | train f1: 0.49164718488213527
batch idx 96: | train loss: 0.7961394190788269 | train accu: 0.566 | train roc: 1.000 | train f1: 0.519928600511866
batch idx 97: | train loss: 0.8779410719871521 | train accu: 0.504 | train roc: 1.000 | train f1: 0.44780098545777614
batch idx 98: | train loss: 0.7865733504295349 | train accu: 0.5

batch idx 38: | train loss: 0.7979675531387329 | train accu: 0.539 | train roc: 1.000 | train f1: 0.4828778557626734
batch idx 39: | train loss: 0.8481553196907043 | train accu: 0.512 | train roc: 1.000 | train f1: 0.4359012394514768
batch idx 40: | train loss: 0.8266497850418091 | train accu: 0.508 | train roc: 1.000 | train f1: 0.4038860452586207
batch idx 41: | train loss: 0.8538693785667419 | train accu: 0.508 | train roc: 1.000 | train f1: 0.40945308588594054
batch idx 42: | train loss: 0.7946193218231201 | train accu: 0.547 | train roc: 1.000 | train f1: 0.46744701275598755
batch idx 43: | train loss: 0.7787404656410217 | train accu: 0.566 | train roc: 1.000 | train f1: 0.48449001547036613
batch idx 44: | train loss: 0.8343637585639954 | train accu: 0.520 | train roc: 1.000 | train f1: 0.4450829081632653
batch idx 45: | train loss: 0.7988905310630798 | train accu: 0.551 | train roc: 1.000 | train f1: 0.4873813657407407
batch idx 46: | train loss: 0.8376437425613403 | train accu: 

batch idx 107: | train loss: 0.7955224514007568 | train accu: 0.547 | train roc: 1.000 | train f1: 0.46089086416742153
batch idx 108: | train loss: 0.8513622283935547 | train accu: 0.488 | train roc: 1.000 | train f1: 0.4208551912499496
batch idx 109: | train loss: 0.8690065741539001 | train accu: 0.465 | train roc: 1.000 | train f1: 0.4027082717327025
batch idx 110: | train loss: 0.8313997983932495 | train accu: 0.547 | train roc: 1.000 | train f1: 0.4941707138216129
batch idx 111: | train loss: 0.857966959476471 | train accu: 0.555 | train roc: 1.000 | train f1: 0.5025120070778564
batch idx 112: | train loss: 0.8254427313804626 | train accu: 0.520 | train roc: 1.000 | train f1: 0.47967995061816127
batch idx 113: | train loss: 0.7855104804039001 | train accu: 0.594 | train roc: 1.000 | train f1: 0.5438577410408396
batch idx 114: | train loss: 0.8036777973175049 | train accu: 0.547 | train roc: 1.000 | train f1: 0.4939833263422819
batch idx 115: | train loss: 0.79694002866745 | train a

batch idx 55: | train loss: 0.7520896792411804 | train accu: 0.602 | train roc: 1.000 | train f1: 0.5429211696919034
batch idx 56: | train loss: 0.8168467283248901 | train accu: 0.531 | train roc: 1.000 | train f1: 0.4848153918214402
batch idx 57: | train loss: 0.819250762462616 | train accu: 0.539 | train roc: 1.000 | train f1: 0.48258404563608026
batch idx 58: | train loss: 0.8042557239532471 | train accu: 0.562 | train roc: 1.000 | train f1: 0.49131988396624476
batch idx 59: | train loss: 0.768286406993866 | train accu: 0.629 | train roc: 1.000 | train f1: 0.5794335332817337
batch idx 60: | train loss: 0.7985471487045288 | train accu: 0.551 | train roc: 1.000 | train f1: 0.508794452109667
batch idx 61: | train loss: 0.8237888216972351 | train accu: 0.543 | train roc: 1.000 | train f1: 0.49226184039792387
batch idx 62: | train loss: 0.788175642490387 | train accu: 0.578 | train roc: 1.000 | train f1: 0.5399130147202443
batch idx 63: | train loss: 0.8006595969200134 | train accu: 0.53

batch idx 3: | train loss: 0.7630407810211182 | train accu: 0.590 | train roc: 1.000 | train f1: 0.5261315049446988
batch idx 4: | train loss: 0.7705289125442505 | train accu: 0.609 | train roc: 1.000 | train f1: 0.5403501578263291
batch idx 5: | train loss: 0.7629514336585999 | train accu: 0.594 | train roc: 1.000 | train f1: 0.5313426948956841
batch idx 6: | train loss: 0.7951694130897522 | train accu: 0.562 | train roc: 1.000 | train f1: 0.49124370594159106
batch idx 7: | train loss: 0.8275613784790039 | train accu: 0.531 | train roc: 1.000 | train f1: 0.4653404209621993
batch idx 8: | train loss: 0.8125942349433899 | train accu: 0.531 | train roc: 1.000 | train f1: 0.4794882015306123
batch idx 9: | train loss: 0.7771074771881104 | train accu: 0.598 | train roc: 1.000 | train f1: 0.5492039295392954
batch idx 10: | train loss: 0.805815577507019 | train accu: 0.566 | train roc: 1.000 | train f1: 0.508820564516129
batch idx 11: | train loss: 0.8025102019309998 | train accu: 0.582 | tra

batch idx 73: | train loss: 0.775240421295166 | train accu: 0.586 | train roc: 1.000 | train f1: 0.5223992145394414
batch idx 74: | train loss: 0.7773873209953308 | train accu: 0.555 | train roc: 1.000 | train f1: 0.4905830753353973
batch idx 75: | train loss: 0.8148161768913269 | train accu: 0.543 | train roc: 1.000 | train f1: 0.47712779050374743
batch idx 76: | train loss: 0.8232009410858154 | train accu: 0.496 | train roc: 1.000 | train f1: 0.42905254633344025
batch idx 77: | train loss: 0.8271745443344116 | train accu: 0.512 | train roc: 1.000 | train f1: 0.4399283622007132
batch idx 78: | train loss: 0.8067781925201416 | train accu: 0.559 | train roc: 1.000 | train f1: 0.4924547697368421
batch idx 79: | train loss: 0.7696845531463623 | train accu: 0.578 | train roc: 1.000 | train f1: 0.5152638762511373
batch idx 80: | train loss: 0.8141669631004333 | train accu: 0.566 | train roc: 1.000 | train f1: 0.4992839845092924
batch idx 81: | train loss: 0.7854375839233398 | train accu: 0.

batch idx 21: | train loss: 0.832325279712677 | train accu: 0.555 | train roc: 1.000 | train f1: 0.4967416638069092
batch idx 22: | train loss: 0.7989644408226013 | train accu: 0.547 | train roc: 1.000 | train f1: 0.49873820479962727
batch idx 23: | train loss: 0.7444006204605103 | train accu: 0.645 | train roc: 1.000 | train f1: 0.6013655851953799
batch idx 24: | train loss: 0.7631651163101196 | train accu: 0.598 | train roc: 1.000 | train f1: 0.539682911879409
batch idx 25: | train loss: 0.7353612184524536 | train accu: 0.633 | train roc: 1.000 | train f1: 0.5850014400921659
batch idx 26: | train loss: 0.7667255401611328 | train accu: 0.598 | train roc: 1.000 | train f1: 0.5507612179487179
batch idx 27: | train loss: 0.8502188920974731 | train accu: 0.516 | train roc: 1.000 | train f1: 0.4505359378711695
batch idx 28: | train loss: 0.7374197840690613 | train accu: 0.617 | train roc: 1.000 | train f1: 0.5620039682539683
batch idx 29: | train loss: 0.796433687210083 | train accu: 0.578

batch idx 91: | train loss: 0.7918955087661743 | train accu: 0.609 | train roc: 1.000 | train f1: 0.541664297088262
batch idx 92: | train loss: 0.8643584251403809 | train accu: 0.500 | train roc: 1.000 | train f1: 0.43981804586241274
batch idx 93: | train loss: 0.7458471655845642 | train accu: 0.594 | train roc: 1.000 | train f1: 0.5493286400481686
batch idx 94: | train loss: 0.8009623289108276 | train accu: 0.590 | train roc: 1.000 | train f1: 0.5378639914772728
batch idx 95: | train loss: 0.7563493251800537 | train accu: 0.598 | train roc: 1.000 | train f1: 0.5453630277207782
batch idx 96: | train loss: 0.7506570219993591 | train accu: 0.633 | train roc: 1.000 | train f1: 0.5763257575757575
batch idx 97: | train loss: 0.7937060594558716 | train accu: 0.570 | train roc: 1.000 | train f1: 0.5147903799363653
batch idx 98: | train loss: 0.8254976272583008 | train accu: 0.516 | train roc: 1.000 | train f1: 0.4592775775105436
batch idx 99: | train loss: 0.8196495175361633 | train accu: 0.5

batch idx 39: | train loss: 0.8606077432632446 | train accu: 0.516 | train roc: 1.000 | train f1: 0.4560136683481196
batch idx 40: | train loss: 0.7914617657661438 | train accu: 0.582 | train roc: 1.000 | train f1: 0.5313134854810452
batch idx 41: | train loss: 0.761401355266571 | train accu: 0.605 | train roc: 1.000 | train f1: 0.5617382177681474
batch idx 42: | train loss: 0.8235064744949341 | train accu: 0.559 | train roc: 1.000 | train f1: 0.4967499314692983
batch idx 43: | train loss: 0.7522951364517212 | train accu: 0.613 | train roc: 1.000 | train f1: 0.5657204581993569
batch idx 44: | train loss: 0.8083391785621643 | train accu: 0.559 | train roc: 1.000 | train f1: 0.4994736920688007
batch idx 45: | train loss: 0.7610985636711121 | train accu: 0.566 | train roc: 1.000 | train f1: 0.5056051826511544
batch idx 46: | train loss: 0.7390798926353455 | train accu: 0.594 | train roc: 1.000 | train f1: 0.5182542067307693
batch idx 47: | train loss: 0.7946428656578064 | train accu: 0.57

batch idx 108: | train loss: 0.7738227844238281 | train accu: 0.586 | train roc: 1.000 | train f1: 0.5396487813600352
batch idx 109: | train loss: 0.8255994319915771 | train accu: 0.527 | train roc: 1.000 | train f1: 0.46311648170756115
batch idx 110: | train loss: 0.7635941505432129 | train accu: 0.562 | train roc: 1.000 | train f1: 0.5048654878618113
batch idx 111: | train loss: 0.8023726344108582 | train accu: 0.555 | train roc: 1.000 | train f1: 0.4887126189983444
batch idx 112: | train loss: 0.7709954977035522 | train accu: 0.590 | train roc: 1.000 | train f1: 0.5247081644568723
batch idx 113: | train loss: 0.7796867489814758 | train accu: 0.582 | train roc: 1.000 | train f1: 0.4981635334996436
batch idx 114: | train loss: 0.7584115266799927 | train accu: 0.602 | train roc: 1.000 | train f1: 0.5421184289127838
batch idx 115: | train loss: 0.7598327398300171 | train accu: 0.578 | train roc: 1.000 | train f1: 0.5075116131756756
batch idx 116: | train loss: 0.816075325012207 | train 

batch idx 56: | train loss: 0.7630661725997925 | train accu: 0.570 | train roc: 1.000 | train f1: 0.5074551458990536
batch idx 57: | train loss: 0.7896111011505127 | train accu: 0.582 | train roc: 1.000 | train f1: 0.515315031949549
batch idx 58: | train loss: 0.8120903968811035 | train accu: 0.555 | train roc: 1.000 | train f1: 0.4784647371828362
batch idx 59: | train loss: 0.8123336434364319 | train accu: 0.551 | train roc: 1.000 | train f1: 0.48359837729742927
batch idx 60: | train loss: 0.7809569835662842 | train accu: 0.582 | train roc: 1.000 | train f1: 0.539200141829715
batch idx 61: | train loss: 0.7838171720504761 | train accu: 0.578 | train roc: 1.000 | train f1: 0.5357277594136246
batch idx 62: | train loss: 0.8138625025749207 | train accu: 0.547 | train roc: 1.000 | train f1: 0.4977378090659341
batch idx 63: | train loss: 0.7826496362686157 | train accu: 0.598 | train roc: 1.000 | train f1: 0.5369232110224103
batch idx 64: | train loss: 0.7837482690811157 | train accu: 0.59

In [None]:
model.load_state_dict(torch.load('best_model.pt'))

In [None]:
valid_loss, valid_acc, valid_rocauc, valid_f1 = evaluate(model, valid_loader, criterion)
print("Valid loss: {} | Valid Acc: {:.3f} | Valid ROC-AUC: {} | Valid f1: {}".format(
    valid_loss, valid_acc, valid_rocauc, valid_f1))
test_loss, test_acc, test_rocauc, test_f1 = evaluate(model, test_loader, criterion)
print("Test loss: {} | Test Acc: {:.3f} | Test ROC-AUC: {} | Test f1: {}".format(
    test_loss, test_acc, test_rocauc, test_f1))

In [None]:
def plot_history(hist):
    plt.figure(figsize=(10, 7))
    plt.plot(np.arange(1, len(history["train_loss"]) + 1), history["train_loss"], label="training loss")
    plt.plot(np.arange(1, len(history["train_loss"]) + 1), history["valid_loss"], label="validation loss")
    plt.legend(loc="best")
    plt.title("Training and Validation Losses")
    plt.show()

In [None]:
import matplotlib.pyplot as plt

plot_history(history)