In [1]:
!nvidia-smi

Wed May 31 15:53:36 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.156.00   Driver Version: 450.156.00   CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  A100-SXM4-40GB      On   | 00000000:07:00.0 Off |                    0 |
| N/A   34C    P0    90W / 400W |  40380MiB / 40537MiB |    100%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  A100-SXM4-40GB      On   | 00000000:0F:00.0 Off |                    0 |
| N/A   34C    P0    94W / 400W |  39450MiB / 40537MiB |     79%      Default |
|       

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '6'

# Import

In [3]:
!pip install -qq transformers
!pip install -qq torchsummary
!pip install -qq scikit-multilearn
!pip install -qq tqdm
!pip install -qq ipywidgets

In [4]:
import torch
import io
import torch.nn.functional as F
import random
import numpy as np
import time
import math
import datetime
import torch.nn as nn
import pandas as pd
from transformers import *
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import warnings
warnings.filterwarnings("ignore", 'This pattern has match groups')
import logging
logging.getLogger("transformers").setLevel(logging.WARNING)

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [5]:
seed_val = 1906350912
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)

<torch._C.Generator at 0x7fac85ce6730>

In [6]:
if torch.cuda.is_available():
  torch.cuda.manual_seed_all(seed_val)

In [7]:
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('Tersedia sejumlah %d GPU(s).' % torch.cuda.device_count())
    print('GPU yang akan digunakan:', torch.cuda.get_device_name(0))
else:
    print('Tidak mendukung GPU; hanya CPU')
    device = torch.device("cpu")

Tersedia sejumlah 1 GPU(s).
GPU yang akan digunakan: A100-SXM4-40GB


In [8]:
df_train = pd.read_csv(f"Dataset/Human_Annotated.csv", index_col="ID")
df_train.fillna("", inplace = True)
df_test = pd.read_csv(f"Dataset/Gold_Standard.csv", index_col="ID")
df_test.fillna("", inplace = True)
categories = ['Anak', 'Bedah', 'Gigi', 'Gizi', 'Jantung', 'Jiwa',
       'Kandungan', 'Kulit dan Kelamin', 'Mata', 'Paru', 'Penyakit Dalam',
       'Saraf', 'THT', 'Tulang', 'Urologi']

In [9]:
df_train_umumnt = df_train.drop(columns=["Umum"])
df_train_umumnt["COUNT CLASS"] = df_train_umumnt.drop(columns=["JUDUL", "ISI"]).sum(axis=1)

df_train = df_train_umumnt[df_train_umumnt["COUNT CLASS"] > 0].drop(columns=["COUNT CLASS"])
df_test['Count'] = df_test.drop(columns=['JUDUL', 'ISI']).values.sum(axis=1)
df_test = df_test[df_test['Count'] <= 3].drop(columns=['Count'])
df_test_umumnt = df_test.drop(columns=["Umum"])
df_test_umumnt["COUNT CLASS"] = df_test_umumnt.drop(columns=["JUDUL", "ISI"]).sum(axis=1)
df_test = df_test_umumnt[df_test_umumnt["COUNT CLASS"] > 0].drop(columns=["COUNT CLASS"])

In [10]:
code = ['DS', 'AD', 'DH', 'TD', 'KD']
for i in code:
  df_train[i] = df_train.index.map(lambda x: 1 if x[:2] == i else 0)

X = df_train[['JUDUL', 'ISI']].values
y = df_train.drop(columns=['JUDUL', 'ISI']).values

from skmultilearn.model_selection import iterative_train_test_split
X_train, y_train, X_val, y_val = iterative_train_test_split(X, y,test_size = 0.2)
y_train = y_train[:,:-5]
y_val = y_val[:,:-5]

In [11]:
df_test = df_test[["JUDUL", "ISI"] + categories]
X_test = df_test[['JUDUL', 'ISI']].values
y_test = df_test.drop(columns=['JUDUL', 'ISI']).values

In [12]:
df_machine = pd.read_csv(f"Dataset/Mini_Machine_Annotated.csv", index_col="ID").drop(columns=['KW_COUNT'])
df_unlabel = pd.read_csv(f"Dataset/Mini_unlabeled.csv", index_col="ID").drop(columns=['KW_COUNT'])
unlabeled_examples = list(df_machine.values) + list(df_unlabel.values)

# Param

In [13]:
#--------------------------------
#  Transformer parameters
#--------------------------------
MAX_SEQ_LENGTH = 256
BATCH_SIZE = 8

#--------------------------------
#  GAN-BERT specific parameters
#--------------------------------
# number of hidden layers in the generator, 
# each of the size of the output space
NUM_HIDDEN_LAYERS_G = 1; 
# number of hidden layers in the discriminator, 
# each of the size of the input space
NUM_HIDDEN_LAYERS_D = 1; 
# size of the generator's input noisy vectors
NOISE_SIZE = 256
# dropout to be applied to discriminator's input vectors
OUT_DROPOUT_RATE = 0.01

# Replicate labeled data to balance poorly represented datasets, 
# e.g., less than 1% of labeled material
APPLY_BALANCE = True

#--------------------------------
#  Optimization parameters
#--------------------------------
learning_rate_discriminator = 1e-5
learning_rate_generator = 1e-5
epsilon = 1e-8
REGULARIZATION = 0.02
num_train_epochs = 10
multi_gpu = False
EPOCH = 100
# Scheduler
apply_scheduler = True
WARMUP_PROPORTION = 0.1
# Print
print_each_n_step = 10

#--------------------------------
#  Adopted Tranformer model
#--------------------------------
# Since this version is compatible with Huggingface transformers, you can uncomment
# (or add) transformer models compatible with GAN
PATH = f"Model/ganbert-dapt.pt"
MODEL_NAME = "dapt-indonlu-medqna"
#model_name = "bert-base-uncased"
#model_name = "roberta-base"
#model_name = "albert-base-v2"
#model_name = "xlm-roberta-base"
#model_name = "amazon/bort"

# Tokenize

In [14]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [15]:
categories = ['Anak', 'Bedah', 'Gigi', 'Gizi', 'Jantung', 'Jiwa',
       'Kandungan', 'Kulit dan Kelamin', 'Mata', 'Paru', 'Penyakit Dalam',
       'Saraf', 'THT', 'Tulang', 'Urologi']
label2idx = {k: v for v, k in enumerate(categories)}
idx2label = {v: k for v, k in enumerate(categories)}

In [16]:
def generate_data_loader(texts, labels, masks, do_shuffle = False, balance_label_examples = False):
  '''
  Generate a Dataloader given the input examples, eventually masked if they are 
  to be considered NOT labeled.
  '''
  examples = []

  # Count the percentage of labeled examples  
  num_labeled_examples = 0
  for mask in masks:
    if mask: 
      num_labeled_examples += 1
  label_mask_rate = num_labeled_examples/len(texts)

  # if required it applies the balance
  for text, label, mask in zip(texts, labels, masks): 
    if mask == 1 or not balance_label_examples:
      examples.append((text, label, mask))
    else:
      # IT SIMULATE A LABELED EXAMPLE
      if mask:
        balance = int(1/label_mask_rate)
        balance = int(math.log(balance,2))
        if balance < 1:
          balance = 1
        for b in range(0, int(balance)):
          examples.append((text, label, mask))
      else:
        examples.append((text, label, mask))
  
  #-----------------------------------------------
  # Generate input examples to the Transformer
  #-----------------------------------------------
  input_ids = []
  input_att_mask = []
  input_label = []
  input_mask = []

  # Tokenization 
  for (text, label, mask) in examples:
    encoded_sent = tokenizer.encode(text, \
                                  add_special_tokens = True, \
                                  max_length = MAX_SEQ_LENGTH, \
                                  padding = "max_length", \
                                  truncation = True)
    input_ids.append(encoded_sent)
    input_att_mask.append([int(token_id > 0) for token_id in encoded_sent])
    input_label.append(label)
    input_mask.append(mask)
    
  # Convertion to Tensor
  input_ids = torch.tensor(input_ids) 
  input_att_mask = torch.tensor(input_att_mask)
  input_label = torch.tensor(input_label, dtype=torch.long)
  input_mask = torch.tensor(input_mask)

  # Building the TensorDataset
  dataset = TensorDataset(input_ids, input_att_mask, input_label, input_mask)

  if do_shuffle:
    sampler = RandomSampler
  else:
    sampler = SequentialSampler

  # Building the DataLoader
  return DataLoader(
              dataset,  # The training samples.
              sampler = sampler(dataset), 
              batch_size = BATCH_SIZE) # Trains with this batch size.

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [17]:
#------------------------------
#   Load the train dataset
#------------------------------
train_examples = X_train
label_examples = y_train
#The labeled (train) dataset is assigned with a mask set to True
train_label_masks = np.ones(len(train_examples), dtype=bool)
#If unlabel examples are available
if unlabeled_examples:
  train_examples = list(train_examples) + list(unlabeled_examples)

  #The unlabeled (train) dataset is assigned with a mask set to False
  tmp_masks = np.zeros(len(unlabeled_examples), dtype=bool)
  tmp_labels = [[-1] * len(categories)] *len(unlabeled_examples)
  train_label_masks = np.concatenate([train_label_masks,tmp_masks])
  label_examples = list(label_examples) + list(tmp_labels) 

train_dataloader = generate_data_loader(train_examples, label_examples, train_label_masks, do_shuffle = True, balance_label_examples = APPLY_BALANCE)

# ------------------------------
#   Load the test dataset
# ------------------------------
# The labeled (test) dataset is assigned with a mask set to True
val_label_masks = np.ones(len(X_val), dtype=bool)
val_dataloader = generate_data_loader(X_val, y_val, val_label_masks, do_shuffle = False, balance_label_examples = False)
test_label_masks = np.ones(len(X_test), dtype=bool)
test_dataloader = generate_data_loader(X_test, y_test, test_label_masks, do_shuffle = False, balance_label_examples = False)

dataset = {"Train" : train_dataloader, "Val" : val_dataloader, "Test" : test_dataloader}

  input_label = torch.tensor(input_label, dtype=torch.long)
  input_mask = torch.tensor(input_mask)


# Model

In [18]:
#------------------------------
#   The Generator as in 
#   https://www.aclweb.org/anthology/2020.acl-main.191/
#   https://github.com/crux82/ganbert
#------------------------------
class Generator(nn.Module):
    def __init__(self, noise_size=100, output_size=512, hidden_sizes=[512], dropout_rate=0.1):
        super(Generator, self).__init__()
        layers = []
        hidden_sizes = [noise_size] + hidden_sizes
        for i in range(len(hidden_sizes)-1):
            layers.extend([nn.Linear(hidden_sizes[i], hidden_sizes[i+1]), nn.LeakyReLU(0.2, inplace=True), nn.Dropout(dropout_rate)])

        layers.append(nn.Linear(hidden_sizes[-1],output_size))
        self.layers = nn.Sequential(*layers)

    def forward(self, noise):
        output_rep = self.layers(noise)
        return output_rep

#------------------------------
#   The Discriminator
#   https://www.aclweb.org/anthology/2020.acl-main.191/
#   https://github.com/crux82/ganbert
#------------------------------
class Discriminator(nn.Module):
    def __init__(self, input_size=512, hidden_sizes=[512], num_labels=2, dropout_rate=0.1):
        super(Discriminator, self).__init__()
        self.input_dropout = nn.Dropout(p=dropout_rate)
        layers = []
        hidden_sizes = [input_size] + hidden_sizes
        for i in range(len(hidden_sizes)-1):
            layers.extend([nn.Linear(hidden_sizes[i], hidden_sizes[i+1]), nn.LeakyReLU(0.2, inplace=True), nn.Dropout(dropout_rate)])

        self.layers = nn.Sequential(*layers) #per il flatten
        self.logit = nn.Linear(hidden_sizes[-1],num_labels+1) # +1 for the probability of this sample being fake/real.
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_rep):
        input_rep = self.input_dropout(input_rep)
        last_rep = self.layers(input_rep)
        logits = self.logit(last_rep)
        probs = self.sigmoid(logits)
        return last_rep, probs

In [19]:
# from torchsummary import summary

# summary(generator, (8, 100))
# print()
# summary(discriminator, (16, 1024))

In [19]:
transformer = None
generator = None
discriminator = None
def create_model(transformer, generator, discriminator):
    if transformer != None:
        del transformer, generator, discriminator
        torch.cuda.empty_cache()
    config = AutoConfig.from_pretrained(MODEL_NAME)
    hidden_size = int(config.hidden_size)
    # Define the number and width of hidden layers
    hidden_levels_g = [hidden_size for i in range(0, NUM_HIDDEN_LAYERS_G)]
    hidden_levels_d = [hidden_size for i in range(0, NUM_HIDDEN_LAYERS_D)]
    
    transformer = AutoModel.from_pretrained(MODEL_NAME)
    generator = Generator(noise_size=NOISE_SIZE, output_size=hidden_size, hidden_sizes=hidden_levels_g, dropout_rate=OUT_DROPOUT_RATE)
    discriminator = Discriminator(input_size=hidden_size, hidden_sizes=hidden_levels_d, num_labels=len(categories), dropout_rate=OUT_DROPOUT_RATE)

    if torch.cuda.is_available():    
        generator.cuda()
        discriminator.cuda()
        transformer.cuda()
    return transformer, generator, discriminator
# transformer, generator, discriminator = create_model(transformer, generator, discriminator)

In [20]:
# training
from tqdm import tqdm
from time import sleep
import copy
from sklearn.metrics import classification_report, accuracy_score, recall_score, precision_score

def get_fake_tensor(size):
    return torch.tensor([[0] * len(categories) + [1]] * size).to(device)

def train_model(transformers, generator, discriminator, gen_optimizer, dis_optimizer, gen_scheduler, dis_scheduler, num_epochs=25, last_epoch=0, last_acc=0):
  best_acc = last_acc
  max_epoch = last_epoch + num_epochs
  for epoch_i in range(last_epoch, max_epoch):
    print(f"Epoch {epoch_i + 1} / {max_epoch}")
    
    # Change dataset
    for phase in ["Train", "Val"]:
      instance = dataset[phase]
      if phase == "Train":
        # Training variable
        tr_g_loss = 0
        tr_d_loss = 0 
        tr_d_l_sup = 0
        tr_d_l_unsup = 0
        
        transformer.train() #maybe redundant
        discriminator.train()
        generator.train()
      else:
        # validation variables
        total_test_loss = 0

        pred_labels = []
        true_labels = []
        
        transformer.eval() #maybe redundant
        discriminator.eval()
        generator.eval()
        
      # Untuk setiap batch di training data
      pbar = tqdm(enumerate(instance), desc=phase, total=len(instance))
      for step, batch in pbar:
        
        batch_input_ids = batch[0].to(device)
        batch_input_att_mask = batch[1].to(device)
        batch_label = batch[2].to(device)
        batch_mask = batch[3].to(device)
        real_batch_size = batch_input_ids.shape[0]
        
        if phase == "Train":
            # Encode real data in the Transformer
            model_outputs = transformer(batch_input_ids, attention_mask=batch_input_att_mask)
            hidden_states = model_outputs[-1]
            # Generate fake data that should have the same distribution of the ones
            # encoded by the transformer. 
            # First noisy input are used in input to the Generator
            noise = torch.zeros(real_batch_size, NOISE_SIZE, device=device).uniform_(0, 1)
            # Gnerate Fake data
            gen_rep = generator(noise)

            # Generate the output of the Discriminator for real and fake data.
            # First, we put together the output of the tranformer and the generator
            disciminator_input = torch.cat([hidden_states, gen_rep], dim=0)
            # Then, we select the output of the disciminator
            features, probs = discriminator(disciminator_input)

            # Finally, we separate the discriminator's output for the real and fake
            # data
            features_list = torch.split(features, real_batch_size)
            D_real_features = features_list[0]
            D_fake_features = features_list[1]
            probs_list = torch.split(probs, real_batch_size)
            D_real_probs = probs_list[0]
            D_fake_probs = probs_list[1]

            #---------------------------------
            #  LOSS evaluation
            #---------------------------------
            # Generator's LOSS estimation
            g_loss_d = -1 * torch.mean(torch.log(1 - D_fake_probs[:,-1] + epsilon))
#             g_loss_d = -torch.mean(torch.sum((1 - get_fake_tensor(D_fake_probs.size()[0])), dim = -1))
            g_feat_reg = torch.mean(torch.pow(torch.mean(D_real_features, dim=0) - torch.mean(D_fake_features, dim=0), 2))
            g_loss = g_loss_d + g_feat_reg

            # Disciminator's LOSS estimation
            log_probs = D_real_probs[:,0:-1]
            # The discriminator provides an output for labeled and unlabeled real data
            # so the loss evaluated for unlabeled data is ignored (masked)
            per_example_loss = -torch.sum(batch_label * torch.log(log_probs) + (1 - batch_label) * torch.log(1 - log_probs), dim = -1)
            per_example_loss = torch.masked_select(per_example_loss, batch_mask.to(device))
            labeled_example_count = per_example_loss.type(torch.float32).numel()
            
            # It may be the case that a batch does not contain labeled examples, 
            # so the "supervised loss" in this case is not evaluated
            if labeled_example_count == 0:
              D_L_Supervised = torch.tensor([0]).to(device)
            else:
              D_L_Supervised = torch.div(torch.sum(per_example_loss.to(device)), labeled_example_count)
            
            D_L_unsupervised1U = -1 * torch.mean(torch.log(1 - D_real_probs[:, -1] + epsilon))
            D_L_unsupervised2U = -1 * torch.mean(torch.log(D_fake_probs[:, -1] + epsilon))
#             D_L_unsupervised1U = -torch.mean(torch.sum(get_fake_tensor(D_real_probs.size()[0]) * torch.log(D_real_probs) + (1 - get_fake_tensor(D_real_probs.size()[0])) * torch.log(1 - D_real_probs), dim = -1))
#             D_L_unsupervised2U = -torch.mean(torch.sum(get_fake_tensor(D_fake_probs.size()[0]) * torch.log(D_fake_probs)), dim = -1)
            D_L_unsupervised = D_L_unsupervised1U + D_L_unsupervised2U
            d_loss = D_L_Supervised + D_L_unsupervised
            
            #---------------------------------
            #  OPTIMIZATION
            #---------------------------------
            # Avoid gradient accumulation
            gen_optimizer.zero_grad()
            dis_optimizer.zero_grad()

            # Calculate weigth updates
            # retain_graph=True is required since the underlying graph will be deleted after backward
            g_loss.backward(retain_graph=True)
            d_loss.backward() 

            # Apply modifications
            gen_optimizer.step()
            dis_optimizer.step()

            # A detail log of the individual losses
            #print("{0:.4f}\t{1:.4f}\t{2:.4f}\t{3:.4f}\t{4:.4f}".
            #      format(D_L_Supervised, D_L_unsupervised1U, D_L_unsupervised2U,
            #             g_loss_d, g_feat_reg))

            # Save the losses to print them later
            tr_g_loss += g_loss.item()
            tr_d_loss += d_loss.item()
            tr_d_l_sup += D_L_Supervised.item()
            tr_d_l_unsup += D_L_unsupervised.item()

            # Update the learning rate with the scheduler
            if apply_scheduler:
              gen_scheduler.step()
              dis_scheduler.step()
                
            pbar.set_postfix(g_loss = str(f'{(tr_g_loss / len(instance)):.2f}'), d_loss = str(f'{(tr_d_loss / len(instance)):.2f}'), d_loss_sup = str(f'{(tr_d_l_sup / len(instance)):.2f}'), d_loss_unsup = str(f'{(tr_d_l_unsup / len(instance)):.2f}'))
            sleep(0.1)

        elif phase == "Val":
            # Tell pytorch not to bother with constructing the compute graph during
            # the forward pass, since this is only needed for backprop (training).
            with torch.no_grad():        
                model_outputs = transformer(batch_input_ids, attention_mask=batch_input_att_mask)
                hidden_states = model_outputs[-1]
                _, probs = discriminator(hidden_states)
                ###log_probs = F.log_softmax(probs[:,1:], dim=-1)
                filtered_logits = probs[:,0:-1]
                # Accumulate the test loss.
                loss = -torch.mean(torch.sum(batch_label * torch.log(filtered_logits) + (1 - batch_label) * torch.log(1 - filtered_logits), dim = 1))
                total_test_loss += loss

            pred_l = []
            for prob in filtered_logits:
                pred = [1 if x >= 0.5 else 0 for x in prob]
                if (np.sum(pred) == 0):
                    _, idx = torch.max(prob, dim = 0)
                    pred[idx] = 1
                pred_l.append(pred)
            pred_labels += pred_l
            true_l = batch_label
            true_labels += true_l.detach().cpu()
            
            # pbar.set_description(f"Average loss : {(loss_track / len(instance)):.2f}")
            pbar.set_postfix(loss = str(f'{(total_test_loss / len(instance)):.2f}'))
            sleep(0.1)
            
    # accuracy
    pred_labels = np.array(pred_labels)
    true_labels = torch.stack(true_labels).numpy()
    val_accuracy = accuracy_score(true_labels, pred_labels)
    val_precision = precision_score(true_labels, pred_labels, average='micro')
    val_recall = recall_score(true_labels, pred_labels, average='micro')
    print(f"Accuracy: {val_accuracy}, Precision (micro): {val_precision}, Recall (micro): {val_recall}")

    if (val_accuracy > best_acc):
      
      torch.save({
                  'best_transformer_sd' : transformer.state_dict(),
                  'best_generator_sd' : generator.state_dict(),
                  'best_discriminator_sd' : discriminator.state_dict(),
                  'best_gen_optimizer_sd' : gen_optimizer.state_dict(),
                  'best_dis_optimizer_sd' : dis_optimizer.state_dict(),
                  'best_gen_scheduler_sd' : gen_scheduler.state_dict(),
                  'best_dis_scheduler_sd' : dis_scheduler.state_dict(),
                  'best_epoch' : epoch_i,
                  'best_acc' : val_accuracy
                  }, PATH)
      best_acc = val_accuracy


  checkpoint = torch.load(PATH)
  transformer.load_state_dict(checkpoint['best_transformer_sd'])
  generator.load_state_dict(checkpoint['best_generator_sd'])
  discriminator.load_state_dict(checkpoint['best_discriminator_sd'])
  gen_optimizer.load_state_dict(checkpoint['best_gen_optimizer_sd'])
  dis_optimizer.load_state_dict(checkpoint['best_dis_optimizer_sd'])
  gen_scheduler.load_state_dict(checkpoint['best_gen_scheduler_sd'])
  dis_scheduler.load_state_dict(checkpoint['best_dis_scheduler_sd'])
  best_epoch = checkpoint['best_epoch']
  best_acc = checkpoint['best_acc']

  return {'transformer' : transformer, 'generator' : generator, 'discriminator' : discriminator, \
          'gen_optimizer' : gen_optimizer, 'dis_optimizer' : dis_optimizer,  \
          'gen_scheduler' : gen_scheduler, 'dis_scheduler' : dis_scheduler,  \
          'best_acc' : best_acc, "best_epoch" : best_epoch}  
#   return model

In [30]:
%%time
transformer, generator, discriminator = create_model(transformer, generator, discriminator)
transformer_vars = [i for i in transformer.parameters()]
d_vars = transformer_vars + [v for v in discriminator.parameters()]
g_vars = [v for v in generator.parameters()]

#optimizer
dis_optimizer = torch.optim.AdamW(d_vars, lr=learning_rate_discriminator)
gen_optimizer = torch.optim.AdamW(g_vars, lr=learning_rate_generator) 

num_train_instances = len(train_dataloader)
num_train_steps = int(num_train_instances / BATCH_SIZE * EPOCH)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

#scheduler
dis_scheduler = get_constant_schedule_with_warmup(dis_optimizer, num_warmup_steps = num_warmup_steps)
gen_scheduler = get_constant_schedule_with_warmup(gen_optimizer, num_warmup_steps = num_warmup_steps)

model = train_model(transformer, generator, discriminator, gen_optimizer, dis_optimizer, gen_scheduler, dis_scheduler, num_epochs=EPOCH, last_epoch=0, last_acc=0)

evaluate_dataset(transformer, generator, discriminator, dataset['Test'])

Epoch 1 / 100


Train: 100%|██████████| 901/901 [04:09<00:00,  3.62it/s, d_loss=5.51, d_loss_sup=4.58, d_loss_unsup=0.92, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.25it/s, loss=3.65]


Accuracy: 0.31309904153354634, Precision (micro): 0.38977635782747605, Recall (micro): 0.34173669467787116
Epoch 2 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=2.81, d_loss_sup=2.06, d_loss_unsup=0.75, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.31it/s, loss=1.83]


Accuracy: 0.6964856230031949, Precision (micro): 0.8146964856230032, Recall (micro): 0.7142857142857143
Epoch 3 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=1.96, d_loss_sup=1.22, d_loss_unsup=0.73, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.30it/s, loss=1.44]


Accuracy: 0.731629392971246, Precision (micro): 0.8395061728395061, Recall (micro): 0.7619047619047619
Epoch 4 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=1.52, d_loss_sup=0.79, d_loss_unsup=0.72, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.27it/s, loss=1.25]


Accuracy: 0.7348242811501597, Precision (micro): 0.8425925925925926, Recall (micro): 0.7647058823529411
Epoch 5 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=1.33, d_loss_sup=0.61, d_loss_unsup=0.72, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.26it/s, loss=1.12]


Accuracy: 0.7539936102236422, Precision (micro): 0.8473053892215568, Recall (micro): 0.7927170868347339
Epoch 6 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=1.16, d_loss_sup=0.45, d_loss_unsup=0.72, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.24it/s, loss=1.18]


Accuracy: 0.7539936102236422, Precision (micro): 0.8504398826979472, Recall (micro): 0.8123249299719888
Epoch 7 / 100


Train: 100%|██████████| 901/901 [04:09<00:00,  3.61it/s, d_loss=1.75, d_loss_sup=1.03, d_loss_unsup=0.72, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.26it/s, loss=3.48]


Accuracy: 0.2747603833865815, Precision (micro): 0.3057324840764331, Recall (micro): 0.2689075630252101
Epoch 8 / 100


Train: 100%|██████████| 901/901 [04:09<00:00,  3.61it/s, d_loss=1.64, d_loss_sup=0.92, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.25it/s, loss=1.14]


Accuracy: 0.7635782747603834, Precision (micro): 0.8380681818181818, Recall (micro): 0.8263305322128851
Epoch 9 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.94, d_loss_sup=0.22, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.23it/s, loss=1.15]


Accuracy: 0.7539936102236422, Precision (micro): 0.8419540229885057, Recall (micro): 0.8207282913165266
Epoch 10 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.91, d_loss_sup=0.20, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.25it/s, loss=1.16]


Accuracy: 0.7699680511182109, Precision (micro): 0.8559077809798271, Recall (micro): 0.8319327731092437
Epoch 11 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.86, d_loss_sup=0.15, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.11it/s, loss=1.15]


Accuracy: 0.7603833865814696, Precision (micro): 0.8575581395348837, Recall (micro): 0.8263305322128851
Epoch 12 / 100


Train: 100%|██████████| 901/901 [04:11<00:00,  3.58it/s, d_loss=0.85, d_loss_sup=0.14, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.12it/s, loss=1.36]


Accuracy: 0.7476038338658147, Precision (micro): 0.8323699421965318, Recall (micro): 0.8067226890756303
Epoch 13 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.85, d_loss_sup=0.15, d_loss_unsup=0.71, g_loss=0.76]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.30it/s, loss=1.24]


Accuracy: 0.7699680511182109, Precision (micro): 0.8467966573816156, Recall (micro): 0.8515406162464986
Epoch 14 / 100


Train: 100%|██████████| 901/901 [04:09<00:00,  3.62it/s, d_loss=0.81, d_loss_sup=0.11, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.26it/s, loss=1.22]


Accuracy: 0.7859424920127795, Precision (micro): 0.8575498575498576, Recall (micro): 0.8431372549019608
Epoch 15 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.77, d_loss_sup=0.07, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.31it/s, loss=1.41]


Accuracy: 0.7603833865814696, Precision (micro): 0.8424068767908309, Recall (micro): 0.8235294117647058
Epoch 16 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.76, d_loss_sup=0.05, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.28it/s, loss=1.47]


Accuracy: 0.7507987220447284, Precision (micro): 0.8352272727272727, Recall (micro): 0.8235294117647058
Epoch 17 / 100


Train: 100%|██████████| 901/901 [04:09<00:00,  3.62it/s, d_loss=0.79, d_loss_sup=0.09, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.24it/s, loss=1.29]


Accuracy: 0.7667731629392971, Precision (micro): 0.8490028490028491, Recall (micro): 0.834733893557423
Epoch 18 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.77, d_loss_sup=0.06, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.30it/s, loss=1.35]


Accuracy: 0.7667731629392971, Precision (micro): 0.848314606741573, Recall (micro): 0.84593837535014
Epoch 19 / 100


Train: 100%|██████████| 901/901 [04:09<00:00,  3.61it/s, d_loss=0.76, d_loss_sup=0.05, d_loss_unsup=0.70, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.23it/s, loss=1.51]


Accuracy: 0.7284345047923323, Precision (micro): 0.8181818181818182, Recall (micro): 0.8067226890756303
Epoch 20 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.73, d_loss_sup=0.03, d_loss_unsup=0.70, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.18it/s, loss=1.35]


Accuracy: 0.7507987220447284, Precision (micro): 0.8389830508474576, Recall (micro): 0.8319327731092437
Epoch 21 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.73, d_loss_sup=0.03, d_loss_unsup=0.70, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.23it/s, loss=1.41]


Accuracy: 0.7731629392971247, Precision (micro): 0.8547008547008547, Recall (micro): 0.8403361344537815
Epoch 22 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.77, d_loss_sup=0.07, d_loss_unsup=0.70, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.24it/s, loss=1.41]


Accuracy: 0.7827476038338658, Precision (micro): 0.8554913294797688, Recall (micro): 0.8291316526610645
Epoch 23 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.76, d_loss_sup=0.06, d_loss_unsup=0.70, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.23it/s, loss=1.41]


Accuracy: 0.7699680511182109, Precision (micro): 0.848314606741573, Recall (micro): 0.84593837535014
Epoch 24 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.59it/s, d_loss=0.75, d_loss_sup=0.05, d_loss_unsup=0.70, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.21it/s, loss=1.60]


Accuracy: 0.7667731629392971, Precision (micro): 0.8513119533527697, Recall (micro): 0.8179271708683473
Epoch 25 / 100


Train: 100%|██████████| 901/901 [04:09<00:00,  3.61it/s, d_loss=0.76, d_loss_sup=0.05, d_loss_unsup=0.70, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.23it/s, loss=1.47]


Accuracy: 0.7891373801916933, Precision (micro): 0.8653295128939829, Recall (micro): 0.84593837535014
Epoch 26 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.75, d_loss_sup=0.05, d_loss_unsup=0.70, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.27it/s, loss=1.57]


Accuracy: 0.7539936102236422, Precision (micro): 0.8296089385474861, Recall (micro): 0.8319327731092437
Epoch 27 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.73, d_loss_sup=0.03, d_loss_unsup=0.70, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.27it/s, loss=1.60]


Accuracy: 0.7539936102236422, Precision (micro): 0.8352272727272727, Recall (micro): 0.8235294117647058
Epoch 28 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.72, d_loss_sup=0.02, d_loss_unsup=0.70, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.28it/s, loss=1.65]


Accuracy: 0.7539936102236422, Precision (micro): 0.8405797101449275, Recall (micro): 0.8123249299719888
Epoch 29 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.72, d_loss_sup=0.02, d_loss_unsup=0.70, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.28it/s, loss=1.67]


Accuracy: 0.744408945686901, Precision (micro): 0.829971181556196, Recall (micro): 0.8067226890756303
Epoch 30 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.78, d_loss_sup=0.08, d_loss_unsup=0.70, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.28it/s, loss=1.63]


Accuracy: 0.7348242811501597, Precision (micro): 0.830028328611898, Recall (micro): 0.8207282913165266
Epoch 31 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.73, d_loss_sup=0.03, d_loss_unsup=0.70, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.26it/s, loss=1.68]


Accuracy: 0.7380191693290735, Precision (micro): 0.8267045454545454, Recall (micro): 0.8151260504201681
Epoch 32 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.59it/s, d_loss=0.71, d_loss_sup=0.01, d_loss_unsup=0.70, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.18it/s, loss=1.74]


Accuracy: 0.7412140575079872, Precision (micro): 0.8257142857142857, Recall (micro): 0.8095238095238095
Epoch 33 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=0.72, d_loss_sup=0.02, d_loss_unsup=0.70, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.26it/s, loss=1.80]


Accuracy: 0.7539936102236422, Precision (micro): 0.8362068965517241, Recall (micro): 0.8151260504201681
Epoch 34 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.72, d_loss_sup=0.02, d_loss_unsup=0.70, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.29it/s, loss=1.75]


Accuracy: 0.7412140575079872, Precision (micro): 0.830945558739255, Recall (micro): 0.8123249299719888
Epoch 35 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.80, d_loss_sup=0.10, d_loss_unsup=0.70, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.28it/s, loss=1.77]


Accuracy: 0.7380191693290735, Precision (micro): 0.8289855072463768, Recall (micro): 0.8011204481792717
Epoch 36 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.73, d_loss_sup=0.03, d_loss_unsup=0.70, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.27it/s, loss=1.77]


Accuracy: 0.7476038338658147, Precision (micro): 0.8450292397660819, Recall (micro): 0.8095238095238095
Epoch 37 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.62it/s, d_loss=0.71, d_loss_sup=0.01, d_loss_unsup=0.70, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.27it/s, loss=1.81]


Accuracy: 0.7412140575079872, Precision (micro): 0.8333333333333334, Recall (micro): 0.7983193277310925
Epoch 38 / 100


Train: 100%|██████████| 901/901 [04:08<00:00,  3.63it/s, d_loss=0.71, d_loss_sup=0.02, d_loss_unsup=0.70, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:05<00:00,  7.32it/s, loss=1.80]


Accuracy: 0.7412140575079872, Precision (micro): 0.8382352941176471, Recall (micro): 0.7983193277310925
Epoch 39 / 100


Train:  91%|█████████▏| 824/901 [03:46<00:21,  3.63it/s, d_loss=0.65, d_loss_sup=0.01, d_loss_unsup=0.64, g_loss=0.65]


KeyboardInterrupt: 

In [43]:
info = torch.load('Model/ganbert-dapt.pt')
# transformer.load_state_dict(info['best_transformer_sd'])
# generator.load_state_dict(info['best_generator_sd'])
# discriminator.load_state_dict(info['best_discriminator_sd'])
print(f"Acc : {info['best_acc']}, Epoch : {info['best_epoch']}")

Acc : 0.7891373801916933, Epoch : 24


In [21]:
from sklearn.metrics import classification_report, accuracy_score, jaccard_score, \
hamming_loss, cohen_kappa_score, f1_score, precision_score, recall_score
import math
def eval_all(y_human, y_machine, categories):
  print("================ Classification Report ================")
  print(classification_report(y_human, y_machine, target_names=categories))
  print("================ Multi Label Score ================")
  eval_multi_label(y_human, y_machine, categories)

def eval_multi_label(y_test, prediction, categories):
    # Butuh informasi lebih lengkap? silakan disimak di bawah ini
    print('Accuracy             :', accuracy_score(y_test, prediction))
    print('Precision            :', precision_score(y_test, prediction, average='micro'))
    print('Recall               :', recall_score(y_test, prediction, average='micro'))
    print('F1 Score             :', f1_score(y_test, prediction, average='micro'))
    print('Hamming Loss         :', hamming_loss(y_test, prediction))
    print('Jaccard Score')
    js = jaccard_score(y_test, prediction, average=None)
    for i, j in zip(categories, js):
      print(f"{i:<20} : {j}")
    print('Jaccard Score Macro Average:', jaccard_score(y_test, prediction, average='macro'))
    print('Jaccard Score Micro Average:', jaccard_score(y_test, prediction, average='micro'))

In [22]:
def evaluate_dataset(transformer, generator, discriminator, instance):
    transformer.eval() 
    discriminator.eval()
    generator.eval()
    pred_labels = []
    true_labels = []

    # Untuk setiap batch di validation data
    pbar = tqdm(enumerate(instance), desc="Evaluate", total=len(instance))
    for step, batch in pbar:

        # Unpack sebuah batch dari DataLoader
        batch_input_ids = batch[0].to(device)
        batch_input_att_mask = batch[1].to(device)
        batch_label = batch[2].to(device)
        batch_mask = batch[3].to(device)
        real_batch_size = batch_input_ids.shape[0]

        # jangan track gradient! ini sedang evaluasi, bukan training
        with torch.no_grad():        
            model_outputs = transformer(batch_input_ids, attention_mask=batch_input_att_mask)
            hidden_states = model_outputs[-1]
            _, probs = discriminator(hidden_states)
            ###log_probs = F.log_softmax(probs[:,1:], dim=-1)
            filtered_logits = probs[:,0:-1]
            # Accumulate the test loss.

        pred_l = []
        for prob in filtered_logits:
            pred = [1 if x >= 0.5 else 0 for x in prob]
            if (np.sum(pred) == 0):
                _, idx = torch.max(prob, dim = 0)
                pred[idx] = 1
            pred_l.append(pred)
        pred_labels += pred_l
        true_l = batch_label
        true_labels += true_l.detach().cpu()


    pred_labels = np.array(pred_labels)
    true_labels = torch.stack(true_labels).numpy()
    eval_all(true_labels, pred_labels, categories)
    print("")

In [44]:
evaluate_dataset(transformer, generator, discriminator, dataset['Val'])

Evaluate: 100%|██████████| 40/40 [00:01<00:00, 31.09it/s]

                   precision    recall  f1-score   support

             Anak       0.95      0.95      0.95        37
            Bedah       0.84      0.93      0.89        29
             Gigi       1.00      0.86      0.92         7
             Gizi       0.83      0.83      0.83        23
          Jantung       0.40      0.29      0.33         7
             Jiwa       0.82      0.82      0.82        22
        Kandungan       0.91      0.87      0.89        61
Kulit dan Kelamin       0.93      0.95      0.94        43
             Mata       0.93      0.93      0.93        15
             Paru       1.00      1.00      1.00         4
   Penyakit Dalam       0.71      0.71      0.71        49
            Saraf       0.75      0.50      0.60        12
              THT       0.97      0.94      0.95        31
           Tulang       0.82      1.00      0.90         9
          Urologi       0.80      0.50      0.62         8

        micro avg       0.87      0.85      0.86      




In [45]:
evaluate_dataset(transformer, generator, discriminator, dataset['Test'])

Evaluate: 100%|██████████| 340/340 [00:10<00:00, 31.30it/s]


                   precision    recall  f1-score   support

             Anak       0.79      0.66      0.72       286
            Bedah       0.72      0.35      0.47       237
             Gigi       0.95      0.78      0.86       232
             Gizi       0.80      0.71      0.75       240
          Jantung       0.71      0.44      0.55        72
             Jiwa       0.76      0.63      0.69       144
        Kandungan       0.79      0.81      0.80       393
Kulit dan Kelamin       0.74      0.69      0.72       419
             Mata       0.97      0.91      0.94       215
             Paru       0.23      0.58      0.33        19
   Penyakit Dalam       0.49      0.64      0.55       426
            Saraf       0.38      0.25      0.30        95
              THT       0.70      0.72      0.71       235
           Tulang       0.82      0.53      0.65       132
          Urologi       0.59      0.19      0.29       126

        micro avg       0.72      0.65      0.68      

# ASO

In [24]:
def aso_pred(transformer, generator, discriminator, instance):
    transformer.eval() 
    discriminator.eval()
    generator.eval()
    pred_labels = []
    true_labels = []

    # Untuk setiap batch di validation data
    pbar = tqdm(enumerate(instance), desc="Evaluate", total=len(instance))
    for step, batch in pbar:

        # Unpack sebuah batch dari DataLoader
        batch_input_ids = batch[0].to(device)
        batch_input_att_mask = batch[1].to(device)
        batch_label = batch[2].to(device)
        batch_mask = batch[3].to(device)
        real_batch_size = batch_input_ids.shape[0]

        # jangan track gradient! ini sedang evaluasi, bukan training
        with torch.no_grad():        
            model_outputs = transformer(batch_input_ids, attention_mask=batch_input_att_mask)
            hidden_states = model_outputs[-1]
            _, probs = discriminator(hidden_states)
            ###log_probs = F.log_softmax(probs[:,1:], dim=-1)
            filtered_logits = probs[:,0:-1]
            # Accumulate the test loss.

        pred_l = []
        for prob in filtered_logits:
            pred = [1 if x >= 0.5 else 0 for x in prob]
            if (np.sum(pred) == 0):
                _, idx = torch.max(prob, dim = 0)
                pred[idx] = 1
            pred_l.append(pred)
        pred_labels += pred_l
        true_l = batch_label
        true_labels += true_l.detach().cpu()


    pred_labels = np.array(pred_labels)
    true_labels = torch.stack(true_labels).numpy()
    return pred_labels, true_labels

def aso_eval(prediction, y_test):
    return accuracy_score(y_test, prediction), f1_score(y_test, prediction, average='micro', zero_division=0), hamming_loss(y_test, prediction)

In [25]:
import csv  
import os

# Specifying path

path = 'Result/Deep_Learning_Result.csv'

# Checking whether the specified path exists

if not os.path.exists(path):
  print("gada")
  f = open(path, "w+")
  writer = csv.writer(f)
  writer.writerow(['Id', "Learning Method", 'Model', 'Accuracy', 'F1-Score', 'Hamming Loss'])
  f.close()
else:
  print('ada')

def record_result(row):
  f = open(path, "a")
  writer = csv.writer(f)
  writer.writerow(row)
  f.close()

ada


In [None]:
%%time
transformer, generator, discriminator = create_model(transformer, generator, discriminator)
transformer_vars = [i for i in transformer.parameters()]
d_vars = transformer_vars + [v for v in discriminator.parameters()]
g_vars = [v for v in generator.parameters()]

#optimizer
dis_optimizer = torch.optim.AdamW(d_vars, lr=learning_rate_discriminator)
gen_optimizer = torch.optim.AdamW(g_vars, lr=learning_rate_generator) 

num_train_instances = len(train_dataloader)
num_train_steps = int(num_train_instances / BATCH_SIZE * EPOCH)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

#scheduler
dis_scheduler = get_constant_schedule_with_warmup(dis_optimizer, num_warmup_steps = num_warmup_steps)
gen_scheduler = get_constant_schedule_with_warmup(gen_optimizer, num_warmup_steps = num_warmup_steps)

model = train_model(transformer, generator, discriminator, gen_optimizer, dis_optimizer, gen_scheduler, dis_scheduler, num_epochs=EPOCH, last_epoch=0, last_acc=0)

y_pred, y_test = aso_pred(transformer, generator, discriminator, dataset['Test'])
acc, f1, hamm = aso_eval(y_pred, y_test)

record_result([f"gan-bert-1", "semi-supervised", "gan-bert", acc, f1, hamm])

evaluate_dataset(transformer, generator, discriminator, dataset['Test'])

Downloading pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Epoch 1 / 100


Train: 100%|██████████| 901/901 [04:10<00:00,  3.60it/s, d_loss=5.84, d_loss_sup=4.87, d_loss_unsup=0.98, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:05<00:00,  6.90it/s, loss=3.71]


Accuracy: 0.25559105431309903, Precision (micro): 0.3035143769968051, Recall (micro): 0.2661064425770308
Epoch 2 / 100


Train: 100%|██████████| 901/901 [04:58<00:00,  3.02it/s, d_loss=2.97, d_loss_sup=2.22, d_loss_unsup=0.75, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:07<00:00,  5.59it/s, loss=1.89]


Accuracy: 0.7252396166134185, Precision (micro): 0.8466453674121406, Recall (micro): 0.742296918767507
Epoch 3 / 100


Train: 100%|██████████| 901/901 [04:57<00:00,  3.03it/s, d_loss=1.92, d_loss_sup=1.18, d_loss_unsup=0.73, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:06<00:00,  6.19it/s, loss=1.41]


Accuracy: 0.7348242811501597, Precision (micro): 0.8417721518987342, Recall (micro): 0.7450980392156863
Epoch 4 / 100


Train: 100%|██████████| 901/901 [04:56<00:00,  3.04it/s, d_loss=1.54, d_loss_sup=0.82, d_loss_unsup=0.73, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:07<00:00,  5.56it/s, loss=1.20]


Accuracy: 0.7476038338658147, Precision (micro): 0.8493975903614458, Recall (micro): 0.7899159663865546
Epoch 5 / 100


Train: 100%|██████████| 901/901 [04:57<00:00,  3.03it/s, d_loss=1.30, d_loss_sup=0.58, d_loss_unsup=0.72, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:06<00:00,  6.22it/s, loss=1.16]


Accuracy: 0.744408945686901, Precision (micro): 0.8449848024316109, Recall (micro): 0.7787114845938375
Epoch 6 / 100


Train: 100%|██████████| 901/901 [04:59<00:00,  3.01it/s, d_loss=1.20, d_loss_sup=0.48, d_loss_unsup=0.72, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:06<00:00,  5.87it/s, loss=1.26]


Accuracy: 0.7348242811501597, Precision (micro): 0.829971181556196, Recall (micro): 0.8067226890756303
Epoch 7 / 100


Train: 100%|██████████| 901/901 [04:57<00:00,  3.02it/s, d_loss=1.11, d_loss_sup=0.40, d_loss_unsup=0.72, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:07<00:00,  5.60it/s, loss=1.18]


Accuracy: 0.7603833865814696, Precision (micro): 0.855457227138643, Recall (micro): 0.8123249299719888
Epoch 8 / 100


Train:  10%|█         | 94/901 [00:31<04:40,  2.87it/s, d_loss=0.10, d_loss_sup=0.03, d_loss_unsup=0.08, g_loss=0.08]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Train:  99%|█████████▊| 888/901 [04:54<00:04,  2.90it/s, d_loss=0.99, d_loss_sup=0.28, d_loss_unsup=0.70, g_loss=0.74]

In [None]:
%%time
transformer, generator, discriminator = create_model(transformer, generator, discriminator)
transformer_vars = [i for i in transformer.parameters()]
d_vars = transformer_vars + [v for v in discriminator.parameters()]
g_vars = [v for v in generator.parameters()]

#optimizer
dis_optimizer = torch.optim.AdamW(d_vars, lr=learning_rate_discriminator, weight_decay = REGULARIZATION)
gen_optimizer = torch.optim.AdamW(g_vars, lr=learning_rate_generator, weight_decay = REGULARIZATION) 

num_train_instances = len(train_dataloader)
num_train_steps = int(num_train_instances / BATCH_SIZE * EPOCH)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

#scheduler
dis_scheduler = get_constant_schedule_with_warmup(dis_optimizer, num_warmup_steps = num_warmup_steps)
gen_scheduler = get_constant_schedule_with_warmup(gen_optimizer, num_warmup_steps = num_warmup_steps)

model = train_model(transformer, generator, discriminator, gen_optimizer, dis_optimizer, gen_scheduler, dis_scheduler, num_epochs=EPOCH, last_epoch=0, last_acc=0)

y_pred, y_test = aso_pred(transformer, generator, discriminator, dataset['Test'])
acc, f1, hamm = aso_eval(y_pred, y_test)

record_result([f"gan-bert-2", "semi-supervised", "gan-bert", acc, f1, hamm])

evaluate_dataset(transformer, generator, discriminator, dataset['Test'])

In [None]:
%%time
transformer, generator, discriminator = create_model(transformer, generator, discriminator)
transformer_vars = [i for i in transformer.parameters()]
d_vars = transformer_vars + [v for v in discriminator.parameters()]
g_vars = [v for v in generator.parameters()]

#optimizer
dis_optimizer = torch.optim.AdamW(d_vars, lr=learning_rate_discriminator, weight_decay = REGULARIZATION)
gen_optimizer = torch.optim.AdamW(g_vars, lr=learning_rate_generator, weight_decay = REGULARIZATION) 

num_train_instances = len(train_dataloader)
num_train_steps = int(num_train_instances / BATCH_SIZE * EPOCH)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

#scheduler
dis_scheduler = get_constant_schedule_with_warmup(dis_optimizer, num_warmup_steps = num_warmup_steps)
gen_scheduler = get_constant_schedule_with_warmup(gen_optimizer, num_warmup_steps = num_warmup_steps)

model = train_model(transformer, generator, discriminator, gen_optimizer, dis_optimizer, gen_scheduler, dis_scheduler, num_epochs=EPOCH, last_epoch=0, last_acc=0)

y_pred, y_test = aso_pred(transformer, generator, discriminator, dataset['Test'])
acc, f1, hamm = aso_eval(y_pred, y_test)

record_result([f"gan-bert-3", "semi-supervised", "gan-bert", acc, f1, hamm])

evaluate_dataset(transformer, generator, discriminator, dataset['Test'])

In [None]:
%%time
transformer, generator, discriminator = create_model(transformer, generator, discriminator)
transformer_vars = [i for i in transformer.parameters()]
d_vars = transformer_vars + [v for v in discriminator.parameters()]
g_vars = [v for v in generator.parameters()]

#optimizer
dis_optimizer = torch.optim.AdamW(d_vars, lr=learning_rate_discriminator, weight_decay = REGULARIZATION)
gen_optimizer = torch.optim.AdamW(g_vars, lr=learning_rate_generator, weight_decay = REGULARIZATION) 

num_train_instances = len(train_dataloader)
num_train_steps = int(num_train_instances / BATCH_SIZE * EPOCH)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

#scheduler
dis_scheduler = get_constant_schedule_with_warmup(dis_optimizer, num_warmup_steps = num_warmup_steps)
gen_scheduler = get_constant_schedule_with_warmup(gen_optimizer, num_warmup_steps = num_warmup_steps)

model = train_model(transformer, generator, discriminator, gen_optimizer, dis_optimizer, gen_scheduler, dis_scheduler, num_epochs=EPOCH, last_epoch=0, last_acc=0)

y_pred, y_test = aso_pred(transformer, generator, discriminator, dataset['Test'])
acc, f1, hamm = aso_eval(y_pred, y_test)

record_result([f"gan-bert-4", "semi-supervised", "gan-bert", acc, f1, hamm])

evaluate_dataset(transformer, generator, discriminator, dataset['Test'])

Epoch 1 / 100


Train: 100%|██████████| 901/901 [06:19<00:00,  2.38it/s, d_loss=5.83, d_loss_sup=4.86, d_loss_unsup=0.97, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:10<00:00,  3.92it/s, loss=3.71]


Accuracy: 0.2523961661341853, Precision (micro): 0.29073482428115016, Recall (micro): 0.2549019607843137
Epoch 2 / 100


Train: 100%|██████████| 901/901 [05:39<00:00,  2.66it/s, d_loss=2.96, d_loss_sup=2.22, d_loss_unsup=0.75, g_loss=0.72]
Val: 100%|██████████| 40/40 [00:10<00:00,  3.77it/s, loss=1.91]


Accuracy: 0.7124600638977636, Precision (micro): 0.8370607028753994, Recall (micro): 0.7338935574229691
Epoch 3 / 100


Train: 100%|██████████| 901/901 [05:40<00:00,  2.65it/s, d_loss=1.91, d_loss_sup=1.18, d_loss_unsup=0.73, g_loss=0.73]
Val: 100%|██████████| 40/40 [00:10<00:00,  3.78it/s, loss=1.38]


Accuracy: 0.7380191693290735, Precision (micro): 0.8575949367088608, Recall (micro): 0.7591036414565826
Epoch 4 / 100


Train: 100%|██████████| 901/901 [05:40<00:00,  2.64it/s, d_loss=1.53, d_loss_sup=0.81, d_loss_unsup=0.73, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:10<00:00,  3.83it/s, loss=1.29]


Accuracy: 0.7156549520766773, Precision (micro): 0.824773413897281, Recall (micro): 0.7647058823529411
Epoch 5 / 100


Train: 100%|██████████| 901/901 [05:39<00:00,  2.66it/s, d_loss=1.34, d_loss_sup=0.62, d_loss_unsup=0.72, g_loss=0.74]
Val: 100%|██████████| 40/40 [00:10<00:00,  3.79it/s, loss=1.10]


Accuracy: 0.7603833865814696, Precision (micro): 0.8650306748466258, Recall (micro): 0.7899159663865546
Epoch 6 / 100


Train: 100%|██████████| 901/901 [05:39<00:00,  2.66it/s, d_loss=1.23, d_loss_sup=0.51, d_loss_unsup=0.72, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:10<00:00,  3.78it/s, loss=1.18]


Accuracy: 0.7667731629392971, Precision (micro): 0.8501440922190202, Recall (micro): 0.8263305322128851
Epoch 7 / 100


Train: 100%|██████████| 901/901 [05:38<00:00,  2.66it/s, d_loss=1.11, d_loss_sup=0.39, d_loss_unsup=0.72, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:10<00:00,  3.81it/s, loss=1.07]


Accuracy: 0.7891373801916933, Precision (micro): 0.878698224852071, Recall (micro): 0.8319327731092437
Epoch 8 / 100


Train: 100%|██████████| 901/901 [05:38<00:00,  2.66it/s, d_loss=1.04, d_loss_sup=0.32, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:10<00:00,  3.78it/s, loss=1.06]


Accuracy: 0.7603833865814696, Precision (micro): 0.8448753462603878, Recall (micro): 0.8543417366946778
Epoch 9 / 100


Train: 100%|██████████| 901/901 [05:40<00:00,  2.65it/s, d_loss=0.98, d_loss_sup=0.27, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:09<00:00,  4.35it/s, loss=1.16]


Accuracy: 0.7795527156549521, Precision (micro): 0.861271676300578, Recall (micro): 0.834733893557423
Epoch 10 / 100


Train: 100%|██████████| 901/901 [05:41<00:00,  2.64it/s, d_loss=0.91, d_loss_sup=0.20, d_loss_unsup=0.71, g_loss=0.75]
Val: 100%|██████████| 40/40 [00:08<00:00,  4.66it/s, loss=1.26]


Accuracy: 0.7571884984025559, Precision (micro): 0.8526011560693642, Recall (micro): 0.8263305322128851
Epoch 11 / 100


Train:  73%|███████▎  | 662/901 [04:09<01:28,  2.70it/s, d_loss=0.64, d_loss_sup=0.11, d_loss_unsup=0.52, g_loss=0.55]

In [None]:
%%time
transformer, generator, discriminator = create_model(transformer, generator, discriminator)
transformer_vars = [i for i in transformer.parameters()]
d_vars = transformer_vars + [v for v in discriminator.parameters()]
g_vars = [v for v in generator.parameters()]

#optimizer
dis_optimizer = torch.optim.AdamW(d_vars, lr=learning_rate_discriminator, weight_decay = REGULARIZATION)
gen_optimizer = torch.optim.AdamW(g_vars, lr=learning_rate_generator, weight_decay = REGULARIZATION) 

num_train_instances = len(train_dataloader)
num_train_steps = int(num_train_instances / BATCH_SIZE * EPOCH)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

#scheduler
dis_scheduler = get_constant_schedule_with_warmup(dis_optimizer, num_warmup_steps = num_warmup_steps)
gen_scheduler = get_constant_schedule_with_warmup(gen_optimizer, num_warmup_steps = num_warmup_steps)

model = train_model(transformer, generator, discriminator, gen_optimizer, dis_optimizer, gen_scheduler, dis_scheduler, num_epochs=EPOCH, last_epoch=0, last_acc=0)

y_pred, y_test = aso_pred(transformer, generator, discriminator, dataset['Test'])
acc, f1, hamm = aso_eval(y_pred, y_test)

record_result([f"gan-bert-5", "semi-supervised", "gan-bert", acc, f1, hamm])

evaluate_dataset(transformer, generator, discriminator, dataset['Test'])

In [None]:
import os, signal

os.kill(os.getpid(), signal.SIGKILL)