In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
%%capture
!pip install transformers

In [None]:
!nvidia-smi

Tue Jun 27 08:57:46 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   55C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# as Data Scientist
import pandas as pd
import numpy as np

In [None]:
# torch neural network
import torch
import torch.nn as nn
import torch.nn.functional as F

# torch optimizers
from torch.optim import AdamW

# torch dataset
from torch.utils.data import Dataset, DataLoader

# transformers
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup, logging

# tqdm untils
from tqdm.auto import tqdm

# warnings
import warnings
warnings.filterwarnings("ignore")

logging.set_verbosity_error()

# Hyperparameters

### Paths

In [None]:
FULL_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/full.csv'
TRAIN_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/train.csv'
VAL_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/val.csv'
TEST_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/test.csv'

# all labels
ALL_LABELS_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/labels.csv'

# model path for saving model
MODEL_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/models/'

### Model settings

In [None]:
# Model settings
MAX_SEQUENCE_LENGTH = 200
BATCH_SIZE = 32
EPOCHS = 10

# Backbone
# MODEL_NAME = 'vinai/phobert-base'
# MODEL_NAME = 'bert-base-multilingual-cased'
MODEL_NAME = 'distilbert-base-cased'

### Device & torch

In [None]:
# CUDA checking
if torch.cuda.is_available():
    print('CUDA is available')
else:
    raise RuntimeError("CUDA is not available")

CUDA is available


In [None]:
# Change device to GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [None]:
# Seed everything
def seed_everything(seed_value):
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(86)

# Load data

In [None]:
# Load data
def load_data(path):
    df = pd.read_csv(path)
    df['information'] = df.description + ' ' + df.requirements
    df.drop(columns=['description', 'requirements', 'industry'], inplace=True)
    df.columns = ['industries', 'information']

    return df

# Create onehot label
def create_onehot(y):
    seperated = y.split(' / ')
    re = np.zeros(NUM_LABELS)
    for i in range(NUM_LABELS):
        if ALL_LABELS[i] in seperated:
            re[i] = 1

    return re

In [None]:
# load data
train_df = load_data(TRAIN_PATH)
val_df = load_data(VAL_PATH)
test_df = load_data(TEST_PATH)

# job labels
ALL_LABELS = pd.read_csv(ALL_LABELS_PATH)['0'].tolist()
NUM_LABELS = len(ALL_LABELS)

# Dataset

In [None]:
class JobDescriptionDataset(Dataset):
    def __init__(self, df, tokenizer, max_len=MAX_SEQUENCE_LENGTH):
        self.df = df
        self.max_len = max_len
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """
        To customize dataset, inherit from Dataset class and implement
        __len__ & __getitem__
        __getitem__ should return
            data:
                input_ids
                attention_masks
                information
                targets
        """
        row = self.df.iloc[index]
        text, target = self.get_input_data(row)

        # Encode_plus will:
        # (1) split `information text` into token
        # (2) Add the '[CLS]' and '[SEP]' token to the start and end
        # (3) Truncate/Pad sentence to max length
        # (4) Map token to their IDS
        # (5) Create attention mask
        # (6) Return a dictionary of outputs
        encoding = self.tokenizer.encode_plus(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            add_special_tokens=True,
            return_attention_mask=True,
            return_token_type_ids=True,
            return_tensors='pt',
        )

        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_masks': encoding['attention_mask'].flatten(),
            # 'token_type_ids': encoding['token_type_ids'].flatten(),
            'target': torch.tensor(target, dtype=torch.long),
        }

    def get_input_data(self, row):
        text = row.information
        target = create_onehot(row.industries)

        return text, target

In [None]:
# checking
JobDescriptionDataset(train_df, AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False))[10]

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

{'text': 'kế hoạch chiến lược tìm kiếm khách hàng tiềm năng. thiết kế tư vấn kế hoạch tài chính tối ưu khách hàng. hoàn thành chỉ tiêu kinh doanh yêu cầu vị trí ứng tuyển. chú trọng xây dựng thương hiệu công ty đem trải nghiệm dịch vụ khách hàng . 23 trở tốt nghiệp cao đẳng đại học sinh viên chờ ưu tiên kinh nghiệm tài chính bảo hiểm bất động sản ô tô chủ động trách nhiệm công việc tìm kiếm khai thác xây dựng khách hàng tiềm năng áp lực công việc doanh số',
 'input_ids': tensor([  101,   180, 28647, 16358, 28640,  1732, 22572,  1182, 28647,  1179,
           181, 28265, 28660,  1665,   189, 21409,  1306,   180,  1182, 28647,
          1306,   180,  1324,  5589,  1732,   177,  9183,  2118,   189,  1182,
         28648,  1306,   183, 15287,  2118,   119, 24438,  1182, 28647,  1204,
           180, 28647,   189, 28265,   191, 28642,  1179,   180, 28647, 16358,
         28640,  1732,   189,  9183,  1182, 22572,  7326,  1324,   189, 28654,
          1182,   343,  1358,   180,  1324,  5589, 

# Data Module

In [None]:
def prepare_data(df, tokenizer, batch_size=BATCH_SIZE, shuffle=True, num_workers=4):
    dataset = JobDescriptionDataset(df, tokenizer)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
    return data_loader

In [None]:
## test
print('Training batch size is: ')
len(prepare_data(train_df, tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)))

Training batch size is: 


750

# Classifier

* NeuralNets
* TextCNN
* Bi-LSTM
* Bi-GRU

In [None]:
# Simple Neural Net
class MultilabelClassifier_NeuralNet(nn.Module):

    def __init__(self, n_classes=NUM_LABELS, model_name=MODEL_NAME):

        super(MultilabelClassifier_NeuralNet, self).__init__()
        self.n_classes = n_classes

        # Architecture
        self.bert = AutoModel.from_pretrained(model_name, return_dict=True) # Backbone
        self.hidden = nn.Linear(self.bert.config.hidden_size, self.bert.config.hidden_size)
        self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
        self.dropout = nn.Dropout(0.3)

        # # Freeze BERT parameters
        # for param in self.bert.parameters():
        #     param.requires_grad = False

        # Initialization
        nn.init.xavier_uniform_(self.hidden.weight)
        nn.init.xavier_uniform_(self.classifier.weight)

    def forward(self, input_ids, attention_masks):#, token_type_ids):

        # Bert (fine-tuning)
        out = self.bert(input_ids=input_ids, attention_mask=attention_masks)

        # # Bert (use for feature extraction)
        # with torch.no_grad():
        #     out = self.bert(input_ids=input_ids, attention_mask=attention_masks)

        pooled_out = torch.mean(out.last_hidden_state, 1)

        # Neural network
        x = self.dropout(pooled_out)
        x = self.hidden(x)
        x = F.relu(x)
        x = self.dropout(x)
        logits = self.classifier(x)

        return logits




# TextCNN
class MultilabelClassifier_TextCNN(nn.Module):

    def __init__(self, n_classes=NUM_LABELS, model_name=MODEL_NAME):

        super(MultilabelClassifier_TextCNN, self).__init__()
        self.n_classes = n_classes

        # Architecture
        self.bert = AutoModel.from_pretrained(model_name, return_dict=True) # Backbone
        self.cnn = nn.Conv1d(self.bert.config.hidden_size, 256, kernel_size=3, padding=1)
        self.classifier = nn.Linear(256, self.n_classes)
        self.dropout = nn.Dropout(0.3)

        # Initialization
        nn.init.xavier_uniform_(self.cnn.weight)
        nn.init.constant_(self.cnn.bias, 0)
        nn.init.xavier_uniform_(self.classifier.weight)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(self, input_ids, attention_masks):#, token_type_ids):

        # Bert (fine-tuning)
        out = self.bert(input_ids=input_ids, attention_mask=attention_masks)
        word_embeddings = out.last_hidden_state.permute(0, 2, 1) # Reshape for CNN input

        # CNN
        cnn_out = self.cnn(word_embeddings)
        cnn_out = F.relu(cnn_out)
        pooled_out = F.max_pool1d(cnn_out, kernel_size=cnn_out.size(2)).squeeze(2)

        # Dropout
        x = self.dropout(pooled_out)

        # Classifier
        logits = self.classifier(x)

        return logits




# Bi-LSTM
class MultilabelClassifier_BiLSTM(nn.Module):

    def __init__(self, n_classes=NUM_LABELS, model_name=MODEL_NAME):

        super(MultilabelClassifier_BiLSTM, self).__init__()
        self.n_classes = n_classes

        # Architecture
        self.bert = AutoModel.from_pretrained(model_name, return_dict=True) # Backbone
        self.b_lstm = nn.LSTM(self.bert.config.hidden_size, self.bert.config.hidden_size, batch_first=True, bidirectional=True)
        self.classifier = nn.Linear(2 * self.b_lstm.hidden_size, self.n_classes)
        self.dropout = nn.Dropout(0.3)

        # Initialization
        nn.init.xavier_uniform_(self.b_lstm.weight_ih_l0)
        nn.init.xavier_uniform_(self.b_lstm.weight_hh_l0)
        nn.init.constant_(self.b_lstm.bias_ih_l0, 0)
        nn.init.constant_(self.b_lstm.bias_hh_l0, 0)

    def forward(self, input_ids, attention_masks):#, token_type_ids):

        # Bert (fine-tuning)
        out = self.bert(input_ids=input_ids, attention_mask=attention_masks)
        pooled_out = torch.mean(out.last_hidden_state, 1)

        # Bi-LSTM
        lstm_out, _ = self.b_lstm(pooled_out.unsqueeze(0))
        lstm_out = lstm_out.squeeze(0)

        # Dropout
        x = self.dropout(lstm_out)

        # Classifier
        logits = self.classifier(x)

        return logits




# Bi-GRU
class MultilabelClassifier_BiGRU(nn.Module):

    def __init__(self, n_classes=NUM_LABELS, model_name=MODEL_NAME):

        super(MultilabelClassifier_BiGRU, self).__init__()
        self.n_classes = n_classes

        # Architecture
        self.bert = AutoModel.from_pretrained(model_name, return_dict=True) # Backbone
        self.b_gru = nn.GRU(self.bert.config.hidden_size, self.bert.config.hidden_size, batch_first=True, bidirectional=True)
        self.classifier = nn.Linear(2 * self.b_gru.hidden_size, self.n_classes)
        self.dropout = nn.Dropout(0.3)

        # Initialization
        nn.init.xavier_uniform_(self.b_gru.weight_ih_l0)
        nn.init.xavier_uniform_(self.b_gru.weight_hh_l0)
        nn.init.constant_(self.b_gru.bias_ih_l0, 0)
        nn.init.constant_(self.b_gru.bias_hh_l0, 0)

    def forward(self, input_ids, attention_masks):#, token_type_ids):

        # Bert (fine-tuning)
        out = self.bert(input_ids=input_ids, attention_mask=attention_masks)
        pooled_out = torch.mean(out.last_hidden_state, 1)

        # Bi-GRU
        gru_out, _ = self.b_gru(pooled_out.unsqueeze(0))
        gru_out = gru_out.squeeze(0)

        # Dropout
        x = self.dropout(gru_out)

        # Classifier
        logits = self.classifier(x)

        return logits

In [None]:
# Dataset & model
t_dset = JobDescriptionDataset(train_df, AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False))
t_model = MultilabelClassifier_BiGRU()

# Extract data
idx = 1
input_ids = t_dset[idx]['input_ids'].unsqueeze(0)
attention_masks = t_dset[idx]['attention_masks'].unsqueeze(0)
target = t_dset[idx]['target'].unsqueeze(0)

# Predicting
t_model.cpu()
with torch.no_grad():
    logits = t_model(input_ids, attention_masks)

print(logits)
print(torch.sigmoid(logits))
print(torch.round(torch.sigmoid(logits)))

Downloading pytorch_model.bin:   0%|          | 0.00/263M [00:00<?, ?B/s]

tensor([[-0.0872, -0.0045,  0.0785, -0.0296, -0.0666,  0.0296,  0.0185,  0.1200,
         -0.0343, -0.0690,  0.0298, -0.0161,  0.1091, -0.0033,  0.1447, -0.0447,
          0.0140,  0.0202,  0.1748, -0.0714,  0.0455, -0.1038, -0.0780, -0.0355,
         -0.1149,  0.0007, -0.1589,  0.0705,  0.0447, -0.0527, -0.1129, -0.1584,
         -0.0829,  0.0088,  0.0699,  0.0149,  0.0937, -0.1312,  0.0628,  0.0545,
         -0.0960,  0.0065, -0.1879, -0.1141, -0.0530,  0.0969,  0.0635,  0.0438,
          0.1292,  0.1590, -0.1624, -0.0973,  0.0413, -0.0706,  0.0443, -0.0679,
          0.0681, -0.0196,  0.0040,  0.0211,  0.1187,  0.0413,  0.0328,  0.0787,
         -0.0418,  0.2071, -0.0177,  0.0408,  0.0864,  0.0697,  0.2211,  0.0253]])
tensor([[0.4782, 0.4989, 0.5196, 0.4926, 0.4834, 0.5074, 0.5046, 0.5300, 0.4914,
         0.4828, 0.5074, 0.4960, 0.5273, 0.4992, 0.5361, 0.4888, 0.5035, 0.5051,
         0.5436, 0.4822, 0.5114, 0.4741, 0.4805, 0.4911, 0.4713, 0.5002, 0.4604,
         0.5176, 0.5112, 0

# Training

In [None]:
def train(model, criterion, optimizer, lr_scheduler, train_loader):
    model.train()
    losses = []

    for data in tqdm(train_loader):

        # Extract
        input_ids = data['input_ids'].to(device)
        attention_masks = data['attention_masks'].to(device)
        # token_type_ids = data['token_type_ids'].to(device)
        target = data['target'].to(device)

        # Forward
        optimizer.zero_grad()
        logits = model(
            input_ids=input_ids,
            attention_masks=attention_masks
        )

        # Cal loss
        loss = criterion(logits, target.float())

        # Backward
        losses.append(loss.item())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        lr_scheduler.step()

    print(f'Loss: {np.mean(losses)}')


def training_step(model, backbone, name, epochs=EPOCHS):
    # Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)

    # Dataloader
    train_data_loader = prepare_data(train_df, tokenizer)
    val_data_loader = prepare_data(val_df, tokenizer)
    test_data_loader = prepare_data(test_df, tokenizer)

    # Model settings
    criterion = nn.BCEWithLogitsLoss()
    optimizer = AdamW(model.parameters(), lr=2e-5)
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=len(train_data_loader) * EPOCHS
    )

    for epoch in range(EPOCHS):
        print(f'Epoch {epoch + 1}/{EPOCHS}')
        print('-'*68)
        train(model, criterion, optimizer, lr_scheduler, train_data_loader)

        # Save
        torch.save(model.state_dict(), MODEL_PATH + backbone + '_' + name + '_' + str(epoch) + '.pth')

### Training Neural Net

In [None]:
neural_net = MultilabelClassifier_NeuralNet().to(device)
training_step(neural_net, MODEL_NAME, 'NeuralNet')

Epoch 1/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.11847427995502949
Epoch 2/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.08034089673062166
Epoch 3/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.07160642152031263
Epoch 4/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.06637921725710233
Epoch 5/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.06289696787794431
Epoch 6/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.059873861650625866
Epoch 7/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.057752832666039464
Epoch 8/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.05551784274727106
Epoch 9/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7ad8c35c60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7ad8c35c60>
Traceback (most recent call last):
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Loss: 0.054048028523723285
Epoch 10/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.05283773205429316


### Training TextCNN

In [None]:
text_cnn = MultilabelClassifier_TextCNN().to(device)
training_step(text_cnn, MODEL_NAME, 'TextCNN')

Epoch 1/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.1497246306737264
Epoch 2/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7ad8c35c60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Loss: 0.09357247494161129
Epoch 3/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.08014470500250657
Epoch 4/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.07322911172608534
Epoch 5/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.0683108515093724
Epoch 6/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.06470752175649007
Epoch 7/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.0618660615781943
Epoch 8/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.05962676022946835
Epoch 9/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.057827520417670406
Epoch 10/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.0565349794725577


### Training Bi-LSTM

In [None]:
bi_lstm = MultilabelClassifier_BiLSTM().to(device)
training_step(bi_lstm, MODEL_NAME, 'BiLSTM')

Epoch 1/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.12502328858772913
Epoch 2/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.09410704277952513
Epoch 3/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.08395367756982644
Epoch 4/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.07566421116391818
Epoch 5/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.07023862776656946
Epoch 6/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.06652234336733819
Epoch 7/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7ad8c35c60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7ad8c35c60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Loss: 0.06371688612302144
Epoch 8/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.061653831844528516
Epoch 9/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.06009509376188119
Epoch 10/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.05915787464380264


### Training Bi-GRU

In [None]:
bi_gru = MultilabelClassifier_BiGRU().to(device)
training_step(bi_gru, MODEL_NAME, 'BiGRU')

Epoch 1/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.11809902694821357
Epoch 2/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.08733782576024532
Epoch 3/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.07442893154422442
Epoch 4/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.06792382274071375
Epoch 5/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.06384391553203266
Epoch 6/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.06076712237795194
Epoch 7/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.05823745905856292
Epoch 8/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.05614323247472445
Epoch 9/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.054736403996745744
Epoch 10/10
--------------------------------------------------------------------


  0%|          | 0/750 [00:00<?, ?it/s]

Loss: 0.05366044068833192
