In [1]:
import torch
from dataLoader import small_train_loader
from model import VoicePassingModel
from transformers import DistilBertTokenizer
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

In [15]:
tokenizer

DistilBertTokenizer(name_or_path='distilbert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True)

In [3]:
iterer = iter(small_train_loader)


In [4]:
X,y = next(iterer)

In [5]:
X_ = tokenizer(
    text = X,
    add_special_tokens = True,
    max_length = 512,
    padding = "max_length",
    truncation = True,
    return_tensors = "pt"
)

In [7]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [37]:
model = VoicePassingModel().to(device)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [38]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.003)

In [None]:
for num_epoch in tqdm(range(10)):

    total_loss = 0

    for i, (X, y) in enumerate(small_train_loader):

        X_ = tokenizer(
            text = X,
            add_special_tokens = True,
            max_length = 512,
            padding = "max_length",
            truncation = True,
            return_tensors = "pt"
            ).to(device)
        
        pred = model(X_)

        loss = criterion(pred, y.squeeze().to(device))
        total_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if i == 1:
            break

    print(total_loss)

In [16]:
a = torch.tensor([3])

In [17]:
a

tensor([3])

In [18]:
a.item

<function Tensor.item>

In [19]:
a.item()

3

In [74]:
import torch
from dataLoader import small_train_loader
from model import VoicePassingModel
from transformers import DistilBertTokenizer
from tqdm import tqdm

trainer_config = {
    'model' : VoicePassingModel(),
}

class VoicePassingTrainer():

    def __init__(self, model):

        self.device = "cuda" if torch.cuda.is_available() else 'cpu'
        self.model = model.to(self.device)
        self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

    def set_model(self, model):
        self.model = model.to(self.device)

    def train(self, num_epochs, train_loader, criterion, lr = 0.003, hist = False):

        self.criterion = criterion
        self.optimizer = torch.optim.Adam(params = self.model.parameters(), lr = lr)
        self.model.train()

        for num_epoch in tqdm(range(num_epochs), desc="EPOCH", leave = True):
            result = self.train_one_epoch(num_epoch, train_loader, hist = hist)
            

    def train_one_epoch(self, index, train_loader, verbose = True, hist = False):

        train_loss = 0

        for X, y in tqdm(train_loader, desc="batch", leave=False):

            X = self.tokenizer(
                text = X,
                add_special_tokens = True,
                max_length = 512,
                padding = "max_length",
                truncation = True,
                return_tensors = "pt"
            ).to(self.device)

            y = y.squeeze().to(self.device)

            pred = self.model(X)
            loss = self.criterion(pred, y)
            train_loss += loss.item()

            loss.backward()
            self.optimizer.step()

        if verbose:
            print(f"EPOCH {index+1} Loss : {train_loss : .4f}")
            print(pred)
        
        if hist:
            return train_loss

    def test_a_sentence(self, text):

        X = self.tokenizer(
                text = text,
                add_special_tokens = True,
                max_length = 512,
                padding = "max_length",
                truncation = True,
                return_tensors = "pt"
            ).to(self.device)
        
        pred = self.model(X)
        return pred

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [75]:
trainer = VoicePassingTrainer(VoicePassingModel())

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [76]:
criterion = torch.nn.CrossEntropyLoss()

In [99]:
trainer.train(30, small_train_loader, criterion, lr = 0.0005)

EPOCH:   3%|▎         | 1/30 [00:02<01:04,  2.24s/it]

EPOCH 1 Loss :  26.3771
tensor([[ -0.2854,   0.6702,   0.7785,  -0.9317],
        [ 10.8365,   0.8576,  -3.2630, -15.8015],
        [ -0.2854,   0.6702,   0.7785,  -0.9317],
        [ -0.2854,   0.6702,   0.7785,  -0.9317]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:   7%|▋         | 2/30 [00:04<00:56,  2.03s/it]

EPOCH 2 Loss :  27.4193
tensor([[ -0.2730,   0.6584,   0.7910,  -0.9442],
        [  8.0653,   0.6971,  -2.3724, -11.6767],
        [ -0.2730,   0.6584,   0.7910,  -0.9442],
        [ -0.2730,   0.6584,   0.7910,  -0.9442]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  10%|█         | 3/30 [00:05<00:52,  1.95s/it]

EPOCH 3 Loss :  26.1112
tensor([[-0.2598,  0.6458,  0.8033, -0.9568],
        [-0.2598,  0.6458,  0.8033, -0.9568],
        [-0.2598,  0.6458,  0.8033, -0.9568],
        [-0.2598,  0.6458,  0.8033, -0.9568]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  13%|█▎        | 4/30 [00:07<00:49,  1.92s/it]

EPOCH 4 Loss :  26.1357
tensor([[-0.2459,  0.6327,  0.8155, -0.9693],
        [-0.2459,  0.6327,  0.8155, -0.9693],
        [-0.2459,  0.6327,  0.8155, -0.9693],
        [-0.2459,  0.6327,  0.8155, -0.9693]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  17%|█▋        | 5/30 [00:09<00:47,  1.90s/it]

EPOCH 5 Loss :  30.3556
tensor([[-0.2310,  0.6190,  0.8275, -0.9819],
        [ 6.2667,  0.7614, -1.9548, -8.4692],
        [-0.2310,  0.6190,  0.8275, -0.9819],
        [-0.2310,  0.6190,  0.8275, -0.9819]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  20%|██        | 6/30 [00:11<00:45,  1.90s/it]

EPOCH 6 Loss :  29.0475
tensor([[-0.2146,  0.6041,  0.8390, -0.9945],
        [ 4.0603,  0.7505, -1.0529, -5.7448],
        [-0.2146,  0.6041,  0.8390, -0.9945],
        [-0.2146,  0.6041,  0.8390, -0.9945]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  23%|██▎       | 7/30 [00:13<00:43,  1.89s/it]

EPOCH 7 Loss :  27.1404
tensor([[-0.1973,  0.5884,  0.8501, -1.0072],
        [ 6.3766,  0.8757, -2.1455, -8.0461],
        [-0.1973,  0.5884,  0.8501, -1.0072],
        [-0.1973,  0.5884,  0.8501, -1.0072]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  27%|██▋       | 8/30 [00:15<00:41,  1.89s/it]

EPOCH 8 Loss :  29.0039
tensor([[-0.1792,  0.5725,  0.8606, -1.0199],
        [-0.1792,  0.5725,  0.8606, -1.0199],
        [-0.1792,  0.5725,  0.8606, -1.0199],
        [-0.1792,  0.5725,  0.8606, -1.0199]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  30%|███       | 9/30 [00:17<00:39,  1.90s/it]

EPOCH 9 Loss :  26.9344
tensor([[-0.1606,  0.5565,  0.8706, -1.0326],
        [ 8.3001,  0.9626, -3.1834, -9.3918],
        [-0.1606,  0.5565,  0.8706, -1.0326],
        [-0.1606,  0.5565,  0.8706, -1.0326]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  33%|███▎      | 10/30 [00:19<00:37,  1.90s/it]

EPOCH 10 Loss :  26.4360
tensor([[-0.1420,  0.5412,  0.8801, -1.0453],
        [-0.1420,  0.5412,  0.8801, -1.0453],
        [-0.1420,  0.5412,  0.8801, -1.0453],
        [-0.1420,  0.5412,  0.8801, -1.0453]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  37%|███▋      | 11/30 [00:21<00:35,  1.89s/it]

EPOCH 11 Loss :  27.1341
tensor([[-0.1236,  0.5276,  0.8891, -1.0580],
        [ 9.5194,  0.8938, -3.9302, -9.8113],
        [-0.1236,  0.5276,  0.8891, -1.0580],
        [-0.1236,  0.5276,  0.8891, -1.0580]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  40%|████      | 12/30 [00:22<00:33,  1.88s/it]

EPOCH 12 Loss :  25.5496
tensor([[-0.1062,  0.5170,  0.8979, -1.0707],
        [-0.1062,  0.5170,  0.8979, -1.0707],
        [-0.1062,  0.5170,  0.8979, -1.0707],
        [-0.1062,  0.5170,  0.8979, -1.0707]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  43%|████▎     | 13/30 [00:24<00:32,  1.88s/it]

EPOCH 13 Loss :  25.8045
tensor([[-0.0894,  0.5098,  0.9063, -1.0834],
        [-0.0894,  0.5098,  0.9063, -1.0834],
        [-0.0894,  0.5098,  0.9063, -1.0834],
        [-0.0894,  0.5098,  0.9063, -1.0834]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


EPOCH:  47%|████▋     | 14/30 [00:26<00:30,  1.89s/it]

EPOCH 14 Loss :  27.0306
tensor([[ -0.0734,   0.5069,   0.9145,  -1.0960],
        [ 15.3998,   2.0470,  -7.3409, -13.7321],
        [ -0.0734,   0.5069,   0.9145,  -1.0960],
        [ -0.0734,   0.5069,   0.9145,  -1.0960]], device='cuda:0',
       grad_fn=<AddmmBackward0>)




In [98]:
trainer.test_a_sentence("예, 그 증거로 제출하면 채택 될 때 사용됩니다.")

tensor([[-0.2973,  0.6820,  0.7665, -0.9197]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [84]:
next(iter(small_train_loader))

[('김승재라고 아시는 분이십니까',
  '혹시 타인에게 통장  판매하신 적 있으신가요?',
  '예, 그 증거로 제출하면 채택 될 때 사용됩니다.',
  '그러시면 칠십하루 년생 사내 강자 상자 호차 강상호란 사람은 아십니까'),
 tensor([[1],
         [1],
         [2],
         [1]])]