In [1]:
import torch
import numpy as np
import pandas as pd
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torch.optim import Adam
from tqdm.notebook import tqdm
import torch.nn.functional as F
import GPUtil
import random

In [2]:
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False)

In [3]:
model = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=8)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [4]:
dataframe = pd.read_csv('./resource/emotion_classification.csv').dropna(axis=0).sample(frac=0.5)
#중립0 행복1 슬픔2 공포3 분노4 놀람5 혐오6 상처7
emodict = {'중립':0, '행복':1, '슬픔':2, '공포':3, '분노':4, '놀람':5, '혐오':6, '상처':7}
for k,v in emodict.items():
    dataframe = dataframe.replace(k, v)
dataframe[:10]

Unnamed: 0,사람문장1,감정_대분류
116165,자연의 힘으로 간단하게 통일,0
67503,집에 들어오면 제일 먼저 반겨준 해피가 집에 없으니까 너무 허전하고 이상해. 해피가...,2
149164,별 이상 없어 보이고요. 접종은 차근차근 하시면 돼요.,0
73437,당연히 마음에 들지! 엄청 비싼 향수야.,1
164187,아~아~그러니까 내 차를 거기다가 그러니까 AS를 맡겼는데…,0
137577,이길 수 있을지 나도 몰라. 하지만 최선을 다할거야. 그러고 싶어졌어.,0
173262,"아, 남편이었음 해서요. 이 세상에서 나보다 더 어울릴 사람은 없어요. 정말 남편이...",0
147481,제가 갑자기 해외 지사로 발령을 받게 돼서 상품을 계속 이용할 수 없게 됐는데 이런...,0
13426,노인정 사람들이 내가 요실금인걸 알까봐 긴장돼.,3
180940,그럴 리가 없잖냐.,0


In [5]:
train_df, test_df = train_test_split(dataframe, test_size=0.2, random_state=42)

In [6]:
# max_len = 64
# batch_size = 64
# warmup_ratio = 0.1
# num_epochs = 5
# max_grad_norm = 1
# log_interval = 200
# learning_rate =  5e-5

In [7]:
class MyDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        text = self.df.iloc[idx, 0]
        label = self.df.iloc[idx, 1]
        return text, label

In [8]:
train_dataset = MyDataset(train_df)
test_dataset = MyDataset(test_df)

In [9]:
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True, num_workers=0)

In [10]:
device = torch.device("cuda:0")
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [11]:
optimizer = Adam(model.parameters(), lr=1e-5)
itr = 1
p_itr = 1000
epochs = 5
total_loss = 0
total_len = 0
total_correct = 0
MAX_LEN=128

In [12]:
#print('Tokenized: ', tokenizer.tokenize(data['발화문'][0]))
#  print(train_dataset.__getitem__(2))
# #for text, label in tqdm(enumerate(train_loader), total=len(train_loader)):
# for text,label in train_loader:
#      print(text)
#      print(label)

In [None]:
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

for epoch in range(epochs):
    GPUtil.showUtilization()
    model.train()
    for batch_id, (text, label) in tqdm(enumerate(train_dataloader), total=len(train_dataloader)):
        optimizer.zero_grad()
        model.zero_grad()
        # encoding and zero padding
        encoded_list = [tokenizer.encode(t, add_special_tokens=True, max_length=MAX_LEN, pad_to_max_length=True) for t in text]
        #padded_list =  [e + [0] * (512-len(e)) for e in encoded_list]
        sample = torch.tensor(encoded_list)
        labels = torch.tensor(label)
        sample, label = sample.to(device), label.to(device)
        labels = torch.tensor(label)
        outputs = model(sample, labels=labels)
        #loss, logits = outputs
        loss = outputs[0]
        logits = outputs[1]


        pred = torch.argmax(F.softmax(logits, dim=1), dim=1)
        correct = pred.eq(labels)
        total_correct += correct.sum().item()
        total_len += len(labels)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        if itr % p_itr == 0:
            print('[Epoch {}/{}] Iteration {} -> Train Loss: {:.4f}, Accuracy: {:.3f}'.format(epoch+1, epochs, itr, total_loss/p_itr, total_correct/total_len))
            total_loss = 0
            total_len = 0
            total_correct = 0
    #torch.save(model.state_dict(), './model.pt')
        itr+=1 
     
    model.eval()
    total_len = 0
    total_correct = 0
    for batch_id, (text, label) in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
        encoded_list = [tokenizer.encode(t, add_special_tokens=True, max_length=MAX_LEN, pad_to_max_length=True) for t in text]
        sample = torch.tensor(encoded_list)
        labels = torch.tensor(label)
        sample, label = sample.to(device), label.to(device)
        labels = torch.tensor(label)
        with torch.no_grad():    
            outputs = model(sample, labels=labels)
        
        logits = outputs[1]
        pred = torch.argmax(F.softmax(logits, dim=1), dim=1)
        correct = pred.eq(labels)
        total_correct += correct.sum().item()
        total_len += len(labels)
    print("epoch {} test acc {}".format(epoch+1, total_correct/total_len))
    total_len = 0
    total_correct = 0
    
        
        

| ID | GPU | MEM |
------------------
|  0 | 36% | 14% |


  0%|          | 0/18420 [00:00<?, ?it/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  labels = torch.tensor(label)
  labels = torch.tensor(label)


[Epoch 1/5] Iteration 1000 -> Train Loss: 1.8372, Accuracy: 0.333
[Epoch 1/5] Iteration 2000 -> Train Loss: 1.6386, Accuracy: 0.406
[Epoch 1/5] Iteration 3000 -> Train Loss: 1.4817, Accuracy: 0.467
[Epoch 1/5] Iteration 4000 -> Train Loss: 1.4074, Accuracy: 0.506
[Epoch 1/5] Iteration 5000 -> Train Loss: 1.3558, Accuracy: 0.525
[Epoch 1/5] Iteration 6000 -> Train Loss: 1.3439, Accuracy: 0.526
[Epoch 1/5] Iteration 7000 -> Train Loss: 1.2942, Accuracy: 0.545
[Epoch 1/5] Iteration 8000 -> Train Loss: 1.2869, Accuracy: 0.539
[Epoch 1/5] Iteration 9000 -> Train Loss: 1.2686, Accuracy: 0.555
[Epoch 1/5] Iteration 10000 -> Train Loss: 1.2623, Accuracy: 0.548
[Epoch 1/5] Iteration 11000 -> Train Loss: 1.2573, Accuracy: 0.560
[Epoch 1/5] Iteration 12000 -> Train Loss: 1.2097, Accuracy: 0.577
[Epoch 1/5] Iteration 13000 -> Train Loss: 1.1942, Accuracy: 0.576
[Epoch 1/5] Iteration 14000 -> Train Loss: 1.2281, Accuracy: 0.564
[Epoch 1/5] Iteration 15000 -> Train Loss: 1.2142, Accuracy: 0.573
[Epo

  0%|          | 0/4606 [00:00<?, ?it/s]

  labels = torch.tensor(label)
  labels = torch.tensor(label)


epoch 1 test acc 0.5877531078660225
| ID | GPU | MEM |
------------------
|  0 | 61% | 50% |


  0%|          | 0/18420 [00:00<?, ?it/s]

[Epoch 2/5] Iteration 19000 -> Train Loss: 1.1500, Accuracy: 0.611
[Epoch 2/5] Iteration 20000 -> Train Loss: 1.1022, Accuracy: 0.609
[Epoch 2/5] Iteration 21000 -> Train Loss: 1.1009, Accuracy: 0.608
[Epoch 2/5] Iteration 22000 -> Train Loss: 1.0965, Accuracy: 0.610
[Epoch 2/5] Iteration 23000 -> Train Loss: 1.0819, Accuracy: 0.613
[Epoch 2/5] Iteration 24000 -> Train Loss: 1.1036, Accuracy: 0.609
[Epoch 2/5] Iteration 25000 -> Train Loss: 1.1127, Accuracy: 0.605
[Epoch 2/5] Iteration 26000 -> Train Loss: 1.0947, Accuracy: 0.617
[Epoch 2/5] Iteration 27000 -> Train Loss: 1.0702, Accuracy: 0.630
[Epoch 2/5] Iteration 28000 -> Train Loss: 1.0757, Accuracy: 0.620
[Epoch 2/5] Iteration 29000 -> Train Loss: 1.0908, Accuracy: 0.605
[Epoch 2/5] Iteration 30000 -> Train Loss: 1.0875, Accuracy: 0.613
[Epoch 2/5] Iteration 31000 -> Train Loss: 1.0664, Accuracy: 0.619
[Epoch 2/5] Iteration 32000 -> Train Loss: 1.0714, Accuracy: 0.625
[Epoch 2/5] Iteration 33000 -> Train Loss: 1.1018, Accuracy: 0

  0%|          | 0/4606 [00:00<?, ?it/s]

epoch 2 test acc 0.6075131643233267
| ID | GPU | MEM |
------------------
|  0 | 64% | 50% |


  0%|          | 0/18420 [00:00<?, ?it/s]

[Epoch 3/5] Iteration 37000 -> Train Loss: 1.0483, Accuracy: 0.652
[Epoch 3/5] Iteration 38000 -> Train Loss: 0.9433, Accuracy: 0.662
[Epoch 3/5] Iteration 39000 -> Train Loss: 0.9695, Accuracy: 0.646
[Epoch 3/5] Iteration 40000 -> Train Loss: 0.9764, Accuracy: 0.652
[Epoch 3/5] Iteration 41000 -> Train Loss: 1.0248, Accuracy: 0.638
[Epoch 3/5] Iteration 42000 -> Train Loss: 0.9429, Accuracy: 0.656
[Epoch 3/5] Iteration 43000 -> Train Loss: 0.9670, Accuracy: 0.651
[Epoch 3/5] Iteration 44000 -> Train Loss: 1.0083, Accuracy: 0.639
[Epoch 3/5] Iteration 45000 -> Train Loss: 0.9729, Accuracy: 0.659
[Epoch 3/5] Iteration 46000 -> Train Loss: 0.9965, Accuracy: 0.647
[Epoch 3/5] Iteration 47000 -> Train Loss: 0.9724, Accuracy: 0.657
[Epoch 3/5] Iteration 48000 -> Train Loss: 0.9614, Accuracy: 0.663
[Epoch 3/5] Iteration 49000 -> Train Loss: 0.9981, Accuracy: 0.648
[Epoch 3/5] Iteration 50000 -> Train Loss: 0.9951, Accuracy: 0.639
[Epoch 3/5] Iteration 51000 -> Train Loss: 0.9909, Accuracy: 0

  0%|          | 0/4606 [00:00<?, ?it/s]

epoch 3 test acc 0.6179360512458607
| ID | GPU | MEM |
------------------
|  0 | 61% | 50% |


  0%|          | 0/18420 [00:00<?, ?it/s]

[Epoch 4/5] Iteration 56000 -> Train Loss: 0.8685, Accuracy: 0.710
[Epoch 4/5] Iteration 57000 -> Train Loss: 0.8711, Accuracy: 0.690
[Epoch 4/5] Iteration 58000 -> Train Loss: 0.8600, Accuracy: 0.697
[Epoch 4/5] Iteration 59000 -> Train Loss: 0.8424, Accuracy: 0.704
[Epoch 4/5] Iteration 60000 -> Train Loss: 0.8447, Accuracy: 0.701
[Epoch 4/5] Iteration 61000 -> Train Loss: 0.8492, Accuracy: 0.699
[Epoch 4/5] Iteration 62000 -> Train Loss: 0.8465, Accuracy: 0.698
[Epoch 4/5] Iteration 63000 -> Train Loss: 0.8735, Accuracy: 0.689
[Epoch 4/5] Iteration 64000 -> Train Loss: 0.8637, Accuracy: 0.690
[Epoch 4/5] Iteration 65000 -> Train Loss: 0.8579, Accuracy: 0.691
[Epoch 4/5] Iteration 66000 -> Train Loss: 0.8729, Accuracy: 0.683
[Epoch 4/5] Iteration 67000 -> Train Loss: 0.8743, Accuracy: 0.688
[Epoch 4/5] Iteration 68000 -> Train Loss: 0.8652, Accuracy: 0.689
[Epoch 4/5] Iteration 69000 -> Train Loss: 0.8821, Accuracy: 0.681
[Epoch 4/5] Iteration 70000 -> Train Loss: 0.8705, Accuracy: 0

  0%|          | 0/4606 [00:00<?, ?it/s]

epoch 4 test acc 0.6215189186254818
| ID | GPU | MEM |
------------------
|  0 | 47% | 50% |


  0%|          | 0/18420 [00:00<?, ?it/s]

[Epoch 5/5] Iteration 74000 -> Train Loss: 0.8639, Accuracy: 0.745
[Epoch 5/5] Iteration 75000 -> Train Loss: 0.7464, Accuracy: 0.728
[Epoch 5/5] Iteration 76000 -> Train Loss: 0.7572, Accuracy: 0.727
[Epoch 5/5] Iteration 77000 -> Train Loss: 0.7518, Accuracy: 0.732
[Epoch 5/5] Iteration 78000 -> Train Loss: 0.7559, Accuracy: 0.731
[Epoch 5/5] Iteration 79000 -> Train Loss: 0.7602, Accuracy: 0.736
[Epoch 5/5] Iteration 80000 -> Train Loss: 0.7563, Accuracy: 0.731
[Epoch 5/5] Iteration 81000 -> Train Loss: 0.7595, Accuracy: 0.726
[Epoch 5/5] Iteration 82000 -> Train Loss: 0.7631, Accuracy: 0.732
[Epoch 5/5] Iteration 83000 -> Train Loss: 0.7543, Accuracy: 0.726
[Epoch 5/5] Iteration 84000 -> Train Loss: 0.7665, Accuracy: 0.731
[Epoch 5/5] Iteration 85000 -> Train Loss: 0.7795, Accuracy: 0.723
[Epoch 5/5] Iteration 86000 -> Train Loss: 0.7851, Accuracy: 0.727
[Epoch 5/5] Iteration 87000 -> Train Loss: 0.7702, Accuracy: 0.723
[Epoch 5/5] Iteration 88000 -> Train Loss: 0.7898, Accuracy: 0

  0%|          | 0/4606 [00:00<?, ?it/s]