### 0. Loading data
#### We've attached the raw data. To use it, load the data with `pandas.read_excel()`.

In [None]:
import pandas as pd
import os

# Folder that holds the attached spreadsheets, and the file name of each one.
root_dir = f"{os.getcwd()}/data/"
questions_dir, train_dir = 'Question.xlsx', 'tot_train.xlsx'
train2_dir, test_dir = 'p2_train.xlsx', 'p2_test.xlsx'

# Read the question sheet, discard its two index columns and preview it.
questions = pd.read_excel(os.path.join(root_dir, questions_dir))
questions = questions.drop(columns=['index', 'index.1'])
display(questions.head())

Mounted at /content/drive


Unnamed: 0,Question
0,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...
1,자유 시간 중 상당 부분을 다양한 관심사를 탐구하는 데 할애하나요? 요즘 어떤 관심...
2,다른 사람이 울고 있는 모습을 보면 자신도 울고 싶어질 때가 많나요? 이런 상황에서...
3,일이 잘못될 때를 대비해 여러 대비책을 세우는 편인가요? 이유는 무엇인가요.
4,압박감이 심한 환경에서도 평정심을 유지하는 편인가요? 최근 경험을 말씀해주세요.


### 1. Preprocessing

#### 1-1. Encoding MBTI Labels
Since the label is given as a string (e.g. ISTP), we have to convert it to a vector.

In [None]:
def MBTI_to_vec(mbti):
  """Encode a four-letter MBTI type as four one-hot pairs.

  The positions are checked in order against the letter pairs I/E, S/N, T/F
  and J/P. The first letter of a pair becomes [1, 0], the second [0, 1].

  Returns:
    A list of four two-element lists, one per position.

  Raises:
    Exception: if `mbti` does not have exactly four items, or if an item is
      neither of the two letters accepted at its position.
  """
  if len(mbti) != 4:
    raise Exception(f"Not valid: {mbti}")

  encoded = []
  for position, (first, second) in enumerate(('IE', 'SN', 'TF', 'JP')):
    letter = mbti[position]
    if letter == first:
      encoded.append([1, 0])
    elif letter == second:
      encoded.append([0, 1])
    else:
      raise Exception(f"Not valid: {mbti}")

  return encoded

# Unit Test
assert MBTI_to_vec("ISTJ") == [[1, 0]] * 4
assert MBTI_to_vec("ENFP") == [[0, 1]] * 4

#### 1-2. Encoding Short Answers
Since the short answer is given as String type (e.g. 그렇다), we have to convert it to vectors.

In [None]:
def short_answer_to_vec(short_answer):
  """Encode a short answer as one two-element code wrapped in a list.

  The three accepted answers map as follows:
    '그렇다' (agree)               -> [[0, 0]]
    '중립/모르겠다' (neutral/unsure) -> [[0, 1]]
    '아니다' (disagree)            -> [[1, 0]]

  Raises:
    Exception: if `short_answer` is none of the three accepted answers.
  """
  known_answers = (
    ('그렇다', (0, 0)),
    ('중립/모르겠다', (0, 1)),
    ('아니다', (1, 0)),
  )
  for answer, code in known_answers:
    if short_answer == answer:
      # Build a fresh list on every call so callers never share state.
      return [list(code)]

  raise Exception(f"Not valid: {short_answer}")

# Unit Test
assert short_answer_to_vec("그렇다") == [[0, 0]]
assert short_answer_to_vec("중립/모르겠다") == [[0, 1]]

#### 1-3. Types(Intention) of Questions
We found that each question is designed to probe one particular personality dimension. Therefore, we make a list of question numbers for each dimension.

In [None]:
# Question numbers grouped by the personality dimension each question is
# meant to probe. Together the four lists cover the numbers 1..60 once each.
IE_list = [
  1, 6, 11, 15, 16, 21, 26, 31, 36, 41, 43, 51, 53,
]
SN_list = [
  2, 12, 17, 19, 22, 28, 30, 32, 35, 37, 40, 45, 46, 50, 52, 55,
]
TF_list = [
  3, 5, 8, 10, 13, 18, 23, 25, 27, 33, 38, 42, 47, 48, 54, 57, 58,
]
JP_list = [
  4, 7, 9, 14, 20, 24, 29, 34, 39, 44, 49, 56, 59, 60,
]

### 2. Model Proposal
#### 질문별 모델: 질문 별로 모델을 만든 후 60개의 확률 도출 => 질문의 의도에 따라 MBTI 도출

### 3. Implementation of the Question-based Approach
We decided to use a pre-trained BERT model as the base model, so let's implement it!

#### 3-0. Data Setup
First of all, we should organize our data in a consistent way.  
In this section, I will load three datasets (Questions, Training Data and Test Data) and merge the question text into the other two, giving two datasets: Training Data and Test Data.

In [None]:
# `questions_raw` keeps the sheet exactly as read; `questions` is the same
# data without the two index columns.
questions_path = os.path.join(root_dir, questions_dir)
questions_raw = pd.read_excel(questions_path)
questions = questions_raw.drop(columns=['index', 'index.1'])
display(questions.head())

Unnamed: 0,Question
0,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...
1,자유 시간 중 상당 부분을 다양한 관심사를 탐구하는 데 할애하나요? 요즘 어떤 관심...
2,다른 사람이 울고 있는 모습을 보면 자신도 울고 싶어질 때가 많나요? 이런 상황에서...
3,일이 잘못될 때를 대비해 여러 대비책을 세우는 편인가요? 이유는 무엇인가요.
4,압박감이 심한 환경에서도 평정심을 유지하는 편인가요? 최근 경험을 말씀해주세요.


In [None]:
def retrival_q(q_num):
  """Return the question text for question number `q_num`.

  The text is read from the 'Question' column of the module-level
  `questions` frame at row label `q_num - 1`, so numbers are 1-based.
  """
  row_label = q_num - 1
  return questions.loc[row_label, 'Question']

# Unit Test
assert retrival_q(1) == "주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁금해요."

training = pd.read_excel(os.path.join(root_dir, train_dir))

# Only the question-answer pair (plus the MBTI label) is used at this time,
# so the record/user metadata columns are removed.
training = training.drop(columns=['Data_ID', 'User_ID', 'Gender', 'Age'])

# Look up the original question text from each row's question number.
# Q_number itself is kept in the frame.
training['Question'] = training['Q_number'].map(retrival_q)

# Reordering
training = training[
  ['Question', 'Q_number', 'Short_Answer', 'Long_Answer', 'MBTI']
]

training.head()

Unnamed: 0,Question,Q_number,Short_Answer,Long_Answer,MBTI
0,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,어릴 때 왕따 당한 경험이 있고 외부 활동을 좋아하지 않기 때문에 소수의 친구와만...,INFP
1,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,중립/모르겠다,저는 일부러 만들려고 노력하지는 않아요. 생활하면서 자연스럽게 만들어지는 건 좋아해요.,ESTJ
2,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,꼭 필요한 상황이 아니면 먼저 친해지려 하지 않는다. 친구를 만드는 일도 신경을 ...,ISTP
3,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,저는 새로운 친구보다 오랜 친구를 선호합니다. 나의 가장 친한 친구는 5살 때 유...,INFJ
4,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,저는 주기적으로 새로운 친구를 만들지 않습니다. 이유는 나이가 들수록 새로운 사람...,ISTJ


In [None]:
testing = pd.read_excel(os.path.join(root_dir, test_dir))

# As for the training data, only the question-answer pair is used at this
# moment. User_ID is kept; the other metadata columns are removed.
testing = testing.drop(columns=['Data_ID', 'Gender', 'Age'])

# Look up the original question text from each row's question number.
# Q_number itself is kept in the frame.
testing['Question'] = testing['Q_number'].map(retrival_q)

# Reordering
testing = testing[
  ['User_ID', 'Question', 'Q_number', 'Short_Answer', 'Long_Answer']
]

testing.head()

Unnamed: 0,User_ID,Question,Q_number,Short_Answer,Long_Answer
0,1,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,친구를 만들 상황에 새로운 친구를 만듭니다. 의도적으로나 꼭 주기적으로 새로운 친구...
1,2,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,주기적으로 새로운 친구를 만들지는 않습니다. 자연스러운 만남을 추구하는 스타일로 업...
2,3,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,중립/모르겠다,새로운 친구를 만들기 위해 주기적으로 노력을 하진 않지만 같은 사람들과의 만남이 무...
3,4,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,"주기적으로는 아니나 새로운 친구를 만들고, 만나는 것엔 부담이 없다. 아이가 있어 ..."
4,5,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,중립/모르겠다,저는 기존 친구들과 만나기도 바쁘고 하지만 친구가 새로운 친구 소개해 주면 반갑게 ...


#### 3-1. Preprocessing
To use BERT, we have to satisfy its own preprocessing requirements.  
For example, it requires special tokens such as SEP and CLS in the input.  
But don't worry, since these requirements can be simply satisfied if we use the BERT Tokenizer.


In [None]:
from transformers import BertTokenizerFast, BertModel
import torch

# Loading a tokenizer and a model.
tokenizer_bert = BertTokenizerFast.from_pretrained("kykim/bert-kor-base")

# Experiment: measure the maximum number of tokens over all questions and
# all long answers. Questions repeat on every row, so each distinct question
# is encoded only once; the maximum is unaffected.
max_len = 0
for train_sentence in training['Question'].unique():
  max_len = max(max_len, len(tokenizer_bert.encode(train_sentence)))

for train_sentence in training['Long_Answer']:
  max_len = max(max_len, len(tokenizer_bert.encode(train_sentence)))

print(max_len)
# Here, the longest sentence is 203 tokens long, so 256 is a safe fixed length.
max_len = 256

# Preprocessing
# Encode every long answer in one batched tokenizer call. Only the long
# answer is encoded; the question text is not passed to the tokenizer.
# Each row is padded to `max_len`, and `truncation = True` caps any longer
# input at `max_len` as well, so all rows are guaranteed the same width
# (without it an over-long input would produce a wider row).
encodings = tokenizer_bert(
    training['Long_Answer'].tolist(),
    padding = 'max_length',
    truncation = True,
    max_length = max_len,
    return_tensors = 'pt',
)

# One row per training sample in every tensor below.
input_ids = encodings['input_ids']
att_masks = encodings['attention_mask']

# short_answer_to_vec returns [[a, b]]; take the inner pair -> shape (N, 2).
short_answers = torch.tensor(
    [short_answer_to_vec(answer)[0] for answer in training['Short_Answer']]
)
# MBTI_to_vec returns four one-hot pairs -> shape (N, 4, 2).
labels = torch.tensor([MBTI_to_vec(mbti) for mbti in training['MBTI']])
# Question numbers as a column vector -> shape (N, 1).
q_nums = torch.tensor(training['Q_number'].tolist()).unsqueeze(1)

203


In [None]:
# Show the first sample of every preprocessed tensor, then how many rows
# each tensor holds (all on one line).
preprocessed = (input_ids, att_masks, short_answers, labels, q_nums)
for column_tensor in preprocessed:
  print(column_tensor[0])

print(*(len(column_tensor) for column_tensor in preprocessed))

tensor([    2, 18430,  3463,  5724,  8423, 26850, 20699, 14204, 15916, 17729,
        25878, 18895, 14045, 27024,  8107, 28669,  8120,  6266, 24832,  2016,
            3,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0, 

In [None]:
import random
import numpy as np

# Seed every random number generator in use (Python, NumPy, torch CPU and
# torch CUDA) with the same value.
seed_val = 50

for seed_fn in (
  random.seed,
  np.random.seed,
  torch.manual_seed,
  torch.cuda.manual_seed_all,
):
  seed_fn(seed_val)

#### 3-2. Dataset & Dataloader Construction

In [None]:
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler

# Construct datasets
# Each sample is the tuple (input_ids, att_mask, short_answer, label, q_num).
dataset = TensorDataset(input_ids, att_masks, short_answers, labels, q_nums)

batch_size = 1

# NOTE: a 90% / 10% random train/validation split, with one randomly sampled
# dataloader per part, used to be set up here. It is disabled, so the single
# dataloader below draws from the complete dataset. The disabled code was
# previously kept as bare string literals; the last one was the final
# expression of the cell and was echoed as its output.

# Define dataloaders
dataloader = DataLoader(
    dataset,
    sampler = RandomSampler(dataset),
    batch_size = batch_size
)

'\n###\ntrain_dataloader = DataLoader(\n    train_dataset,\n    sampler = RandomSampler(train_dataset),\n    batch_size = batch_size\n)\n\nval_dataloader = DataLoader(\n    val_dataset,\n    sampler = RandomSampler(val_dataset),\n    batch_size = batch_size\n)\n###\n'

#### 3-3. Model Definition
Now we need a model written with the PyTorch package.  
Because we decided to **fine-tune** the pretrained BERT model, I'm going to add just one layer on top of the pretrained model.

In [None]:
from transformers import BertConfig, get_cosine_schedule_with_warmup
from torch import nn
from torch.optim import AdamW

class MBTIClassifier(nn.Module):
  """Encoder followed by optional dropout, one linear layer and a softmax.

  Args:
    bert: encoder module. It is called with the token ids plus
      `token_type_ids` and `attention_mask` keywords, and its result must
      expose a `pooler_output` attribute.
    hidden_size: input width of the linear layer.
    num_classes: output width of the linear layer.
    dr_rate: dropout probability applied to the pooled output. Dropout is
      skipped (and no dropout layer is created) when this is None or 0.
  """

  def __init__(self, bert, hidden_size=768, num_classes=2, dr_rate=None):
    super().__init__()
    self.dr_rate = dr_rate
    self.bert = bert
    self.lin = nn.Linear(hidden_size, num_classes)
    self.classifier = nn.Softmax(dim=1)
    if dr_rate:
      self.dropout = nn.Dropout(p=dr_rate)

  def forward(self, input_ids, att_masks, short_answers):
    """Return the softmax (over dim 1) of the linear layer's output.

    `short_answers` is accepted but not used by the current architecture.
    """
    pooled = self.bert(
        input_ids,
        token_type_ids=None,
        attention_mask=att_masks,
    ).pooler_output

    if self.dr_rate:
      pooled = self.dropout(pooled)

    return self.classifier(self.lin(pooled))

# Parameters whose name contains one of these fragments get no weight decay.
no_decay = ['bias', 'LayerNorm.weight']
learning_rate = 2e-5
# (epochs for question numbers up to 48, epochs for question numbers above 48)
epochs = (15, 1)

# For every question number, create a freshly initialised classifier with its
# optimizer and scheduler, and store the three objects in `model_<i>.pt`.
cnt = 1
for idx_list, sort in (
  (IE_list, 'IE'),
  (SN_list, 'SN'),
  (TF_list, 'TF'),
  (JP_list, 'JP'),
):
  for i in idx_list:
    iepochs = epochs[1] if i > 48 else epochs[0]

    # The schedule length is derived from the dataloader size; the first
    # 10% of the steps are warm-up.
    total_steps = len(dataloader) * iepochs
    warmup_steps = int(total_steps * 0.1)

    print(f"model: {cnt} / 60")
    model_bert = BertModel.from_pretrained("kykim/bert-kor-base")
    model = MBTIClassifier(model_bert, dr_rate = 0.3)

    # Split the parameters in one pass, keeping their original order
    # inside each group.
    decayed, undecayed = [], []
    for name, param in model.named_parameters():
      if any(nd in name for nd in no_decay):
        undecayed.append(param)
      else:
        decayed.append(param)

    optimizer_grouped_parameters = [
      {'params': decayed, 'weight_decay': 0.01},
      {'params': undecayed, 'weight_decay': 0.0},
    ]

    optimizer = AdamW(
        optimizer_grouped_parameters,
        lr = learning_rate,
        eps = 1e-8,
    )
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps = warmup_steps,
        num_training_steps = total_steps,
    )

    # The whole objects (not their state dicts) are stored.
    torch.save(
        {'model': model, 'optimizer': optimizer, 'scheduler': scheduler},
        f"model_{i}.pt",
    )

    # Drop the references before the next model is built.
    del model, optimizer, scheduler
    torch.cuda.empty_cache()

    cnt += 1

model: 1 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 2 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 3 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 4 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 5 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 6 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 7 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 8 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 9 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 10 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 11 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 12 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 13 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 14 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 15 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 16 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 17 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 18 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 19 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 20 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 21 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 22 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 23 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 24 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 25 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 26 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 27 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 28 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 29 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 30 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 31 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 32 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 33 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 34 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 35 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 36 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 37 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 38 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 39 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 40 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 41 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 42 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 43 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 44 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 45 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 46 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 47 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 48 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 49 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 50 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 51 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 52 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 53 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 54 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 55 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 56 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 57 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 58 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 59 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model: 60 / 60


Some weights of the model checkpoint at kykim/bert-kor-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


#### 3-4. Training
Finally, we can train our model!  
Let's see how accurate our model is :)

In [None]:
import numpy as np

# Device handle used by the training and inference cells for `.to(device)`.
device = torch.device("cuda")

# Loss applied to the model outputs in the training loops.
loss_fn = nn.CrossEntropyLoss()

def calc_accuracy(preds, labels):
  """Return the fraction of rows whose arg-max class matches the label's.

  Both arguments are tensors with one row per sample and one column per
  class; `labels` is compared through its own arg-max, so one-hot rows work.
  The tensors are detached and copied to the CPU before the comparison.
  """
  predicted_classes = preds.detach().cpu().numpy().argmax(axis=1).flatten()
  target_classes = labels.detach().cpu().numpy().argmax(axis=1).flatten()
  hits = np.sum(predicted_classes == target_classes)
  return hits / len(target_classes)

In [None]:
'''
###
cnt = 1

for (idx_list, sort) in zip([IE_list, SN_list, TF_list, JP_list], ['IE', 'SN', 'TF', 'JP']):
  for i in idx_list:
    # if i != 49:
    #   cnt += 1
    #   continue
    
    if i > 48:
      iepochs = epochs[1]
    else:
      iepochs = epochs[0]

    print(f"\n======= model {sort}_{cnt} =======")

    loading = torch.load(f"model_{i}.pt")
    model = loading['model']
    optimizer = loading['optimizer']
    scheduler = loading['scheduler']
    
    model.train()
    model.cuda()
  
    for epoch in range(iepochs):
      print(f"\n======= {epoch + 1} / {iepochs} =======")
      train_acc = 0.0
      train_step = 0
      val_acc = 0.0
      val_step = 0

      for step, batch in enumerate(train_dataloader):
        b_input_id = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_short = batch[2].float().to(device)
        
        if sort == 'IE':
          b_label = torch.tensor(list(map(lambda x: x[0] , batch[3].tolist()))).float().to(device)
        elif sort == 'SN':
          b_label = torch.tensor(list(map(lambda x: x[1] , batch[3].tolist()))).float().to(device)
        elif sort == 'TF':
          b_label = torch.tensor(list(map(lambda x: x[2] , batch[3].tolist()))).float().to(device)
        else:
          b_label = torch.tensor(list(map(lambda x: x[3] , batch[3].tolist()))).float().to(device)
        
        b_q_num = int(batch[4][0])

        if b_q_num != i:
          continue
        
        optimizer.zero_grad()

        b_out = model(b_input_id, b_input_mask, b_short)
        loss = loss_fn(b_out, b_label)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        train_acc += calc_accuracy(b_out, b_label)
        train_step += 1

        if step > 15000:
          print(f"epoch {epoch + 1} batch id {step} loss {loss.data.cpu().numpy()} train acc {train_acc / train_step}")
      
      model.eval()
      for step, batch in enumerate(val_dataloader):
        b_input_id = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_short = batch[2].float().to(device)

        if sort == 'IE':
          b_label = torch.tensor(list(map(lambda x: x[0] , batch[3].tolist()))).float().to(device)
        elif sort == 'SN':
          b_label = torch.tensor(list(map(lambda x: x[1] , batch[3].tolist()))).float().to(device)
        elif sort == 'TF':
          b_label = torch.tensor(list(map(lambda x: x[2] , batch[3].tolist()))).float().to(device)
        else:
          b_label = torch.tensor(list(map(lambda x: x[3] , batch[3].tolist()))).float().to(device)
        
        b_q_num = int(batch[4][0])

        if b_q_num != i:
          continue
        
        with torch.no_grad():
          b_out = model(b_input_id, b_input_mask, b_short)
        
        val_acc += calc_accuracy(b_out, b_label)
        val_step += 1

      print(f"epoch {epoch + 1} size {val_step} validation acc {val_acc / val_step}")
    
    torch.save({
            'model': model,
            'optimizer': optimizer,
            'scheduler': scheduler
            }, f"model_{i}.pt")

    del(model, optimizer, scheduler)
    torch.cuda.empty_cache()

    cnt += 1

###
'''



epoch 1 batch id 15066 loss 0.6246479153633118 train acc 0.5
epoch 1 batch id 15134 loss 0.6105066537857056 train acc 0.5051546391752577
epoch 1 batch id 15427 loss 0.6723166108131409 train acc 0.5102040816326531
epoch 1 batch id 15447 loss 0.7272826433181763 train acc 0.5050505050505051
epoch 1 batch id 15463 loss 0.7480919361114502 train acc 0.5
epoch 1 batch id 15501 loss 0.6603890657424927 train acc 0.504950495049505
epoch 1 batch id 15531 loss 0.9035084247589111 train acc 0.5
epoch 1 batch id 15647 loss 0.7314177751541138 train acc 0.49514563106796117
epoch 1 batch id 16038 loss 0.6845346689224243 train acc 0.5
epoch 1 batch id 16059 loss 0.6100777387619019 train acc 0.5047619047619047
epoch 1 batch id 16169 loss 0.8777186870574951 train acc 0.5
epoch 1 batch id 16745 loss 0.7407786250114441 train acc 0.4953271028037383
epoch 1 size 13 validation acc 0.5384615384615384


In [None]:
# Train every per-question model on the batches that belong to its question,
# then write the updated model/optimizer/scheduler back to `model_{i}.pt`.
cnt = 1

# `axis` is the position of the current dichotomy (IE, SN, TF, JP) inside each
# sample's label, i.e. which one-hot pair this group of models is trained on.
for axis, (idx_list, sort) in enumerate(zip([IE_list, SN_list, TF_list, JP_list], ['IE', 'SN', 'TF', 'JP'])):
  for i in idx_list:
    # Questions numbered above 48 use the second epoch setting.
    iepochs = epochs[1] if i > 48 else epochs[0]

    print(f"\n======= model {sort}_{cnt} =======")

    loading = torch.load(f"model_{i}.pt")
    model = loading['model']
    optimizer = loading['optimizer']
    scheduler = loading['scheduler']

    model.train()
    model.cuda()

    for epoch in range(iepochs):
      print(f"\n======= {epoch + 1} / {iepochs} =======")
      train_acc = 0.0
      train_step = 0

      for step, batch in enumerate(dataloader):
        # Decide whether this batch is for question `i` BEFORE touching the
        # GPU: most batches belong to other questions, and transferring them
        # (and building their labels) only to discard them is wasted work.
        b_q_num = int(batch[4][0])

        if b_q_num != i:
          continue

        b_input_id = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_short = batch[2].float().to(device)
        # Slice out this dichotomy's entry of every label in the batch.
        b_label = batch[3][:, axis].float().to(device)

        optimizer.zero_grad()

        b_out = model(b_input_id, b_input_mask, b_short)
        loss = loss_fn(b_out, b_label)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        train_acc += calc_accuracy(b_out, b_label)
        train_step += 1

        # Only the tail of the epoch is logged to keep the output short.
        if step > 15000:
          print(f"epoch {epoch + 1} batch id {step} loss {loss.data.cpu().numpy()} train acc {train_acc / train_step}")

    torch.save({
            'model': model,
            'optimizer': optimizer,
            'scheduler': scheduler
            }, f"model_{i}.pt")

    # Release the model before the next one is loaded.
    del(model, optimizer, scheduler)
    torch.cuda.empty_cache()

    cnt += 1

#### 3-5. Export Results
Using our trained models, produce the output for the real test inputs (variable `testing`).

In [None]:
from transformers import BertTokenizerFast, BertModel
import torch

# Loading a tokenizer and a model.
tokenizer_bert = BertTokenizerFast.from_pretrained("kykim/bert-kor-base")

# Experiment: measure the maximum number of tokens of any single question or
# any single long answer (each column is measured on its own).
max_len = 0
for column in ('Question', 'Long_Answer'):
  for test_sentence in testing[column]:
    tok = tokenizer_bert.encode(test_sentence)
    max_len = max(max_len, len(tok))

print(max_len)
# The value printed above is per sentence, while the model input below is the
# (question, answer) PAIR. Keep the fixed length of 256, as before, and let the
# tokenizer truncate longer pairs so every row has exactly the same length.
max_len = 256

input_ids = []
att_masks = []
short_answers = []
q_nums = []
user_ids = []

# Preprocessing
for idx in testing.index:
  question = testing['Question'][idx]
  short_answer = testing['Short_Answer'][idx]
  long_answer = testing['Long_Answer'][idx]
  q_num = testing['Q_number'][idx]
  user_id = testing['User_ID'][idx]

  # Encode with the tokenizer.
  # Without truncation a pair longer than `max_len` would produce a longer
  # row and make the `torch.cat` calls below fail on mismatched sizes.
  encodings = tokenizer_bert(
      question,
      long_answer,
      padding = 'max_length',
      truncation = True,
      max_length = max_len,
      return_tensors = 'pt',
  )

  input_ids.append(encodings['input_ids'])
  att_masks.append(encodings['attention_mask'])
  short_answers.append(torch.tensor(short_answer_to_vec(short_answer)))
  q_nums.append(torch.tensor([[q_num]]))
  user_ids.append(torch.tensor([[user_id]]))

# Convert to tensors (one row per test answer).
input_ids = torch.cat(input_ids, dim=0)
att_masks = torch.cat(att_masks, dim=0)
short_answers = torch.cat(short_answers, dim=0)
q_nums = torch.cat(q_nums, dim=0)
user_ids = torch.cat(user_ids, dim=0)

209


In [None]:
# One sample per batch: each person, each question.
batch_size = 1

# Bundle the preprocessed test tensors so they are indexed together.
dataset = TensorDataset(input_ids, att_masks, short_answers, q_nums, user_ids)

# Walk the dataset in its stored order (no shuffling).
dataloader = DataLoader(dataset, batch_size = batch_size, sampler = SequentialSampler(dataset))

In [None]:
# Run every test answer through the model of its question, collect the
# class-1 output per user and MBTI axis, then average per axis.
probs = []
preds = []
users = []

# For each axis: user id -> list of class-1 outputs, one per answered question.
users_IE = dict()
users_SN = dict()
users_TF = dict()
users_JP = dict()

cur_q = 0

for step, batch in enumerate(dataloader):
  print(f"-------------- {step} / {len(dataloader)} --------------")
  b_input_id = batch[0].to(device)
  b_input_mask = batch[1].to(device)
  b_short = batch[2].float().to(device)
  b_q_num = int(batch[3][0])
  b_user_id = int(batch[4][0])

  # Remember users in order of first appearance; this order defines the rows
  # of `probs` and `preds`.
  if not b_user_id in users:
    users.append(b_user_id)

  # Only reload the model when the question changes.
  if cur_q != b_q_num:
    cur_q = b_q_num

    model = torch.load(f"model_{b_q_num}.pt")['model']
    model.eval()
    model.cuda()

  with torch.no_grad():
    b_out = model(b_input_id, b_input_mask, b_short)

  b_out_np = b_out.detach().cpu().numpy().tolist()[0]

  # Pick the score table of the axis this question belongs to; anything not
  # listed in IE/SN/TF is treated as a J/P question.
  if b_q_num in IE_list:
    axis_scores = users_IE
  elif b_q_num in SN_list:
    axis_scores = users_SN
  elif b_q_num in TF_list:
    axis_scores = users_TF
  else:
    axis_scores = users_JP

  axis_scores.setdefault(b_user_id, []).append(b_out_np[1])

# Aggregate per user by explicit id lookup. Zipping the four `.values()` views
# would rely on all four dicts having the same insertion order as `users`, and
# would silently misalign rows if a user had no answer for one axis; a lookup
# raises KeyError for such a user instead.
for user in users:
  axis_probs = []
  for axis_scores in (users_IE, users_SN, users_TF, users_JP):
    user_scores = axis_scores[user]
    axis_probs.append(float(sum(user_scores) / len(user_scores)))

  probs.append(axis_probs)
  preds.append([1 if p > 0.5 else 0 for p in axis_probs])

In [None]:
# Sanity check: all four counts should be identical (one entry per user).
print(len(users), len(users_IE), len(probs), len(preds))

# Thresholded 0/1 decision per axis, indexed by user.
preds_df = pd.DataFrame(data={
    'User_ID': users,
    'I/E': [row[0] for row in preds],
    'S/N': [row[1] for row in preds],
    'T/F': [row[2] for row in preds],
    'J/P': [row[3] for row in preds],
})
preds_df = preds_df.set_index('User_ID')
preds_df.to_csv('result.csv')

# Averaged model output per axis, same layout as above.
probs_df = pd.DataFrame(data={
    'User_ID': users,
    'I/E': [row[0] for row in probs],
    'S/N': [row[1] for row in probs],
    'T/F': [row[2] for row in probs],
    'J/P': [row[3] for row in probs],
})
probs_df = probs_df.set_index('User_ID')
probs_df.to_csv('result_prob.csv')

120 120 120 120


### 4. Expansion of the Question-based Approach
Now, aggregate results using a deep learning model.

#### 4-0. Data Setup
First of all, we should organize our data in a consistent way.  
In this section, I will load the three raw files and organize them into three tables: Questions, Training Data, and Test Data.

In [None]:
# Read the question sheet and keep a copy without the two index columns.
questions_raw = pd.read_excel(os.path.join(root_dir, questions_dir))
questions = questions_raw.drop(columns=['index', 'index.1'])
display(questions.head())

Unnamed: 0,Question
0,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...
1,자유 시간 중 상당 부분을 다양한 관심사를 탐구하는 데 할애하나요? 요즘 어떤 관심...
2,다른 사람이 울고 있는 모습을 보면 자신도 울고 싶어질 때가 많나요? 이런 상황에서...
3,일이 잘못될 때를 대비해 여러 대비책을 세우는 편인가요? 이유는 무엇인가요.
4,압박감이 심한 환경에서도 평정심을 유지하는 편인가요? 최근 경험을 말씀해주세요.


In [None]:
# Retrieve the original question in String, using the question number
def retrival_q(q_num):
  """Return the text of question `q_num` from the `questions` table.

  Question numbers start at 1 while the table's row labels start at 0,
  hence the `- 1`.
  """
  row_label = q_num - 1
  return questions.loc[row_label, 'Question']

# Unit Test
assert retrival_q(1) == "주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁금해요."

# Load the training sheet.
training = pd.read_excel(os.path.join(root_dir, train2_dir))

# We will only use the question-answer pair, at this time.
training = training.drop(columns=['Data_ID', 'Gender', 'Age'])

# Retrieve the original question!
# `Q_number` is kept on purpose: later cells read it to pick the
# per-question model.
training['Question'] = training['Q_number'].apply(retrival_q)

# Reordering
training = training[['Question', 'Q_number', 'User_ID', 'Short_Answer', 'Long_Answer', 'MBTI']]

training.head()

Unnamed: 0,Question,Q_number,User_ID,Short_Answer,Long_Answer,MBTI
0,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,9,아니다,새로운 사람을 만나서 이야기를 나누고 마음이 통하는 친구가 되기까지의 과정은 많은 ...,INTJ
1,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,18,중립/모르겠다,저는 주기적으로는 새로운 친구들을 만들지 않습니다. 이유는 새로운 친구들을 사기기 ...,ENTJ
2,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,48,아니다,저는 굳이 새로운 친구들을 만들려고 하지 않아요. 원래 친하던 친구들과 만나는 게 ...,ESFJ
3,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,54,아니다,저는 주기적으로 새로운 친구를 잘 만드는 편은 아닙니다. 최근에 재택근무를 하면서 ...,ENFJ
4,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,61,그렇다,새로운 친구를 만들면서 그들의 일상을 듣고 나누는 것이 재미있습니다. 새로운 사람을...,ESFJ


In [None]:
# Load the test sheet.
testing = pd.read_excel(os.path.join(root_dir, test_dir))

# Again, we will only use the question-answer pair, at this moment.
testing = testing.drop(columns=['Data_ID', 'Gender', 'Age'])

# Retrieve the original question!
# `Q_number` is kept on purpose: later cells read it to pick the
# per-question model.
testing['Question'] = testing['Q_number'].apply(retrival_q)

# Reordering
testing = testing[['User_ID', 'Question', 'Q_number', 'Short_Answer', 'Long_Answer']]

testing.head()

Unnamed: 0,User_ID,Question,Q_number,Short_Answer,Long_Answer
0,1,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,친구를 만들 상황에 새로운 친구를 만듭니다. 의도적으로나 꼭 주기적으로 새로운 친구...
1,2,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,주기적으로 새로운 친구를 만들지는 않습니다. 자연스러운 만남을 추구하는 스타일로 업...
2,3,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,중립/모르겠다,새로운 친구를 만들기 위해 주기적으로 노력을 하진 않지만 같은 사람들과의 만남이 무...
3,4,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,아니다,"주기적으로는 아니나 새로운 친구를 만들고, 만나는 것엔 부담이 없다. 아이가 있어 ..."
4,5,주기적으로 새로운 친구를 만드나요? 경험을 비추어봤을 때 어떤지와 그러한 이유가 궁...,1,중립/모르겠다,저는 기존 친구들과 만나기도 바쁘고 하지만 친구가 새로운 친구 소개해 주면 반갑게 ...


#### 4-1. Preprocessing
To use BERT, we have to satisfy its own preprocessing requirements.  
For example, it requires special tokens such as SEP and CLS in the input.  
But don't worry: these requirements are satisfied automatically if we use the BERT tokenizer.


In [None]:
from transformers import BertTokenizerFast, BertModel
import torch

# Loading a tokenizer and a model.
tokenizer_bert = BertTokenizerFast.from_pretrained("kykim/bert-kor-base")

# Experiment: measure the maximum number of tokens.
max_len = 0
for train_sentence in training['Question']:
  tok = tokenizer_bert.encode(train_sentence)
  max_len = max(max_len, len(tok))

for train_sentence in training['Long_Answer']:
  tok = tokenizer_bert.encode(train_sentence)
  max_len = max(max_len, len(tok))

print(max_len)
# Here, the longest sentence's length is 203 in tokens, so set max_length as 256, safely
max_len = 256

# Per-user collections, filled in the row order of `training`:
#   probabilities[user]  -> outputs of the per-question models
#   labels_by_user[user] -> encoded MBTI label for each of those outputs
probabilities = dict()
labels_by_user = dict()

cur_q_num = 1
model = torch.load(f"model_1.pt")['model']
model.eval()
model.cuda()

# Preprocessing
for idx in training.index:
  question = training['Question'][idx]
  short_answer = training['Short_Answer'][idx]
  long_answer = training['Long_Answer'][idx]
  mbti = training['MBTI'][idx]
  user_id = int(training['User_ID'][idx])
  q_num = int(training['Q_number'][idx])

  # Swap in the model of the current question only when the question changes.
  if cur_q_num != q_num:
    cur_q_num = q_num
    del model
    model = torch.load(f"model_{q_num}.pt")['model']
    model.eval()
    model.cuda()

  # Encode with the tokenizer (truncate so a long pair cannot exceed max_len).
  encodings = tokenizer_bert(
      question,
      long_answer,
      padding = 'max_length',
      truncation = True,
      max_length = max_len,
      return_tensors = 'pt',
  )

  # The model takes the short answer as a float vector on the same device as
  # the other inputs, exactly like the test-time inference cells feed it.
  short_vec = torch.tensor(short_answer_to_vec(short_answer)).float()

  # Inference only: no gradients, and move the result back to the CPU so the
  # collected outputs do not pile up on the GPU.
  with torch.no_grad():
    prob = model(
        encodings['input_ids'].cuda(),
        encodings['attention_mask'].cuda(),
        short_vec.cuda(),
    ).cpu()

  # Convert MBTI of string to list of integers.
  mbti = MBTI_to_vec(mbti)

  probabilities.setdefault(user_id, []).append(prob)
  labels_by_user.setdefault(user_id, []).append(torch.tensor([mbti]))

del model

# Flatten user by user so each user's rows are contiguous, keeping the three
# lists aligned row-for-row. Each user is visited exactly once.
prob_list = []
labels = []
user_ids = []
for user, user_probs in probabilities.items():
  prob_list.extend(user_probs)
  labels.extend(labels_by_user[user])
  user_ids.extend(torch.tensor([[user]]) for _ in user_probs)

# Convert to tensors.
probabilities = torch.cat(prob_list, dim=0)
labels = torch.cat(labels, dim=0)
user_ids = torch.cat(user_ids, dim=0)

In [None]:
# Show the first row of each tensor (and the type of a probability row).
print(probabilities[0], type(probabilities[0]), labels[0], user_ids[0], sep='\n')

# Row counts of the tensors, for a quick consistency check.
print(*(len(rows) for rows in (probabilities, labels, q_nums, user_ids)))

In [None]:
import random
import numpy as np

# Fix the seeds
seed_val = 50

# Seed Python, NumPy, and PyTorch (CPU and all CUDA devices) with the same value.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
  seed_fn(seed_val)

#### 4-2. Dataset & Dataloader Construction

In [None]:
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler

# Number of rows handed to the aggregator loop per batch.
batch_size = 60

# Construct datasets: rows of (probabilities, labels, user_ids).
dataset = TensorDataset(probabilities, labels, user_ids)

'''
###
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

print(f"lengths are {train_size}:{val_size}")

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
###
'''

# Define dataloaders: a single loader over the whole dataset, in stored order.
dataloader = DataLoader(dataset, batch_size = batch_size, sampler = SequentialSampler(dataset))

'''
###
train_dataloader = DataLoader(
    train_dataset,
    sampler = RandomSampler(train_dataset),
    batch_size = batch_size
)

val_dataloader = DataLoader(
    val_dataset,
    sampler = RandomSampler(val_dataset),
    batch_size = batch_size
)
###
'''

'\n###\ntrain_dataloader = DataLoader(\n    train_dataset,\n    sampler = RandomSampler(train_dataset),\n    batch_size = batch_size\n)\n\nval_dataloader = DataLoader(\n    val_dataset,\n    sampler = RandomSampler(val_dataset),\n    batch_size = batch_size\n)\n###\n'

#### 4-3. Model Definition
Now we need a model written with the PyTorch package.  
Unlike the **fine-tuned** BERT models above, this model is a small feed-forward network that takes the per-question probabilities of one MBTI axis and aggregates them into a single prediction.

In [None]:
from transformers import BertConfig, get_cosine_schedule_with_warmup
from torch import nn
from torch.optim import AdamW

class MBTIAggregator(nn.Module):  # the input size may differ from model to model
  """Two-layer feed-forward classifier over a vector of probabilities.

  Architecture: Linear(hidden_size, 8) -> ReLU -> Linear(8, num_classes)
  -> Softmax over dim 1.

  Args:
    hidden_size: number of input features per sample.
    num_classes: number of output classes per sample.
  """

  def __init__ (self,
                hidden_size=1,
                num_classes=2):
    super().__init__()

    # Attribute names are kept as-is: they are the keys of the state dict and
    # of already pickled checkpoints.
    self.lin = nn.Linear(hidden_size, 8)
    self.relu = nn.ReLU()
    self.lin2 = nn.Linear(8, num_classes)
    self.classifier = nn.Softmax(dim=1)

  def forward(self, probabilities):
    """Map a (batch, hidden_size) input to (batch, num_classes) scores.

    Each output row is a softmax distribution, so it sums to 1.
    """
    lin_output = self.lin(probabilities)
    relu_output = self.relu(lin_output)
    lin2_output = self.lin2(relu_output)

    return self.classifier(lin2_output)

# Parameters whose name contains one of these substrings get no weight decay.
no_decay = ['bias', 'LayerNorm.weight']
learning_rate = 2e-5
epochs = 10

# The schedule length does not depend on the axis, so compute it once.
total_steps = len(dataloader) * epochs
warmup_steps = int(total_steps * 0.1)

# Create one untrained aggregator per MBTI axis and store it, together with
# its optimizer and scheduler, in `model_{axis}.pt`.
for (sort_list, sort) in zip([IE_list, SN_list, TF_list, JP_list], ['IE', 'SN', 'TF', 'JP']):
  print(f"model_{sort}")
  # One input feature per question of this axis.
  model = MBTIAggregator(hidden_size=len(sort_list))

  optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
  ]

  optimizer = AdamW(optimizer_grouped_parameters,
                lr = learning_rate,
                eps = 1e-8
              )

  scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps = warmup_steps, num_training_steps = total_steps)

  torch.save({
          'model': model,
          'optimizer': optimizer,
          'scheduler': scheduler
          }, f"model_{sort}.pt")

  del(model, optimizer, scheduler)
  torch.cuda.empty_cache()

#### 4-4. Training
Finally, we can train our model!  
Let's see how accurate our model is :)

In [None]:
import numpy as np

# Device handle used by the training and inference cells for `.to(device)`.
device = torch.device("cuda")

# Loss applied to the model outputs in the training loops.
loss_fn = nn.CrossEntropyLoss()

def calc_accuracy(preds, labels):
  """Return the share of samples whose predicted class equals the label class.

  The class of a row is the index of its largest entry, for `preds` and
  `labels` alike. Inputs are tensors; they are detached and moved to the CPU
  before being compared.
  """
  guessed = preds.detach().cpu().numpy().argmax(axis=1).flatten()
  expected = labels.detach().cpu().numpy().argmax(axis=1).flatten()
  n_correct = np.sum(guessed == expected)
  return n_correct / len(expected)

In [None]:
'''
###
cnt = 1

for (idx_list, sort) in zip([IE_list, SN_list, TF_list, JP_list], ['IE', 'SN', 'TF', 'JP']):
  for i in idx_list:
    # if i != 49:
    #   cnt += 1
    #   continue
    
    if i > 48:
      iepochs = epochs[1]
    else:
      iepochs = epochs[0]

    print(f"\n======= model {sort}_{cnt} =======")

    loading = torch.load(f"model_{i}.pt")
    model = loading['model']
    optimizer = loading['optimizer']
    scheduler = loading['scheduler']
    
    model.train()
    model.cuda()
  
    for epoch in range(iepochs):
      print(f"\n======= {epoch + 1} / {iepochs} =======")
      train_acc = 0.0
      train_step = 0
      val_acc = 0.0
      val_step = 0

      for step, batch in enumerate(train_dataloader):
        b_input_id = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_short = batch[2].float().to(device)
        
        if sort == 'IE':
          b_label = torch.tensor(list(map(lambda x: x[0] , batch[3].tolist()))).float().to(device)
        elif sort == 'SN':
          b_label = torch.tensor(list(map(lambda x: x[1] , batch[3].tolist()))).float().to(device)
        elif sort == 'TF':
          b_label = torch.tensor(list(map(lambda x: x[2] , batch[3].tolist()))).float().to(device)
        else:
          b_label = torch.tensor(list(map(lambda x: x[3] , batch[3].tolist()))).float().to(device)
        
        b_q_num = int(batch[4][0])

        if b_q_num != i:
          continue
        
        optimizer.zero_grad()

        b_out = model(b_input_id, b_input_mask, b_short)
        loss = loss_fn(b_out, b_label)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        train_acc += calc_accuracy(b_out, b_label)
        train_step += 1

        if step > 15000:
          print(f"epoch {epoch + 1} batch id {step} loss {loss.data.cpu().numpy()} train acc {train_acc / train_step}")
      
      model.eval()
      for step, batch in enumerate(val_dataloader):
        b_input_id = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_short = batch[2].float().to(device)

        if sort == 'IE':
          b_label = torch.tensor(list(map(lambda x: x[0] , batch[3].tolist()))).float().to(device)
        elif sort == 'SN':
          b_label = torch.tensor(list(map(lambda x: x[1] , batch[3].tolist()))).float().to(device)
        elif sort == 'TF':
          b_label = torch.tensor(list(map(lambda x: x[2] , batch[3].tolist()))).float().to(device)
        else:
          b_label = torch.tensor(list(map(lambda x: x[3] , batch[3].tolist()))).float().to(device)
        
        b_q_num = int(batch[4][0])

        if b_q_num != i:
          continue
        
        with torch.no_grad():
          b_out = model(b_input_id, b_input_mask, b_short)
        
        val_acc += calc_accuracy(b_out, b_label)
        val_step += 1

      print(f"epoch {epoch + 1} size {val_step} validation acc {val_acc / val_step}")
    
    torch.save({
            'model': model,
            'optimizer': optimizer,
            'scheduler': scheduler
            }, f"model_{i}.pt")

    del(model, optimizer, scheduler)
    torch.cuda.empty_cache()

    cnt += 1

###
'''

In [None]:
# Train the four aggregators (one per MBTI axis) on the per-question
# probabilities and write each one back to `model_{axis}.pt` when it is done.

# Questions that feed each aggregator, keyed by the axis name.
axis_questions = {'IE': IE_list, 'SN': SN_list, 'TF': TF_list, 'JP': JP_list}

# `axis` is the position of the dichotomy inside each sample's label.
for axis, sort in enumerate(['IE', 'SN', 'TF', 'JP']):
  print(f"======= model_{sort} =======")

  loading = torch.load(f"model_{sort}.pt")

  model = loading['model']
  optimizer = loading['optimizer']
  scheduler = loading['scheduler']

  model.train()
  model.cuda()

  # Row positions of this axis' questions inside one batch.
  # NOTE(review): assumes every batch holds exactly one user's rows, ordered
  # by question number starting at 1 — confirm against the dataset cell.
  q_rows = [q - 1 for q in axis_questions[sort]]

  for epoch in range(epochs):
    print(f"\n======= {epoch + 1} / {epochs} =======")
    train_acc = 0.0
    train_step = 0

    for step, batch in enumerate(dataloader):
      # Class-1 output of each selected question, as one (1, n_questions) input.
      # NOTE(review): column 1 is used because the inference cells also read
      # index 1 of the per-question output — confirm this is the intended input.
      b_probabilities = batch[0][q_rows, 1].unsqueeze(0).float().to(device)

      # The label of the first row stands for the whole batch (same user);
      # add a batch dimension so it matches the model output.
      b_label = batch[1][0][axis].unsqueeze(0).float().to(device)

      optimizer.zero_grad()

      b_out = model(b_probabilities)
      loss = loss_fn(b_out, b_label)
      loss.backward()

      torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

      optimizer.step()
      scheduler.step()

      train_acc += calc_accuracy(b_out, b_label)
      train_step += 1

    print(f"epoch {epoch + 1} train acc {train_acc / max(train_step, 1)}")

  # Save once per axis, after the last epoch, then release the model.
  torch.save({
          'model': model,
          'optimizer': optimizer,
          'scheduler': scheduler
          }, f"model_{sort}.pt")

  del(model, optimizer, scheduler)
  torch.cuda.empty_cache()

#### 4-5. Export Results
Using our trained models, produce the output for the real test inputs (variable `testing`).

In [None]:
from transformers import BertTokenizerFast, BertModel
import torch

# Loading a tokenizer and a model.
tokenizer_bert = BertTokenizerFast.from_pretrained("kykim/bert-kor-base")

# Experiment: measure the maximum number of tokens of any single question or
# any single long answer (each column is measured on its own).
max_len = 0
for column in ('Question', 'Long_Answer'):
  for test_sentence in testing[column]:
    tok = tokenizer_bert.encode(test_sentence)
    max_len = max(max_len, len(tok))

print(max_len)
# The value printed above is per sentence, while the model input below is the
# (question, answer) PAIR. Keep the fixed length of 256, as before, and let the
# tokenizer truncate longer pairs so every row has exactly the same length.
max_len = 256

input_ids = []
att_masks = []
short_answers = []
q_nums = []
user_ids = []

# Preprocessing
for idx in testing.index:
  question = testing['Question'][idx]
  short_answer = testing['Short_Answer'][idx]
  long_answer = testing['Long_Answer'][idx]
  q_num = testing['Q_number'][idx]
  user_id = testing['User_ID'][idx]

  # Encode with the tokenizer.
  # Without truncation a pair longer than `max_len` would produce a longer
  # row and make the `torch.cat` calls below fail on mismatched sizes.
  encodings = tokenizer_bert(
      question,
      long_answer,
      padding = 'max_length',
      truncation = True,
      max_length = max_len,
      return_tensors = 'pt',
  )

  input_ids.append(encodings['input_ids'])
  att_masks.append(encodings['attention_mask'])
  short_answers.append(torch.tensor(short_answer_to_vec(short_answer)))
  q_nums.append(torch.tensor([[q_num]]))
  user_ids.append(torch.tensor([[user_id]]))

# Convert to tensors (one row per test answer).
input_ids = torch.cat(input_ids, dim=0)
att_masks = torch.cat(att_masks, dim=0)
short_answers = torch.cat(short_answers, dim=0)
q_nums = torch.cat(q_nums, dim=0)
user_ids = torch.cat(user_ids, dim=0)

209


In [None]:
# One sample per batch: each person, each question.
batch_size = 1

# Bundle the preprocessed test tensors so they are indexed together.
dataset = TensorDataset(input_ids, att_masks, short_answers, q_nums, user_ids)

# Walk the dataset in its stored order (no shuffling).
dataloader = DataLoader(dataset, batch_size = batch_size, sampler = SequentialSampler(dataset))

In [None]:
# Stage 1: run every test answer through the model of its question and
# collect the class-1 output per user and axis.
# Stage 2: feed each user's collected outputs to the aggregator of the axis.
probs = dict()
preds = dict()
users = []

# For each axis: user id -> list of class-1 outputs, in dataloader order.
users_IE = dict()
users_SN = dict()
users_TF = dict()
users_JP = dict()

cur_q = 0

for step, batch in enumerate(dataloader):
  if step % 60 == 0:
    print(f"-------------- {(step)/60 + 1} / {len(dataloader)/60} --------------")

  b_input_id = batch[0].to(device)
  b_input_mask = batch[1].to(device)
  b_short = batch[2].float().to(device)
  b_q_num = int(batch[3][0])
  b_user_id = int(batch[4][0])

  if not b_user_id in users:
    users.append(b_user_id)

  # Only reload the model when the question changes.
  if cur_q != b_q_num:
    cur_q = b_q_num

    model = torch.load(f"model_{b_q_num}.pt")['model']
    model.eval()
    model.cuda()

  with torch.no_grad():
    b_out = model(b_input_id, b_input_mask, b_short)

  b_out_np = b_out.detach().cpu().numpy().tolist()[0]

  # Pick the score table of the axis this question belongs to; anything not
  # listed in IE/SN/TF is treated as a J/P question.
  if b_q_num in IE_list:
    axis_scores = users_IE
  elif b_q_num in SN_list:
    axis_scores = users_SN
  elif b_q_num in TF_list:
    axis_scores = users_TF
  else:
    axis_scores = users_JP

  axis_scores.setdefault(b_user_id, []).append(b_out_np[1])

# Load the four aggregators once instead of once per user.
aggregators = dict()
for sort in ['IE', 'SN', 'TF', 'JP']:
  aggregator = torch.load(f"model_{sort}.pt")['model']
  aggregator.eval()
  aggregator.cuda()
  aggregators[sort] = aggregator

for user in users:
  user_probs = []

  for sort, axis_scores in zip(['IE', 'SN', 'TF', 'JP'], [users_IE, users_SN, users_TF, users_JP]):
    # The collected scores are plain Python floats, so build the
    # (1, n_questions) input tensor explicitly.
    # NOTE(review): assumes the scores were collected in the same question
    # order the aggregator was trained on — confirm.
    in_probabilities = torch.tensor([axis_scores[user]]).float().to(device)

    with torch.no_grad():
      out = aggregators[sort](in_probabilities)

    # Keep the class-1 entry as a plain float so it can be thresholded and
    # written to CSV.
    user_probs.append(float(out[0][1]))

  probs[user] = user_probs
  preds[user] = [1 if p > 0.5 else 0 for p in user_probs]

del aggregators, aggregator
torch.cuda.empty_cache()

In [None]:
# Turn the per-user dicts into lists ordered like `users`.
preds_ls = [preds[user] for user in users]
probs_ls = [probs[user] for user in users]

# Thresholded 0/1 decision per axis, indexed by user.
preds_df = pd.DataFrame(data={
    'User_ID': users,
    'I/E': [row[0] for row in preds_ls],
    'S/N': [row[1] for row in preds_ls],
    'T/F': [row[2] for row in preds_ls],
    'J/P': [row[3] for row in preds_ls],
})
preds_df = preds_df.set_index('User_ID')
preds_df.to_csv('result.csv')

# Aggregator output per axis, same layout as above.
probs_df = pd.DataFrame(data={
    'User_ID': users,
    'I/E': [row[0] for row in probs_ls],
    'S/N': [row[1] for row in probs_ls],
    'T/F': [row[2] for row in probs_ls],
    'J/P': [row[3] for row in probs_ls],
})
probs_df = probs_df.set_index('User_ID')
probs_df.to_csv('result_prob.csv')

120 120 120 120
