### 0. Environment Settings 

#### 1) Import Library

In [1]:
import pandas as pd 
import numpy as np
import pymysql
import os 
import torch
import time
import math

#### 2) MySQL Connect 

In [2]:
conn = pymysql.connect(host='localhost', user='lamda_00', password='lamda95', db='chatbot', charset='utf8')
curs = conn.cursor()

In [3]:
def execute_sql(sql):
    curs.execute(sql)
    
    return curs.fetchall()

In [4]:
sql = 'SHOW TABLES;'

execute_sql(sql)

(('context_df',),
 ('intensity_df',),
 ('polarity_df',),
 ('response_df',),
 ('wellness_df',))

#### 3) Load data

In [5]:
sql = 'DESC wellness_df;'

execute_sql(sql)

(('idx', 'int(11)', 'NO', 'PRI', None, ''),
 ('intent', 'varchar(100)', 'NO', '', None, ''),
 ('keyword', 'varchar(100)', 'NO', '', None, ''),
 ('utterance', 'varchar(1000)', 'NO', '', None, ''),
 ('intent_label', 'int(11)', 'NO', '', None, ''),
 ('intent_keyword', 'varchar(100)', 'NO', '', None, ''),
 ('ik_label', 'int(11)', 'NO', '', None, ''))

In [6]:
sql = 'DESC response_df;'

execute_sql(sql)

(('idx', 'int(11)', 'NO', 'PRI', None, ''),
 ('Question', 'varchar(1000)', 'NO', '', None, ''),
 ('Answer', 'varchar(1000)', 'NO', '', None, ''))

In [7]:
sql = 'SELECT Question, Answer FROM response_df;'

data = execute_sql(sql)
data[:5]

(('12시 땡!', '하루가 또 가네요.'),
 ('1지망 학교 떨어졌어', '위로해 드립니다.'),
 ('3박4일 놀러가고 싶다', '여행은 언제나 좋죠.'),
 ('3박4일 정도 놀러가고 싶다', '여행은 언제나 좋죠.'),
 ('PPL 심하네', '눈살이 찌푸려지죠.'))

In [8]:
context_list = list(map(lambda x: x[0], data))
candidate_list = list(map(lambda x: x[1], data))

candidate_list[:3]

['하루가 또 가네요.', '위로해 드립니다.', '여행은 언제나 좋죠.']

#### 4) Stop MySQL

In [9]:
conn.close()

In [10]:
len(candidate_list)

11823

### 1. Pretrained tokenizer, model load 

In [11]:
data_path = './'

In [12]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained(
    "beomi/kcbert-base",
    do_lower_case=False,
)

In [13]:
from transformers import BertConfig, BertModel

pretrained_model_config = BertConfig.from_pretrained(
    "beomi/kcbert-base"
)

model = BertModel.from_pretrained(
    "beomi/kcbert-base",
    config=pretrained_model_config,
)

Some weights of the model checkpoint at beomi/kcbert-base were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [14]:
pretrained_model_config

BertConfig {
  "_name_or_path": "beomi/kcbert-base",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 300,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.23.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30000
}

In [15]:
device = torch.device("cuda:0")

### 2. Candidate Embedding 생성

#### 2-1. Candidate feature 추출  (input_ids, token_type_ids, attention_mask)

In [16]:
can_features = tokenizer(
    candidate_list,
    max_length=12,
    padding="max_length",
    truncation=True,
)

# list -> torch.tensor로 형변환 
can_features = {k: torch.tensor(v) for k, v in can_features.items()}

In [17]:
can_features

{'input_ids': tensor([[    2, 21748,  1052,  ...,     0,     0,     0],
         [    2, 12235,  4032,  ...,     0,     0,     0],
         [    2,  9135,  4057,  ...,     0,     0,     0],
         ...,
         [    2,  1849,  6687,  ...,     0,     0,     0],
         [    2,  2483, 22375,  ...,   248, 11363,     3],
         [    2, 26694,  4093,  ...,    17,     3,     0]]),
 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]]),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 1, 1, 1],
         [1, 1, 1,  ..., 1, 1, 0]])}

#### 2-2. BERT 모델 전달 

In [18]:
can_outputs = model(**can_features)

In [19]:
can_outputs.last_hidden_state

tensor([[[ 0.7649,  0.5689,  0.1016,  ..., -0.5161,  0.9849,  0.3144],
         [ 0.8451,  0.7389,  0.1029,  ..., -0.6002, -0.8049, -0.0372],
         [ 0.9906,  0.3476, -0.7476,  ...,  0.5393,  0.2272,  0.7750],
         ...,
         [ 0.6325,  1.2666,  0.3002,  ...,  0.1822,  0.3950, -0.1300],
         [ 0.0723,  1.3062,  0.3284,  ...,  1.1223,  0.6953,  0.0620],
         [ 0.8450,  1.4619,  0.3135,  ...,  0.8846,  0.8334,  0.0699]],

        [[-0.0144,  0.1817,  2.0044,  ..., -0.1886, -1.0998, -0.9490],
         [-0.9387, -0.6190,  1.2955,  ...,  1.2500, -1.5786, -0.5666],
         [-0.5842, -0.1151,  0.5393,  ...,  1.3651,  0.4317, -0.5529],
         ...,
         [ 0.6114,  0.4347,  2.2687,  ...,  1.2959, -0.4257, -0.8346],
         [-0.7241,  0.7371,  2.2564,  ...,  0.7626, -0.6816, -1.0712],
         [ 0.6431,  0.0905,  2.3470,  ...,  1.0541, -0.9329, -1.0942]],

        [[ 0.0663, -0.8979,  1.1598,  ...,  0.0529,  0.3811,  0.9280],
         [-0.9288, -0.5393,  0.4914,  ..., -0

In [20]:
np.shape(can_outputs.last_hidden_state)

torch.Size([11823, 12, 768])

In [21]:
cand_emb_list = [can_output[0].detach().numpy() for can_output in can_outputs.last_hidden_state]
np.shape(cand_emb_list[0]), len(cand_emb_list)

((768,), 11823)

In [22]:
np.save(os.path.join(data_path, 'cand_emb.npy'), cand_emb_list)

In [23]:
cand_emb_list = np.load(os.path.join(data_path, 'cand_emb.npy'))
cand_emb_list = torch.Tensor(cand_emb_list).to(device)

### 3. Poly-encoder 내부 동작

#### 3-1. 입력 문장에 대한 Key, Value 생성 

In [24]:
ctxt = '오늘 조금 우울했어'

c_list = [] 
c_list.append(ctxt)

con_features = tokenizer(   # CLS 토큰: input_ids - 2, SEP 토큰: input_ids - 3
    c_list,
    max_length=12,
    padding="max_length",
    truncation=True,
)

con_features = {k: torch.tensor(v) for k, v in con_features.items()}
con_outputs = model(**con_features)

In [25]:
np.shape(con_outputs[0])

torch.Size([1, 12, 768])

In [26]:
keys = con_outputs[0].to(device)
values = con_outputs[0].to(device)
np.shape(keys)

torch.Size([1, 12, 768])

#### 3-2.Code vector 생성 

In [27]:
import torch.nn as nn 
import torch.nn.functional as F

num_poly_codes = 64
poly_code_embeddings = nn.Embedding(num_poly_codes, 768)
poly_code_embeddings

Embedding(64, 768)

In [28]:
batch_size= con_features['input_ids'].size(0)   # 11823
batch_size

1

In [29]:
poly_code_ids = torch.arange(num_poly_codes, dtype=torch.long)
poly_code_ids = poly_code_ids.unsqueeze(0).expand(batch_size, num_poly_codes)
poly_codes = poly_code_embeddings(poly_code_ids).to(device)

In [30]:
poly_codes

tensor([[[ 0.0028,  1.1321, -1.0948,  ..., -0.1663,  0.1290,  0.7286],
         [-0.3181,  0.7342,  2.3381,  ..., -0.8215,  0.9368, -0.2901],
         [-0.0865,  1.4424,  1.0132,  ..., -1.3077, -1.2281, -1.5445],
         ...,
         [ 0.1829, -0.2042,  0.4350,  ..., -1.0578, -1.4291, -0.0060],
         [ 0.9348, -0.3873, -1.4193,  ...,  1.4314, -1.0584,  0.5294],
         [-0.3095, -0.9734,  1.2445,  ...,  0.8609, -0.2726,  1.0917]]],
       device='cuda:0', grad_fn=<ToCopyBackward0>)

In [31]:
np.shape(poly_codes)

torch.Size([1, 64, 768])

#### 3-3. m 번의 Attention 적용

In [32]:
a = torch.Tensor([[1, 2, 3], [4, 5, 6]])

a.transpose(-2, -1)

tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])

In [37]:
def dot_attention(query, key, value):
    # start = time.time() 
    d_k = key.shape[-1]   # 차원 수
    attention_score = torch.matmul(query, key.transpose(-2, -1))  # Q x K^T 
    attention_score = attention_score / math.sqrt(d_k)
    attention_prob = F.softmax(attention_score, dim=-1) 
    out = torch.matmul(attention_prob, value)
    # print(f'attention: {round(time.time() - start, 4)}(s)')
    return out 

In [38]:
contexts = dot_attention(poly_codes, keys, values)   #  shape of context: [1, 64, 768]

print(np.shape(contexts))
contexts

torch.Size([1, 64, 768])


tensor([[[-0.9511,  1.1084,  0.5343,  ...,  1.1770,  0.6928,  0.4643],
         [-0.8440,  1.0626,  0.5704,  ...,  1.0553,  0.6216,  0.4649],
         [-0.8834,  0.9825,  0.3839,  ...,  1.0053,  0.7659,  0.3162],
         ...,
         [-0.9909,  1.1230,  0.5928,  ...,  1.1719,  0.5845,  0.5288],
         [-0.9373,  1.0452,  0.4642,  ...,  1.2145,  0.6493,  0.3140],
         [-0.8185,  1.0068,  0.4836,  ...,  1.0503,  0.5845,  0.3462]]],
       device='cuda:0', grad_fn=<UnsafeViewBackward0>)

In [39]:
contexts = dot_attention(poly_codes, keys, values)   #  shape of context: [1, 64, 768]

print(np.shape(contexts))
contexts

torch.Size([1, 64, 768])


tensor([[[-0.9511,  1.1084,  0.5343,  ...,  1.1770,  0.6928,  0.4643],
         [-0.8440,  1.0626,  0.5704,  ...,  1.0553,  0.6216,  0.4649],
         [-0.8834,  0.9825,  0.3839,  ...,  1.0053,  0.7659,  0.3162],
         ...,
         [-0.9909,  1.1230,  0.5928,  ...,  1.1719,  0.5845,  0.5288],
         [-0.9373,  1.0452,  0.4642,  ...,  1.2145,  0.6493,  0.3140],
         [-0.8185,  1.0068,  0.4836,  ...,  1.0503,  0.5845,  0.3462]]],
       device='cuda:0', grad_fn=<UnsafeViewBackward0>)

#### 3-4. 점수 계산 

In [41]:
def get_score(contexts, responses):
    contexts = dot_attention(responses, contexts, contexts)
    # print(np.shape(contexts[2]))
    # print(np.shape(responses))
    score = (contexts * responses).sum(-1)
    return int(score[0][0])

In [42]:
con_emb = dot_attention(cand_emb_list[0], contexts, contexts)
np.shape(con_emb)

torch.Size([1, 1, 768])

In [43]:
np.shape(con_emb[0][0])

torch.Size([768])

In [44]:
get_score(con_emb, cand_emb_list[0])

306

### 4. Poly-encoder 데모

In [45]:
def get_poly_code(num_poly_codes):
    poly_code_embeddings = nn.Embedding(num_poly_codes, 768)
    poly_code_ids = torch.arange(num_poly_codes, dtype=torch.long)
    poly_code_ids = poly_code_ids.unsqueeze(0).expand(batch_size, num_poly_codes)
    poly_codes = poly_code_embeddings(poly_code_ids).to(device)
    
    return poly_codes

In [46]:
def get_answer_poly(ctxt):
    '''
    ctxt을 입력으로 받아 가장 높은 score를 보이는 후보 답변 반환 
    '''
    start = time.time()
    c_list = [] 
    c_list.append(ctxt)
    num_poly_codes = 64
    
    con_features = tokenizer(   # CLS 토큰: input_ids - 2, SEP 토큰: input_ids - 3
        c_list,
        max_length=12,
        padding="max_length",
        truncation=True,
    )
    
    con_features = {k: torch.tensor(v) for k, v in con_features.items()}
    con_outputs = model(**con_features)
    batch_size= con_features['input_ids'].size(0)   # 1
    poly_codes = get_poly_code(num_poly_codes)
    print(f'소요 시간: {round(time.time() - start, 2)}(s)')
    
    keys = con_outputs[0].to(device); values = con_outputs[0].to(device)
    contexts = dot_attention(poly_codes, keys, values)
    
    print(f'소요 시간2: {round(time.time() - start, 2)}(s)')
    
    # contexts = contexts.detach().numpy()
    print(np.shape(contexts))
    score = []
    for can_emb in cand_emb_list:
        con_emb = dot_attention(can_emb, contexts, contexts)
        con_emb = con_emb.cpu().detach().numpy()
        can_emb = can_emb.cpu().detach().numpy()
        score.append(np.dot(con_emb, can_emb))
        
    print(f'소요 시간3: {round(time.time() - start, 2)}(s)')
    return candidate_list[np.argmax(score)]

In [47]:
context = '오늘 날씨 어때 ?'

start = time.time()
print(f'===== Poly-encoder 구현 ====== ') 
print(f'입력 문장: {context}')
print(f'챗봇 대답: {get_answer_poly(context)}')
print(f'소요 시간: {round(time.time() - start, 2)}(s), len(candidate): {len(candidate_list)}')
print(f'==============================')

입력 문장: 오늘 날씨 어때 ?
소요 시간: 0.03(s)
소요 시간2: 0.03(s)
torch.Size([1, 64, 768])
소요 시간3: 2.3(s)
챗봇 대답: 오늘 미세먼지가 많데요.
소요 시간: 2.31(s), len(candidate): 11823
