# BERT 구현

In [1]:
import math
import numpy as np

import torch
from torch import nn


In [2]:
import json

config_file = "./weights/bert_config.json"

json_file = open(config_file, 'r')
config = json.load(json_file)

config

{'attention_probs_dropout_prob': 0.1,
 'hidden_act': 'gelu',
 'hidden_dropout_prob': 0.1,
 'hidden_size': 768,
 'initializer_range': 0.02,
 'intermediate_size': 3072,
 'max_position_embeddings': 512,
 'num_attention_heads': 12,
 'num_hidden_layers': 12,
 'type_vocab_size': 2,
 'vocab_size': 30522}

In [3]:
from attrdict import AttrDict

config = AttrDict(config)
config.hidden_size

768

## BERT에 레이어 정규화층 정의

In [4]:
class BertLayerNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-12):
        super(BertLayerNorm, self).__init__()
        self.gamma = nn.Parameter(torch.ones(hidden_size))
        self.beta = nn.Parameter(torch.zeros(hidden_size))
        self.variance_epsilon = eps

    def forward(self, x):
        u = x.mean(-1, keepdim=True)
        s = (x-u).pow(2).mean(-1, keepdim=True)
        x = (x-u) / torch.sqrt(s + self.variance_epsilon)
        return self.gamma * x + self.beta
        

## Embeddings 모듈 구현

In [5]:
class BertEmbeddings(nn.Module):
    def __init__(self, config):
        super(BertEmbeddings, self).__init__()

        # Token Embedding
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)

        # Transformer Positional Embedding
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)

        # Sentence Embedding
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, input_ids, token_type_ids=None):
        # 1. Token Embedding
        words_embeddings = self.word_embeddings(input_ids)

        # 2. Sentence Embedding
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(input_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        # 3. Transformer Positional Embedding
        seq_length = input_ids.size(1)
        position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
        position_embeddings = self.position_embeddings(position_ids)

        embeddings = words_embeddings + position_embeddings + token_type_embeddings

        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)

        return embeddings



## BertLayer 모듈

In [11]:
class BertLayer(nn.Module):
    def __init__(self, config):
        super(BertLayer, self).__init__()

        # Self-Attention 부분
        self.attention = BertAttention(config)

        # Self-Attention의 출력을 처리하는 전결합 층
        self.intermediate = BertIntermediate(config)

        # Self-Attention에 의한 특징량과 BertLayer에 원래의 입력을 더하는 층
        self.output = BertOutput(config)

    def forward(self, hiddne_states, attention_mask, attention_show_fig=False):
        if attention_show_fig == True:
            attention_output, attention_prob = self.attention(hiddne_states, attention_mask, attention_show_fig)
            intermediate_output = self.intermediate(attention_output)
            layer_output = self.output(intermediate_output, attention_output)
            return layer_output, attention_prob

        elif attention_show_fig == False:
            attention_output = self.attention(hiddne_states, attention_mask, attention_show_fig)
            intermediate_output = self.intermediate(attention_output)
            layer_output = self.output(intermediate_output, attention_output)

            return layer_output

class BertAttention(nn.Module):
    def __init__(self, config):
        super(BertAttention, self).__init__()
        self.selfattn = BertSelfAttention(config)
        self.output = BertSelfOutput(config)

    def forward(self, input_tensor, attention_mask, attention_show_fig=False):
        if attention_show_fig ==True:
            self_output, attention_probs = self.selfattn(input_tensor, attention_mask, attention_show_fig)
            attention_output = self.output(self_output, input_tensor)
            return attention_output, attention_probs

        elif attention_show_fig == False:
            self_output = self.selfattn(input_tensor, attention_mask, attention_show_fig)
            attention_output = self.output(self_output, input_tensor)
            return attention_output

class BertSelfAttention(nn.Module):
    def __init__(self, config):
        super(BertSelfAttention, self).__init__()

        self.num_attention_heads = config.num_attention_heads

        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x):
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(*new_x_shape)
        return x.permute(0,2,1,3)

    def forward(self, hidden_states, attention_mask, attention_show_fig=False):
        mixed_query_layer = self.query(hidden_states)
        mixed_key_layer = self.key(hidden_states)
        mixed_value_layer = self.value(hidden_states)

        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)

        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1,-2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)

        attention_scores = attention_scores + attention_mask

        attention_probs = nn.Softmax(dim=-1)(attention_scores)
        attention_probs = self.dropout(attention_probs)

        context_layer = torch.matmul(attention_probs, value_layer)

        context_layer = context_layer.permute(0,2,1,3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size, )

        context_layer = context_layer.view(*new_context_layer_shape)

        if attention_show_fig == True:
            return context_layer, attention_probs
        elif attention_show_fig == False:
            return context_layer

class BertSelfOutput(nn.Module):
    def __init__(self, config):
        super(BertSelfOutput, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states

def gelu(x):
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))


class BertIntermediate(nn.Module):
    def __init__(self, config):
        super(BertIntermediate, self).__init__()

        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)

        self.intermediate_act_fn = gelu

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states

class BertOutput(nn.Module):
    def __init__(self, config):
        super(BertOutput, self).__init__()

        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)


    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states

    

## BertLayer 모듈의 반복부분

In [12]:
class BertEncoder(nn.Module):
    def __init__(self, config):
        super(BertEncoder, self).__init__()

        self.layer = nn.ModuleList([BertLayer(config) for _ in range(config.num_hidden_layers)])

    def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True, attention_show_fig=False):
        all_encoder_layers = []

        for layer_module in self.layer:
            if attention_show_fig == True:
                hidden_states, attention_prob = layer_module(hidden_states, attention_mask, attention_show_fig)
            elif attention_show_fig == False:
                hidden_states = layer_module(hidden_states, attention_mask, attention_show_fig)

            if output_all_encoded_layers:
                all_encoder_layers.append(hidden_states)

        if not output_all_encoded_layers:
            all_encoder_layers.append(hidden_states)

        if attention_show_fig == True:
            return all_encoder_layers, attention_prob
        elif attention_show_fig == False:
            return all_encoder_layers

## BertPooler 모듈

In [13]:
class BertPooler(nn.Module):
    def __init__(self, config):
        super(BertPooler, self).__init__()

        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        first_token_tensor = hidden_states[:, 0]

        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output

In [14]:
input_ids = torch.LongTensor([[31,51,12,23,99], [15,5,1,0,0]])
print("입력 단어 ID열의 텐서 크기 : ", input_ids.shape)
attention_mask = torch.LongTensor([[1,1,1,1,1], [1,1,1,0,0]])
print("입력 마스크의 텐서 크기 : ", attention_mask.shape)

token_type_ids = torch.LongTensor([[0,0,1,1,1], [0,1,1,1,1]])
print("입력 문장 ID의 텐서 크기 : ", token_type_ids)

embeddings = BertEmbeddings(config)
encoder = BertEncoder(config)
pooler = BertPooler(config)

extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
extended_attention_mask = extended_attention_mask.to(dtype=torch.float32)
extended_attention_mask = (1.0 - extended_attention_mask) * - 10000.0
print("확장된 마스크의 텐서 크기 : ", extended_attention_mask.shape)

out1 = embeddings(input_ids, token_type_ids)
print("BertEmbeddings의 출력 크기 : ", out1.shape)

out2 = encoder(out1, extended_attention_mask)
print("BertEncoder 최후 층의 출력 텐서 크기 : ",out2[0].shape)

out3 = pooler(out2[-1])
print("BertPooler의 출력 텐서 크기 : ", out3.shape)

입력 단어 ID열의 텐서 크기 :  torch.Size([2, 5])
입력 마스크의 텐서 크기 :  torch.Size([2, 5])
입력 문장 ID의 텐서 크기 :  tensor([[0, 0, 1, 1, 1],
        [0, 1, 1, 1, 1]])
확장된 마스크의 텐서 크기 :  torch.Size([2, 1, 1, 5])
BertEmbeddings의 출력 크기 :  torch.Size([2, 5, 768])
BertEncoder 최후 층의 출력 텐서 크기 :  torch.Size([2, 5, 768])
BertPooler의 출력 텐서 크기 :  torch.Size([2, 768])


## 모두 연결

In [15]:
class BertModel(nn.Module):
    def __init__(self, config):
        super(BertModel, self).__init__()

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True, attention_show_flg=False):
        if attention_mask is None:
            attention_mask = torch.ones_like(input_ids)
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(input_ids)

        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)

        extended_attention_mask = extended_attention_mask.to(dtype=torch.float32)
        extended_attention_mask = (1.0 - extended_attention_mask)*-10000.0

        embedding_output = self.embeddings(input_ids, token_type_ids)

        if attention_show_flg == True:
            encoded_layers, attention_probs = self.encoder(embedding_output, extended_attention_mask, output_all_encoded_layers, attention_show_flg)
        
        elif attention_show_flg == False:
            encoded_layers = self.encoder(embedding_output, extended_attention_mask, output_all_encoded_layers, attention_show_flg)

        pooled_output = self.pooler(encoded_layers[-1])

        if not output_all_encoded_layers:
            encoded_layers = encoded_layers[-1]

        if attention_show_flg == True:
            return encoded_layers, pooled_output, attention_probs
        elif attention_show_flg == False:
            return encoded_layers, pooled_output

In [17]:
input_ids = torch.LongTensor([[31,51,12,23,99], [15,5,1,0,0]])
attention_mask = torch.LongTensor([[1,1,1,1,1], [1,1,1,0,0]])
token_type_ids = torch.LongTensor([[0,0,1,1,1], [0,1,1,1,1]])

net = BertModel(config)
encoded_layers, pooled_output, attention_probs = net(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False, attention_show_flg=True)

print("encoded_layer의 텐서 크기: ", encoded_layers.shape)
print("pooled_output의 텐서 크기: ", pooled_output.shape)
print("attention_prob의 텐서 크기: ", attention_probs.shape)


encoded_layer의 텐서 크기:  torch.Size([2, 5, 768])
pooled_output의 텐서 크기:  torch.Size([2, 768])
attention_prob의 텐서 크기:  torch.Size([2, 12, 5, 5])


# Bert를 활용한 벡터표현 비교(bank: 은행과 bank: 강변) 

## 학습된 모델 로드

In [18]:
weights_path = "./weights/pytorch_model.bin"
loaded_state_dict = torch.load(weights_path)

for s in loaded_state_dict.keys():
    print(s)

bert.embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.gamma
bert.embeddings.LayerNorm.beta
bert.encoder.layer.0.attention.self.query.weight
bert.encoder.layer.0.attention.self.query.bias
bert.encoder.layer.0.attention.self.key.weight
bert.encoder.layer.0.attention.self.key.bias
bert.encoder.layer.0.attention.self.value.weight
bert.encoder.layer.0.attention.self.value.bias
bert.encoder.layer.0.attention.output.dense.weight
bert.encoder.layer.0.attention.output.dense.bias
bert.encoder.layer.0.attention.output.LayerNorm.gamma
bert.encoder.layer.0.attention.output.LayerNorm.beta
bert.encoder.layer.0.intermediate.dense.weight
bert.encoder.layer.0.intermediate.dense.bias
bert.encoder.layer.0.output.dense.weight
bert.encoder.layer.0.output.dense.bias
bert.encoder.layer.0.output.LayerNorm.gamma
bert.encoder.layer.0.output.LayerNorm.beta
bert.encoder.layer.1.attention.self.query.weight
bert.encode

In [19]:
net = BertModel(config)
net.eval()

param_names = []

for name, param in net.named_parameters():
    print(name)
    param_names.append(name)

embeddings.word_embeddings.weight
embeddings.position_embeddings.weight
embeddings.token_type_embeddings.weight
embeddings.LayerNorm.gamma
embeddings.LayerNorm.beta
encoder.layer.0.attention.selfattn.query.weight
encoder.layer.0.attention.selfattn.query.bias
encoder.layer.0.attention.selfattn.key.weight
encoder.layer.0.attention.selfattn.key.bias
encoder.layer.0.attention.selfattn.value.weight
encoder.layer.0.attention.selfattn.value.bias
encoder.layer.0.attention.output.dense.weight
encoder.layer.0.attention.output.dense.bias
encoder.layer.0.attention.output.LayerNorm.gamma
encoder.layer.0.attention.output.LayerNorm.beta
encoder.layer.0.intermediate.dense.weight
encoder.layer.0.intermediate.dense.bias
encoder.layer.0.output.dense.weight
encoder.layer.0.output.dense.bias
encoder.layer.0.output.LayerNorm.gamma
encoder.layer.0.output.LayerNorm.beta
encoder.layer.1.attention.selfattn.query.weight
encoder.layer.1.attention.selfattn.query.bias
encoder.layer.1.attention.selfattn.key.weight
e

In [20]:
new_state_dict = net.state_dict().copy()

for index, (key_name, value) in enumerate(loaded_state_dict.items()):
    name = param_names[index]
    new_state_dict[name] = value
    print(str(key_name)+"->"+str(name))

    if index+1 >= len(param_names):
        break

net.load_state_dict(new_state_dict)

bert.embeddings.word_embeddings.weight->embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight->embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight->embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.gamma->embeddings.LayerNorm.gamma
bert.embeddings.LayerNorm.beta->embeddings.LayerNorm.beta
bert.encoder.layer.0.attention.self.query.weight->encoder.layer.0.attention.selfattn.query.weight
bert.encoder.layer.0.attention.self.query.bias->encoder.layer.0.attention.selfattn.query.bias
bert.encoder.layer.0.attention.self.key.weight->encoder.layer.0.attention.selfattn.key.weight
bert.encoder.layer.0.attention.self.key.bias->encoder.layer.0.attention.selfattn.key.bias
bert.encoder.layer.0.attention.self.value.weight->encoder.layer.0.attention.selfattn.value.weight
bert.encoder.layer.0.attention.self.value.bias->encoder.layer.0.attention.selfattn.value.bias
bert.encoder.layer.0.attention.output.dense.weight->encoder.layer.0.atte

<All keys matched successfully>

## Bert용 Tokenizer 구현

In [21]:
import collections

def load_vocab(vocab_file):
    vocab = collections.OrderedDict()
    ids_to_tokens = collections.OrderedDict()
    index = 0
    
    with open(vocab_file, "r", encoding="utf-8") as reader:
        while True:
            token = reader.readline()
            if not token:
                break

            token = token.strip()

            vocab[token] = index
            ids_to_tokens[index] = token
            index+=1

    return vocab, ids_to_tokens

vocab_file = "./vocab/bert-base-uncased-vocab.txt"
vocab, ids_to_tokens = load_vocab(vocab_file)

In [26]:
from utils.tokenizer import BasicTokenizer, WordpieceTokenizer

# BasicTokenizer, WordpieceTokenizer는, 참고 문헌[2] 그대로입니다
# https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/tokenization.py
# sub-word로 단어 분할을 실시하는 클래스들입니다.
class BertTokenizer(object):
    '''BERT용의 문장 단어 분할 클래스를 구현'''

    def __init__(self, vocab_file, do_lower_case=True):
        '''
        vocab_file: vocabulary에의 경로
        do_lower_case: 전처리에서 단어를 소문자로 바꾸는지 여부
        '''

        # vocabulary의 로드
        self.vocab, self.ids_to_tokens = load_vocab(vocab_file)

        # 분할 처리 함수를 "utils" 폴더에서 imoprt, sub-word로 단어 분할을 실시
        never_split = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")
        # (주석)위 단어는 도중에 분할하지 않는다. 이를 통해 하나의 단어로 간주함

        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case,
                                              never_split=never_split)
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        '''문장의 단어를 분할하는 함수'''
        split_tokens = []  # 분할 후 단어들
        for token in self.basic_tokenizer.tokenize(text):
            for sub_token in self.wordpiece_tokenizer.tokenize(token):
                split_tokens.append(sub_token)
        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        """분할된 단어 목록을 ID로 변환하는 함수"""
        ids = []
        for token in tokens:
            ids.append(self.vocab[token])

        return ids

    def convert_ids_to_tokens(self, ids):
        """ID를 단어로 변환하는 함수"""
        tokens = []
        for i in ids:
            tokens.append(self.ids_to_tokens[i])
        return tokens


## Bank의 문맥에 따른 의미변화를 단어 벡터로 구하기

In [29]:
# 문장1: 은행 계좌에 접근했습니다.
text_1 = "[CLS] I accessed the bank account. [SEP]"

# 문장2: 그는 보증금을 은행 계좌로 이체했습니다.
text_2 = "[CLS] He transferred the deposit money into the bank account. [SEP]"

# 문장3: 우리는 강변에서 축구를 합니다.
text_3 = "[CLS] We play soccer at the bank of the river. [SEP]"

# 단어 분할 Tokenizer를 준비
tokenizer = BertTokenizer(
    vocab_file="./vocab/bert-base-uncased-vocab.txt", do_lower_case=True)

# 문장의 단어를 분할
tokenized_text_1 = tokenizer.tokenize(text_1)
tokenized_text_2 = tokenizer.tokenize(text_2)
tokenized_text_3 = tokenizer.tokenize(text_3)

# 확인
print(tokenized_text_1)


['[CLS]', 'i', 'accessed', 'the', 'bank', 'account', '.', '[SEP]']


In [31]:
# 단어를 ID로 변환하기
indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
indexed_tokens_3 = tokenizer.convert_tokens_to_ids(tokenized_text_3)

# 각 문장의 bank 위치
bank_posi_1 = np.where(np.array(tokenized_text_1) == "bank")[0][0]  # 4
bank_posi_2 = np.where(np.array(tokenized_text_2) == "bank")[0][0]  # 8
bank_posi_3 = np.where(np.array(tokenized_text_3) == "bank")[0][0]  # 6

# seqId(첫번째인지 2번째 문장인지는 이번에는 필요 없음)

# 리스트를 PyTorch의 텐서로
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
tokens_tensor_3 = torch.tensor([indexed_tokens_3])

# bank의 단어 id
bank_word_id = tokenizer.convert_tokens_to_ids(["bank"])[0]

# 확인
print(tokens_tensor_1)


tensor([[  101,  1045, 11570,  1996,  2924,  4070,  1012,   102]])


In [34]:
with torch.no_grad():
    encoded_layers_1,_ = net(tokens_tensor_1, output_all_encoded_layers=True)
    encoded_layers_2,_ = net(tokens_tensor_2, output_all_encoded_layers=True)
    encoded_layers_3,_ = net(tokens_tensor_3, output_all_encoded_layers=True)

In [35]:
bank_vector_0 = net.embeddings.word_embeddings.weight[bank_word_id]

bank_vector_1_1 = encoded_layers_1[0][0, bank_posi_1]
bank_vector_1_12 = encoded_layers_1[11][0, bank_posi_1]

bank_vector_2_1 = encoded_layers_2[0][0, bank_posi_2]
bank_vector_2_12 = encoded_layers_2[11][0, bank_posi_2]

bank_vector_3_1 = encoded_layers_3[0][0, bank_posi_3]
bank_vector_3_12 = encoded_layers_3[11][0, bank_posi_3]

In [36]:
import torch.nn.functional as F

print("bank의 초기 벡터와 문장1의 1단 bank 유사도: ", F.cosine_similarity(bank_vector_0, bank_vector_1_1, dim=0))
print("bank의 초기 벡터와 문장1의 12단 bank 유사도: ", F.cosine_similarity(bank_vector_0, bank_vector_1_12, dim=0))

print("문장 1의 1층 bank와 문장 2의 1단 bank 유사도: ", F.cosine_similarity(bank_vector_1_1, bank_vector_2_1, dim=0))
print("문장 1의 1층 bank와 문장 3의 1단 bank 유사도: ", F.cosine_similarity(bank_vector_1_1, bank_vector_3_1, dim=0))

print("문장 1의 12층 bank와 문장 2의 12단 bank 유사도: ", F.cosine_similarity(bank_vector_1_12, bank_vector_2_12, dim=0))
print("문장 1의 12층 bank와 문장 3의 12단 bank 유사도: ", F.cosine_similarity(bank_vector_1_12, bank_vector_3_12, dim=0))


bank의 초기 벡터와 문장1의 1단 bank 유사도:  tensor(0.6814, grad_fn=<DivBackward0>)
bank의 초기 벡터와 문장1의 12단 bank 유사도:  tensor(0.2276, grad_fn=<DivBackward0>)
문장 1의 1층 bank와 문장 2의 1단 bank 유사도:  tensor(0.8968)
문장 1의 1층 bank와 문장 3의 1단 bank 유사도:  tensor(0.7584)
문장 1의 12층 bank와 문장 2의 12단 bank 유사도:  tensor(0.8796)
문장 1의 12층 bank와 문장 3의 12단 bank 유사도:  tensor(0.4814)


In [37]:
import random

In [62]:
def factorial(num):
    if num == 0 or num == 1:
        return 1

    return num * factorial(num-1)

In [69]:
from math import sqrt
for _ in range(5):
 
    ax, ay, bx, by, cx, cy = (random.randint(0,10) for _ in range(6))
    print(ax,ay,bx,by,cx,cy)
    
    pos = [[ax, ay], [bx, by], [cx, cy]]
    pos = sorted(pos, key=lambda x: x[0])
    x_inc = pos[1][0] - pos[0][0]
    y_inc = pos[1][1] - pos[0][1]
    
    if (ax - bx) == (ax - cx) == 0 or (ay - by) == (ay - cy) == 0:
        print(-1.0)
    elif (ay - by) * (ax - cx) == (ay - cy) * (ax - bx):
        print(-1.0)
    else:
        line_1 = sqrt((ay - by) ** 2 + (ax - bx) ** 2)
        line_2 = sqrt((by - cy) ** 2 + (bx - cx) ** 2)
        line_3 = sqrt((cy - ay) ** 2 + (cx - ax) ** 2)
    
        ret_max = max(line_1 + line_2, line_2 + line_3, line_3 + line_1)
        ret_min = min(line_1 + line_2, line_2 + line_3, line_3 + line_1)
        ret = (ret_max - ret_min) * 2
        print(ret)
    print("----------")

8 4 7 2 10 5
4.01314541923899
----------
7 6 1 10 8 5
14.376223409339065
----------
5 9 9 10 5 4
6.175993850620635
----------
0 3 10 9 10 2
9.323807579381203
----------
5 8 0 0 6 7
16.03953513936702
----------


In [70]:
import random

In [75]:
a = []
for _ in range(10):
    for _ in range(3):
        a.append(random.randint(-100,100))
    print(a)
    a.sort()
    if a[1] - a[0] == a[2] - a[1]:
        print(a[2] * 2 - a[1])
    elif a[1] - a[0] > a[2] - a[1]:
        print(a[1] * 2 - a[2])
    else:
        print(a[1] * 2 - a[0])
    print("--------")

[52, 94, 13]
91
--------
[13, 52, 94, -97, 71, 50]
-24
--------
[-97, 13, 50, 52, 71, 94, 37, -18, 43]
-49
--------
[-97, -18, 13, 37, 43, 50, 52, 71, 94, -57, 10, 71]
-96
--------
[-97, -57, -18, 10, 13, 37, 43, 50, 52, 71, 71, 94, 3, -6, -70]
-83
--------
[-97, -70, -57, -18, -6, 3, 10, 13, 37, 43, 50, 52, 71, 71, 94, -40, 64, -30]
-83
--------
[-97, -70, -57, -40, -30, -18, -6, 3, 10, 13, 37, 43, 50, 52, 64, 71, 71, 94, -3, 73, -7]
-83
--------
[-97, -70, -57, -40, -30, -18, -7, -6, -3, 3, 10, 13, 37, 43, 50, 52, 64, 71, 71, 73, 94, -23, 72, 31]
-83
--------
[-97, -70, -57, -40, -30, -23, -18, -7, -6, -3, 3, 10, 13, 31, 37, 43, 50, 52, 64, 71, 71, 72, 73, 94, -12, -66, -70]
-70
--------
[-97, -70, -70, -66, -57, -40, -30, -23, -18, -12, -7, -6, -3, 3, 10, 13, 31, 37, 43, 50, 52, 64, 71, 71, 72, 73, 94, -64, -18, 39]
-70
--------
