In [49]:
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM
import random
import numpy as np
import torch.nn.functional as F
import torch.nn as nn

In [50]:
# Load the pretrained uncased BERT-base vocabulary and encoder weights.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# .eval() switches the module to evaluation mode and returns the module itself,
# so it can be chained onto the load.
model = BertModel.from_pretrained('bert-base-uncased').eval()


In [56]:
# Encode one example by hand: word-piece ids framed by the ids 101 and 102
# (presumably the [CLS] / [SEP] ids of this vocabulary — TODO confirm).
text = "Who was Jim Henson ? Jim Henson was a puppeteer"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = [101, *tokenizer.convert_tokens_to_ids(tokenized_text), 102]
# The outer list gives the tensor a leading dimension of size 1.
tokens_tensor = torch.tensor([indexed_tokens])

In [59]:
tokenized_text

['who',
 'was',
 'jim',
 'henson',
 '?',
 'jim',
 'henson',
 'was',
 'a',
 'puppet',
 '##eer']

In [4]:
# build data
# Encode every line of the corpus as a fixed-length row of 512 ids:
# 101 + at most 510 word-piece ids + 102, right-padded with 0.
# (101 / 102 / 0 are presumably the [CLS] / [SEP] / [PAD] ids — TODO confirm.)
with open("wiki-0.1percent.txt", "r") as f:
    # The context manager closes the handle even if reading fails.
    wiki = f.readlines()
data = []
for line in wiki:
    # Truncate to 510 ids so the two framing ids still fit in 512 positions.
    result = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(line.strip())[:510])
    result = [101] + result + [102] + [0]*(510 - len(result))
    data.append(result)
t_data = torch.LongTensor(data)
# The Python list is no longer needed once the tensor exists.
del data

In [5]:
def sampling_constrative(data, N):
    """Return N distinct rows of ``data`` chosen uniformly at random.

    Args:
        data: array/tensor indexable by an integer index array along dim 0.
        N: number of rows to draw; must not exceed ``data.shape[0]``.

    Returns:
        ``data`` indexed by the N sampled row positions.

    Raises:
        ValueError: from ``np.random.choice`` when N is larger than the
            number of rows (sampling is without replacement).
    """
    row_ids = np.arange(0, data.shape[0])
    chosen = np.random.choice(row_ids, N, replace=False)
    return data[chosen]

In [6]:
batch_size = 16
collect_num = 3
emb_size = 768
# Two independent random draws of batch_size*collect_num rows each.
sample1 = sampling_constrative(t_data, batch_size*collect_num)
sample2 = sampling_constrative(t_data, batch_size*collect_num)
input_ids1 = torch.LongTensor(sample1)
input_ids2 = torch.LongTensor(sample2)
# Rows of t_data are right-padded with id 0. Without an explicit attention
# mask the encoder treats those positions as real tokens, so mark every
# non-zero id as attendable and every padding position as masked.
_, sentence_emb1 = model(input_ids1, attention_mask=(input_ids1 != 0).long())
_, sentence_emb2 = model(input_ids2, attention_mask=(input_ids2 != 0).long())
#sentence_emb = sentence_emb.view(batch_size, collect_num, emb_size)

In [8]:
# NOTE(review): `sentence_emb` is not assigned in any visible cell (only
# sentence_emb1 / sentence_emb2 are), and the recorded first dimension (64)
# differs from batch_size*collect_num (48) — this output presumably comes from
# stale kernel state; confirm and point it at the intended variable.
sentence_emb.shape

torch.Size([64, 768])

In [396]:
# Huber loss module with default settings.
# NOTE(review): not referenced by any other visible cell.
huber_loss = torch.nn.HuberLoss()

In [9]:
def pdist(e, squared=False, eps=1e-12):
    """Pairwise Euclidean distances between the rows of ``e``.

    Args:
        e: 2-D tensor with one vector per row.
        squared: when True, return squared distances (no square root).
        eps: lower bound applied to the squared distances before the
            optional square root.

    Returns:
        Square tensor with one row/column per row of ``e`` and an exactly
        zero diagonal.
    """
    sq_norms = e.pow(2).sum(dim=1)
    gram = e @ e.t()
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, clamped from below by eps.
    dist = (sq_norms[:, None] + sq_norms[None, :] - 2 * gram).clamp(min=eps)

    if not squared:
        dist = dist.sqrt()

    # Write the zero diagonal into a copy rather than into the tensor above.
    dist = dist.clone()
    diag = range(len(e))
    dist[diag, diag] = 0
    return dist

In [22]:
class RkdDistance(nn.Module):
    """Loss comparing the pairwise-distance structure of two embedding sets.

    Both distance matrices are divided by the mean of their strictly positive
    entries, then compared with a smooth-L1 loss averaged over all entries.
    """

    def forward(self, student, teacher):
        """Return the scalar loss between student and teacher distance matrices.

        Args:
            student: embeddings accepted by ``pdist``; gradients flow through them.
            teacher: embeddings accepted by ``pdist``; used as a fixed target.
        """
        # The teacher side is computed without gradient tracking.
        with torch.no_grad():
            teacher_dist = pdist(teacher, squared=False)
            teacher_dist = teacher_dist / teacher_dist[teacher_dist > 0].mean()

        student_dist = pdist(student, squared=False)
        student_dist = student_dist / student_dist[student_dist > 0].mean()

        return F.smooth_l1_loss(student_dist, teacher_dist, reduction='mean')

In [23]:
RDistance = RkdDistance()

In [24]:
RDistance(sentence_emb1, sentence_emb2)

tensor(0.2391, grad_fn=<SmoothL1LossBackward0>)

In [397]:
# input dim : [batch_size, num_tuple ,embedding_size] # [64, 2, 768]
def distance_wise_relation_function(input):
    """Per-pair L2 distance, rescaled so the batch mean equals 1.

    Args:
        input: tensor whose dim 1 has size 2; ``input[i, 0]`` and
            ``input[i, 1]`` form the i-th pair.

    Returns:
        Tensor of shape ``(batch_size, 1)`` holding
        ``distance_i * batch_size / sum_of_distances``, in the dtype and on
        the device of ``input``. An empty batch yields an empty ``(0, 1)``
        tensor.

    Raises:
        AssertionError: if ``input.shape[1] != 2``.
    """
    assert input.shape[1] == 2, "num_tuple must be 2."

    size = input.shape[0]
    # One difference per pair for the whole batch at once. The extra axis plus
    # flatten lets the norm run over every remaining element of each pair,
    # whatever the number of trailing dimensions.
    diff = (input[:, 0] - input[:, 1]).unsqueeze(1).flatten(start_dim=1)
    vector = diff.norm(p=2, dim=1, keepdim=True)
    batch_sum = vector.sum()
    return vector*size*(1/batch_sum)

In [398]:
# NOTE(review): the recorded run fails the "num_tuple must be 2." assertion.
# `sentence_emb` was displayed earlier with shape (64, 768), so its dim 1 is
# 768, not 2; it has to be arranged as (batch, 2, emb) before this call.
relation = distance_wise_relation_function(sentence_emb)

AssertionError: num_tuple must be 2.

In [11]:
# NOTE(review): `RKdAngle` is not defined or imported in any visible cell, so
# this presumably relies on a deleted cell — confirm, then restore the
# definition or its import.
angle_loss = RKdAngle()

In [12]:
angle_loss(sentence_emb1, sentence_emb2)



tensor(0.2838, grad_fn=<SmoothL1LossBackward0>)

In [113]:
import torch
import numpy as np
# BERT has dropout, so apparently it is enough to feed the same sentence through the encoder twice. This seems to apply in train mode.

In [184]:
# Toy input of shape (1, 3, 7) for stepping through the contrastive loss; the
# following cells read dim 0 as the batch size and dim 1 as the contrast count.
# NOTE(review): no random seed is set in the visible cells, so the recorded
# outputs below will not reproduce exactly.
features = torch.Tensor(np.random.normal(1,1, size = (1,3,7)))

In [185]:
# Leading dims of `features`: dim 0 is the batch, dim 1 the per-sample entries.
batch_size, contrast_count = features.shape[0], features.shape[1]
# Identity matrix over the batch.
mask = torch.eye(batch_size, dtype=torch.float32)
# Stack the dim-1 slices along dim 0: all slices 0 first, then all slices 1, ...
contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
# Only slice 0 of each sample serves as anchor (takes every sample of the batch).
# Alternative kept from the original as commented-out code:
#   anchor_feature = contrast_feature ; anchor_count = contrast_count
anchor_feature = features[:, 0]
anchor_count = 1

In [186]:
# Dot product of every anchor with every contrast feature, divided by 0.8
# (presumably the temperature of the loss — TODO confirm and name the constant).
anchor_dot_contrast = torch.div(torch.matmul(anchor_feature, contrast_feature.T), 0.8)

In [187]:
anchor_dot_contrast

tensor([[7.1038, 6.4583, 6.5186]])

In [188]:
# Subtract each row's maximum so the largest logit in a row becomes 0. The
# log-probabilities derived from these logits in later cells are unchanged by
# this shift, and every value passed to exp() is <= 0.
logit_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
logits = anchor_dot_contrast - logit_max


In [189]:
logits

tensor([[ 0.0000, -0.6455, -0.5853]])

In [190]:
# Tile the (batch_size, batch_size) mask anchor_count times along dim 0 and
# contrast_count times along dim 1.
# NOTE(review): this rebinds `mask`, so re-running the cell tiles it again.
mask = mask.repeat(anchor_count, contrast_count)

In [191]:
mask.shape

torch.Size([1, 3])

In [192]:
mask

tensor([[1., 1., 1.]])

In [246]:
torch.arange(batch_size * (anchor_count)).view(-1, 1)

tensor([[0]])

In [251]:
# All ones, except a 0 written at column i of row i — the position where
# anchor i (features[i, 0]) meets its own copy in contrast_feature.
logits_mask = torch.scatter(torch.ones_like(mask), 1, torch.arange(batch_size * anchor_count).view(-1, 1), 0)

In [252]:
logits_mask

tensor([[0., 1., 1.]])

In [253]:
# Zero the self-comparison entry in `mask` as well, so an anchor is not
# counted as its own positive.
mask = mask * logits_mask

In [254]:
# Exponentiated logits with the self-comparison entry zeroed, so it does not
# contribute to the row sums taken in the following cells.
exp_logits = torch.exp(logits) * logits_mask

In [255]:
exp_logits

tensor([[0.0000, 0.5244, 0.5570]])

In [256]:
torch.log(exp_logits.sum(1, keepdim=True))

tensor([[0.0782]])

In [257]:
exp_logits.sum(1, keepdim=True)

tensor([[1.0814]])

In [258]:
logits

tensor([[ 0.0000, -0.6455, -0.5853]])

In [259]:
# Log-probability of each entry: logit minus the log of the row's summed
# exp_logits (the self-comparison entry is excluded from that sum).
log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))

In [260]:
# Average log-probability over the entries where `mask` is non-zero, per row.
# NOTE(review): a row whose mask is all zeros divides 0 by 0 and yields NaN.
mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)

In [261]:
mean_log_prob_pos

tensor([-0.6936])

In [165]:
# Negate the mean log-probability so that a higher probability gives a lower loss.
# NOTE(review): this cell's execution count (165) is lower than that of the
# cell computing mean_log_prob_pos (260), so the stored `loss` may be stale.
loss = -1 * mean_log_prob_pos

In [166]:
# View the per-anchor losses as (anchor_count, batch_size) and average them
# into a single scalar. Rebinds `loss`.
loss = loss.view(anchor_count, batch_size).mean()

In [24]:
import torch
# Cast every label to int and pack the result into a LongTensor for display.
torch.LongTensor([int(label) for label in labels])

tensor([1, 1, 1,  ..., 1, 1, 1])

In [1]:
from dataload import get_data, load_data, construct_data_for_finetuning
# Fetch the "test" data for the "stsb" task through the project-local
# `dataload.get_data` helper; it returns three values, bound here to
# t_data1, t_data2 and labels (presumably the two sentence sides and their
# labels — TODO confirm against dataload).
t_data1, t_data2, labels = get_data(data_type="test", task = "stsb")

Reusing dataset glue (/home/shinjk1156/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

In [14]:
# Same helper call for the "sick" task; rebinds t_data1, t_data2 and labels,
# replacing whatever an earlier get_data call stored in them.
t_data1, t_data2, labels = get_data(data_type="test", task = "sick")

Using custom data configuration sick
Reusing dataset sick (/home/shinjk1156/.cache/huggingface/datasets/sick/sick/0.0.0/c6b3b0b44eb84b134851396d6d464e5cb8f026960519d640e087fe33472626db)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# Project-local helper from `dataload`; returns two values bound to `data` and
# `token_ids` (presumably the joined pair inputs and their segment ids — TODO
# confirm). NOTE(review): `data` is also used as a loop variable in a later cell.
data, token_ids = construct_data_for_finetuning(t_data1, t_data2)

In [78]:
# Build sentence-pair rows: after the id 102 found in a row of t_data1, copy the
# matching row of t_data2 without its first id, truncated to the row width.
# (102 and the dropped first id are presumably [SEP] and [CLS] — TODO confirm.)
sep_rows, idices = (t_data1 == 102).nonzero(as_tuple=True)
# Use the actual row width; the original hard-coded 512 here.
row_width = t_data1.shape[1]
data1 = t_data1.clone()
# Pair every hit with its own row index instead of assuming that the i-th hit
# belongs to row i, which only holds when each row contains exactly one 102.
for i, sep in zip(sep_rows.tolist(), idices.tolist()):
    data1[i, sep + 1:] = t_data2[i, 1:row_width - sep]

In [5]:
# NOTE(review): identical to the other `get_data(..., task = "sick")` cell in
# this notebook; keep a single copy so a top-to-bottom run loads the data once.
t_data1, t_data2, labels = get_data(data_type="test", task = "sick")

Using custom data configuration sick
Reusing dataset sick (/home/shinjk1156/.cache/huggingface/datasets/sick/sick/0.0.0/c6b3b0b44eb84b134851396d6d464e5cb8f026960519d640e087fe33472626db)


  0%|          | 0/3 [00:00<?, ?it/s]

In [15]:
# Wrap t_data1 in a loader through the project-local `dataload.load_data`
# helper, without shuffling ('cpu' and 16 are presumably the device and the
# batch size — TODO confirm against dataload).
t_loader = load_data(t_data1, 'cpu', 16, shuffle_true=False)

Complete! Data loading


In [12]:
# Iterate the loader, unpacking every batch into (data, attn).
# NOTE(review): a bare expression inside a loop body is not displayed by the
# notebook, so this cell shows nothing even when it succeeds; use
# display()/print(), or break after the first batch, to inspect one. The
# recorded run of this cell ended in ModuleNotFoundError (no module named
# 'transformers').
for data, attn in t_loader:
    data

ModuleNotFoundError: No module named 'transformers'