In [1]:
from transformers import AutoModelForSequenceClassification
import torch
import numpy as np
from utils import PROD_TOK, PIEZ_TOK, preprocess

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# Load the two pretrained sequence-classification checkpoints (in this order)
# and move each of them to `device`.
model_pr, model_pi = (
    AutoModelForSequenceClassification.from_pretrained(checkpoint).to(device)
    for checkpoint in ("VCNC/bert_3", "VCNC/bert_piezas")
)

In [4]:
# Demonstration of how to access one specific layer of the model: the `dense`
# sub-module of the output part of encoder layer 11.
model_pi.bert.encoder.layer[11].output.dense

Linear(in_features=3072, out_features=768, bias=True)

In [5]:
# To extract intermediate features from the model we create a forward hook.
def get_features(name):
    """Build a forward hook that records a module's output under ``name``.

    The returned callable has the ``(module, inputs, output)`` signature used
    by forward hooks. When invoked, it stores a detached copy of ``output`` in
    the module-level ``features`` dict under the key ``name``; that dict must
    already exist when the hook fires.

    Args:
        name: key under which the captured output is stored in ``features``.

    Returns:
        The hook function.
    """
    def _store_output(module, inputs, result):
        # detach() so the stored tensor carries no autograd history.
        features[name] = result.detach()

    return _store_output

In [6]:
# Register the hook on the model.
# If this cell was already executed, remove the previously registered hook
# first so that re-running the cell does not stack duplicate hooks.
_previous_handle = globals().get('feats_hook_handle')
if _previous_handle is not None:
    _previous_handle.remove()

# Keep the returned handle so the hook can be detached later with
# `feats_hook_handle.remove()`.
feats_hook_handle = model_pi.bert.encoder.layer[11].output.dense.register_forward_hook(get_features('feats'))
feats_hook_handle

<torch.utils.hooks.RemovableHandle at 0x7fe47735f340>

In [7]:
# Two small hand-written batches of token ids (one sequence each, of
# different lengths), created directly on `device`.
inp1 = torch.tensor([[1, 2, 4, 1, 6, 7]], device=device)
inp2 = torch.tensor([[4, 2, 3, 5, 7]], device=device)

In [8]:
# Create the dictionary in which the hook stores the intermediate features it captures.
features = {}
# Call the model. Note that its return value does not need to be kept: the
# hook fills `features` as a side effect of the forward pass.
model_pi(input_ids=inp1)

SequenceClassifierOutput(loss=None, logits=tensor([[ -2.4952,  -3.1932,  -1.2001,  -1.7882,  -3.1423,  -1.9941, -20.5563,
          -4.1423,  -2.4272,  -7.4258,  -2.7523,  -5.4735,  -2.3379,  -3.1901,
         -20.5552,  -5.5140, -20.5605,  -4.0876,  -5.1690,  -5.4560]],
       device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [9]:
# Take the [0][0] slice of the captured intermediate output and show its
# length; note that it is 768, the `out_features` of the hooked layer.
features['feats'][0][0].shape[0]

768

In [10]:
def sentence_distance(sent1:torch.Tensor, sent2:torch.Tensor, model):
    """Euclidean distance between the hooked features of two inputs.

    Runs ``model`` once per input and compares what a forward hook stored in
    the module-level ``features`` dict under the key ``'feats'`` during each
    pass. Only the ``[0][0]`` slice of each captured tensor is compared
    (presumably first batch item, first token -- TODO confirm against the
    hooked layer's output layout).

    Args:
        sent1: tensor passed to ``model`` as ``input_ids``.
        sent2: tensor passed to ``model`` as ``input_ids``.
        model: callable accepting ``input_ids=...``. It must already have a
            forward hook registered that writes ``features['feats']``.

    Returns:
        The L2 norm of the difference between both feature slices, as
        returned by ``np.linalg.norm``.

    Raises:
        KeyError: if a forward pass finishes without ``features['feats']``
            having been written (i.e. no suitable hook is registered).

    Side effects:
        Rebinds the module-level ``features``; after the call it holds the
        features captured for ``sent2``.
    """
    global features
    captured = []
    # Only the captured activations are needed, never gradients, so skip
    # building the autograd graph during both forward passes.
    with torch.no_grad():
        for sent in (sent1, sent2):
            features = {}  # fresh dict so a stale capture can never be reused
            model(input_ids=sent)
            if 'feats' not in features:
                raise KeyError(
                    "features['feats'] was not populated by the forward pass; "
                    "register a forward hook that stores it before calling "
                    "sentence_distance"
                )
            captured.append(features['feats'][0][0].detach().cpu())
    feat1, feat2 = captured
    return np.linalg.norm((feat1 - feat2).numpy())

In [11]:
# Distance between the hooked features of the two toy inputs.
sentence_distance(sent1=inp1, sent2=inp2, model=model_pi)

1.3077558e-05

In [12]:
# Put everything together in a single function.
def sequence_similarity(inp1:list[str], inp2:list[str], model:str, tokens:dict, device:torch.device):
    """Load a checkpoint and return the feature distance between two sequences.

    Steps, in order:
      1. Load ``model`` with ``AutoModelForSequenceClassification.from_pretrained``
         and move it to ``device``.
      2. Register a forward hook on ``bert.encoder.layer[11].output.dense``
         that stores the detached output in the module-level ``features``
         dict under ``'feats'``.
      3. Run ``preprocess(inp, tokens)`` on each input and move the result
         to ``device``.
      4. Delegate to ``sentence_distance`` with both encoded inputs.

    Args:
        inp1: first sequence, as a list of strings.
        inp2: second sequence, as a list of strings.
        model: checkpoint identifier passed to ``from_pretrained``.
        tokens: mapping handed to ``preprocess`` together with each input.
        device: device on which the model and inputs are placed.

    Returns:
        Whatever ``sentence_distance`` returns for the two encoded inputs.
    """
    def _store_feats(module, inputs, result):
        # Writes into the module-level `features` dict.
        features['feats'] = result.detach()

    network = AutoModelForSequenceClassification.from_pretrained(model).to(device)
    network.bert.encoder.layer[11].output.dense.register_forward_hook(_store_feats)

    encoded = [preprocess(sequence, tokens).to(device) for sequence in (inp1, inp2)]
    return sentence_distance(encoded[0], encoded[1], network)

In [13]:
# Sanity check with two identical sequences.
# The original lists ended in `'1200' 'other'` (missing comma), which Python
# silently concatenates into the single item '1200other'; the comma is
# restored so that '1200' and 'other' are separate items.
inp1 = ['1800-ARK', '1200 TAPA 6', '1200 CINCO ROSCAS', '1200 DADO VUELTA', '1600', '1800', '1200', '1200', 'other']
inp2 = list(inp1)  # deliberately an exact copy of inp1
model = 'VCNC/bert_3'
tokens = PROD_TOK
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# A result of 0 means it worked, since both inputs are exactly the same.
sequence_similarity(inp1, inp2, model, tokens, device)

0.0

Las funciones `sequence_similarity` y `sentence_distance` están implementadas en `utils.py` para que puedan ser importadas.