In [1]:
import sentence_transformers
import os
import torch


# Local directory holding the source GTR-T5-base checkpoint.
gtr_dir = "/pretrained_models/gtr-t5-base"
# Sub-directory of that checkpoint which a later cell loads as a
# sentence-transformers Dense module.
linear_dir = os.path.join(gtr_dir, "2_Dense")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the Dense projection module stored under `linear_dir`.
dense_model = sentence_transformers.models.Dense.load(linear_dir)

In [3]:
# Inspect the loaded parameters; this is the reference to compare against after
# the weights are transplanted into the OpenMatch head.
print(dense_model.state_dict())

OrderedDict([('linear.weight', tensor([[ 0.0422, -0.0312,  0.0625,  ...,  0.0703, -0.0718,  0.0017],
        [-0.0518, -0.0613, -0.0449,  ...,  0.0253, -0.0045,  0.0698],
        [ 0.0009,  0.0007, -0.0325,  ..., -0.0182, -0.0205,  0.0012],
        ...,
        [-0.0742, -0.0466,  0.0645,  ..., -0.0114, -0.0244,  0.0630],
        [ 0.0261, -0.0306,  0.0410,  ...,  0.0049, -0.0132, -0.0124],
        [-0.0737,  0.0815,  0.0023,  ..., -0.0615,  0.0520,  0.0223]]))])


In [4]:
from openmatch.modeling import LinearHead

# Freshly initialised OpenMatch head.
# NOTE(review): the positional arguments are presumably
# (input_dim, output_dim, tied) -- confirm against LinearHead's signature.
new_linear = LinearHead(768, 768, True)
# Show the randomly initialised parameters before they are overwritten.
print(new_linear.state_dict())

OrderedDict([('linear_q.weight', tensor([[-0.0130,  0.0267, -0.0248,  ...,  0.0253, -0.0267, -0.0267],
        [ 0.0110,  0.0041,  0.0282,  ...,  0.0211, -0.0159,  0.0135],
        [-0.0280,  0.0325, -0.0296,  ...,  0.0038,  0.0218,  0.0221],
        ...,
        [ 0.0304,  0.0043, -0.0109,  ...,  0.0237,  0.0022, -0.0326],
        [ 0.0302,  0.0346,  0.0184,  ...,  0.0204, -0.0359, -0.0103],
        [-0.0108, -0.0178, -0.0069,  ...,  0.0132,  0.0077, -0.0336]])), ('linear_p.weight', tensor([[-0.0130,  0.0267, -0.0248,  ...,  0.0253, -0.0267, -0.0267],
        [ 0.0110,  0.0041,  0.0282,  ...,  0.0211, -0.0159,  0.0135],
        [-0.0280,  0.0325, -0.0296,  ...,  0.0038,  0.0218,  0.0221],
        ...,
        [ 0.0304,  0.0043, -0.0109,  ...,  0.0237,  0.0022, -0.0326],
        [ 0.0302,  0.0346,  0.0184,  ...,  0.0204, -0.0359, -0.0103],
        [-0.0108, -0.0178, -0.0069,  ...,  0.0132,  0.0077, -0.0336]]))])


In [5]:
# Transplant the pretrained projection from the sentence-transformers Dense
# layer into the OpenMatch head.
source_linear = dense_model.linear
target_weight = new_linear.linear_q.weight

# Only the weight matrix is transferred. If the source layer carried a bias it
# would be dropped silently, so fail loudly instead.
if getattr(source_linear, "bias", None) is not None:
    raise ValueError("Dense layer has a bias term that would be lost in the LinearHead")

# Tensor.copy_() broadcasts its source, so compare shapes explicitly to catch a
# Dense layer whose dimensions differ from the head that was constructed.
if source_linear.weight.shape != target_weight.shape:
    raise ValueError(
        f"shape mismatch: Dense weight {tuple(source_linear.weight.shape)} "
        f"vs LinearHead weight {tuple(target_weight.shape)}"
    )

# Copy the values in place instead of rebinding `.data`: the head keeps its own
# storage rather than aliasing dense_model's parameter, so later in-place edits
# to one model cannot leak into the other.
with torch.no_grad():
    target_weight.copy_(source_linear.weight)

# Every entry of the head's state dict should now show the Dense values.
print(new_linear.state_dict())

OrderedDict([('linear_q.weight', tensor([[ 0.0422, -0.0312,  0.0625,  ...,  0.0703, -0.0718,  0.0017],
        [-0.0518, -0.0613, -0.0449,  ...,  0.0253, -0.0045,  0.0698],
        [ 0.0009,  0.0007, -0.0325,  ..., -0.0182, -0.0205,  0.0012],
        ...,
        [-0.0742, -0.0466,  0.0645,  ..., -0.0114, -0.0244,  0.0630],
        [ 0.0261, -0.0306,  0.0410,  ...,  0.0049, -0.0132, -0.0124],
        [-0.0737,  0.0815,  0.0023,  ..., -0.0615,  0.0520,  0.0223]])), ('linear_p.weight', tensor([[ 0.0422, -0.0312,  0.0625,  ...,  0.0703, -0.0718,  0.0017],
        [-0.0518, -0.0613, -0.0449,  ...,  0.0253, -0.0045,  0.0698],
        [ 0.0009,  0.0007, -0.0325,  ..., -0.0182, -0.0205,  0.0012],
        ...,
        [-0.0742, -0.0466,  0.0645,  ..., -0.0114, -0.0244,  0.0630],
        [ 0.0261, -0.0306,  0.0410,  ...,  0.0049, -0.0132, -0.0124],
        [-0.0737,  0.0815,  0.0023,  ..., -0.0615,  0.0520,  0.0223]]))])


In [7]:
# Destination directory for the converted OpenMatch-style checkpoint.
openmatch_dir = "/pretrained_models/gtr-t5-base-openmatch"

# In notebook order this is the first cell that writes to the destination, so
# on a fresh kernel the directory may not exist yet. Create it up front so a
# Restart & Run All does not depend on an earlier manual run.
# NOTE(review): assumes save_pooler() does not create the directory itself --
# confirm against openmatch; the call below is harmless either way.
os.makedirs(openmatch_dir, exist_ok=True)

# Persist the head's weights (and whatever metadata save_pooler writes).
new_linear.save_pooler(openmatch_dir)

In [11]:
from transformers import (
    AutoModel,
    AutoTokenizer,
    T5EncoderModel,
)

# Load the tokenizer and the encoder-only T5 backbone from the same source
# checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(gtr_dir)
gtr_model = T5EncoderModel.from_pretrained(gtr_dir)

# Report the backbone's class name; the same name is recorded later in the
# OpenMatch config.
print(gtr_model.__class__.__name__)

T5EncoderModel


In [9]:
# Write the backbone and then the tokenizer into the converted checkpoint
# directory. The tokenizer call stays last so its return value (the tuple of
# written file paths) is what the cell displays.
export_dir = "/pretrained_models/gtr-t5-base-openmatch"
gtr_model.save_pretrained(export_dir)
tokenizer.save_pretrained(export_dir)

('/pretrained_models/gtr-t5-base-openmatch/tokenizer_config.json',
 '/pretrained_models/gtr-t5-base-openmatch/special_tokens_map.json',
 '/pretrained_models/gtr-t5-base-openmatch/spiece.model',
 '/pretrained_models/gtr-t5-base-openmatch/added_tokens.json',
 '/pretrained_models/gtr-t5-base-openmatch/tokenizer.json')

In [13]:
import json

# Description of the converted model: backbone class and which output feature
# to read, pooling strategy, linear head dimensions, and output normalisation.
config = dict(
    plm_backbone=dict(
        type=type(gtr_model).__name__,
        feature="last_hidden_state",
    ),
    pooling="mean",
    linear_head=dict(
        input_dim=768,
        output_dim=768,
        tied=True,
    ),
    normalize=True,
)

# Serialise first, then write the text next to the saved weights.
config_text = json.dumps(config, indent=4)
with open("/pretrained_models/gtr-t5-base-openmatch/openmatch_config.json", "w") as config_file:
    config_file.write(config_text)

In [1]:
from openmatch.arguments import ModelArguments
from openmatch.modeling import DenseModelForInference

# Point OpenMatch at the converted checkpoint directory.
model_args = ModelArguments(
    model_name_or_path="/pretrained_models/gtr-t5-base-openmatch",
    encoder_only=True,
)

# Build the inference model from those arguments.
model = DenseModelForInference.build(model_args)

  from .autonotebook import tqdm as notebook_tqdm


hello???


In [2]:
from transformers import AutoTokenizer

# Eight probe sentences, arranged as four topically related pairs.
english_sentences = [
    "Berlin is the capital of Germany",
    "Berlin is a large city in Germany",
    "Tensorflow can be used for deep learning",
    "Pytorch, developed by Facebook AI, is a deep learning framework",
    "Is Scipy or numpy better?",
    "Which is faster: scipy or pandas?",
    "Cats can live for quite a long time",
    "Cats are humans best friend",
]

# Tokenize with the tokenizer saved alongside the converted checkpoint,
# padding every sentence to the longest one and returning PyTorch tensors.
tokenizer = AutoTokenizer.from_pretrained("/pretrained_models/gtr-t5-base-openmatch")
encoded_input = tokenizer(english_sentences, padding=True, return_tensors="pt")
print(encoded_input)

{'input_ids': tensor([[ 4308,    19,     8,  1784,    13,  3434,     1,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0],
        [ 4308,    19,     3,     9,   508,   690,    16,  3434,     1,     0,
             0,     0,     0,     0,     0,     0,     0],
        [ 4738,     7,   127,  7631,    54,    36,   261,    21,  1659,  1036,
             1,     0,     0,     0,     0,     0,     0],
        [12901,    17,   127,   524,     6,  1597,    57,  1376,  7833,     6,
            19,     3,     9,  1659,  1036,  4732,     1],
        [   27,     7,   180,  3389,    63,    42,   206,  1167,    63,   394,
            58,     1,     0,     0,     0,     0,     0],
        [ 4073,    19,  3627,    10,     3,     7,  3389,    63,    42,  2131,
          7664,    58,     1,     0,     0,     0,     0],
        [ 3431,     7,    54,   619,    21,   882,     3,     9,   307,    97,
             1,     0,     0,     0,     0,     0,     0],
        [ 3431,     

In [3]:
from sentence_transformers import util
# encode_passage returns a pair; only the second element (`reps`) is used below.
# NOTE(review): `hidden` is presumably the backbone's token-level output and
# `reps` the pooled sentence embedding -- confirm against openmatch.
hidden, reps = model.encode_passage(encoded_input)
# Pairwise dot products of every sentence embedding against every other one.
# The value is left as the cell's last expression so it is displayed.
util.dot_score(reps, reps)

torch.Size([8, 17, 768])
torch.Size([8, 17, 768])
final


tensor([[1.0000, 0.8741, 0.3953, 0.4471, 0.3823, 0.3676, 0.3727, 0.4211],
        [0.8741, 1.0000, 0.4013, 0.4346, 0.3300, 0.3374, 0.4055, 0.3853],
        [0.3953, 0.4013, 1.0000, 0.6366, 0.5313, 0.5230, 0.3930, 0.4091],
        [0.4471, 0.4346, 0.6366, 1.0000, 0.5341, 0.5523, 0.4042, 0.4882],
        [0.3823, 0.3300, 0.5313, 0.5341, 1.0000, 0.7789, 0.4009, 0.4493],
        [0.3676, 0.3374, 0.5230, 0.5523, 0.7789, 1.0000, 0.4256, 0.4364],
        [0.3727, 0.4055, 0.3930, 0.4042, 0.4009, 0.4256, 1.0000, 0.6377],
        [0.4211, 0.3853, 0.4091, 0.4882, 0.4493, 0.4364, 0.6377, 1.0000]])

In [4]:
# Sanity check: print the head parameters of the rebuilt model so they can be
# compared by eye with the Dense weights printed earlier in the notebook.
print(model.head.state_dict())

OrderedDict([('linear_q.weight', tensor([[ 0.0422, -0.0312,  0.0625,  ...,  0.0703, -0.0718,  0.0017],
        [-0.0518, -0.0613, -0.0449,  ...,  0.0253, -0.0045,  0.0698],
        [ 0.0009,  0.0007, -0.0325,  ..., -0.0182, -0.0205,  0.0012],
        ...,
        [-0.0742, -0.0466,  0.0645,  ..., -0.0114, -0.0244,  0.0630],
        [ 0.0261, -0.0306,  0.0410,  ...,  0.0049, -0.0132, -0.0124],
        [-0.0737,  0.0815,  0.0023,  ..., -0.0615,  0.0520,  0.0223]])), ('linear_p.weight', tensor([[ 0.0422, -0.0312,  0.0625,  ...,  0.0703, -0.0718,  0.0017],
        [-0.0518, -0.0613, -0.0449,  ...,  0.0253, -0.0045,  0.0698],
        [ 0.0009,  0.0007, -0.0325,  ..., -0.0182, -0.0205,  0.0012],
        ...,
        [-0.0742, -0.0466,  0.0645,  ..., -0.0114, -0.0244,  0.0630],
        [ 0.0261, -0.0306,  0.0410,  ...,  0.0049, -0.0132, -0.0124],
        [-0.0737,  0.0815,  0.0023,  ..., -0.0615,  0.0520,  0.0223]]))])
