In [1]:
from NitMultilingualEncoders import NitQwenMathInstruct, NitMT5encoder, NitRobertaencoder

In [2]:
from AlignmentModels import Conv1dAutoencoder, CNN1DRBencoder

In [3]:
import torch
import torch.nn as nn

In [4]:
def count_parameters(model: nn.Module):
    """Count the scalar parameters of ``model``, split by trainability.

    Args:
        model: Module whose ``parameters()`` are inspected.

    Returns:
        A ``(trainable, non_trainable)`` tuple of element counts, where a
        parameter is trainable when its ``requires_grad`` flag is set.
    """
    n_trainable = 0
    n_frozen = 0
    for param in model.parameters():
        if param.requires_grad:
            n_trainable += param.numel()
        else:
            n_frozen += param.numel()
    return n_trainable, n_frozen

In [5]:
def load_model(model, filename='model.pth', map_location='cpu'):
    """Load a saved state dict into ``model`` and switch it to eval mode.

    Checkpoints written from an ``nn.DataParallel`` / ``DistributedDataParallel``
    wrapper carry a leading ``module.`` on every key; that prefix is removed so
    the weights fit the unwrapped model.

    Args:
        model: Module that receives the weights (modified in place).
        filename: Path of the checkpoint file holding the state dict.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so a
            checkpoint saved on a GPU still loads on a CPU-only machine;
            ``load_state_dict`` then copies the values onto whatever device
            the model's parameters live on.

    Raises:
        RuntimeError: From ``load_state_dict`` when the keys or tensor shapes
            do not match the model.
    """
    state_dict = torch.load(filename, map_location=map_location, weights_only=True)
    prefix = 'module.'
    # Strip only a *leading* "module."; a blanket str.replace would also mangle
    # keys such as "submodule.weight" (-> "subweight").
    new_state_dict = {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in state_dict.items()
    }
    model.load_state_dict(new_state_dict)
    model.eval()  # Set the model to evaluation mode
    print(f"Model loaded from {filename}")

In [6]:
# Cache directory handed to both encoder wrappers as `cache_dir`.
model_dir = "model_cache"

In [7]:
# Tokenisation settings shared by both encoder wrappers (passed as `max_tokens` / `padding`).
# NOTE(review): the leading 100 in the reported embedding shapes presumably comes from
# max_tokens combined with "max_length" padding — confirm in the wrapper classes.
max_tokens = 100
padding = "max_length"

In [8]:
# Prompt template registered on the Qwen wrapper via setTemplate().
# presumably {text} is substituted with each input string — verify in NitQwenMathInstruct.
qwen_template = "<|im_start|>{text}<|im_end|>"

In [9]:
# Qwen wrapper: produces the reference embeddings and performs all text generation in this notebook.
# Built with the shared cache directory and tokenisation settings, then given the prompt template.
qwen = NitQwenMathInstruct(cache_dir=model_dir, max_tokens=max_tokens, padding=padding)
qwen.setTemplate(qwen_template)

In [10]:
# RoBERTa encoder wrapper, built with the same cache directory and tokenisation settings as Qwen.
# The load warning reports newly initialised pooler weights (roberta.pooler.dense.*).
# NOTE(review): that is only harmless if the pooler output is never used — confirm in NitRobertaencoder.
rb = NitRobertaencoder(cache_dir=model_dir, max_tokens=max_tokens, padding=padding)

Some weights of RobertaModel were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Embedding shapes reported by each wrapper; they parameterise the alignment model built later.
rb_embedding_shape = rb.getEmbedding_shape()
qwen_embedding_shape = qwen.getEmbedding_shape()

In [12]:
# Display both shapes as a tuple; recorded output: ((100, 768), (100, 1536)).
rb_embedding_shape, qwen_embedding_shape

((100, 768), (100, 1536))

In [13]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [14]:
# Build the alignment network from the (RoBERTa, Qwen) embedding shapes and move it to `device`.
# NOTE(review): the argument order suggests it maps RoBERTa embeddings into Qwen's
# embedding space — confirm against CNN1DRBencoder.
alignment_model = CNN1DRBencoder(rb_embedding_shape, qwen_embedding_shape).to(device)

In [15]:
# Load the trained alignment weights from qwen_rb.pth; load_model also switches the model to eval mode.
load_model(alignment_model, "qwen_rb.pth")

Model loaded from qwen_rb.pth


In [16]:
# (trainable, non-trainable) parameter counts of the alignment model.
count_parameters(alignment_model)

(83961384, 0)

In [24]:
# Single prompt used to compare generation from Qwen's own embeddings vs. the aligned RoBERTa embeddings.
test_texts = ["what is 12+27?"]

In [25]:
# Qwen's own embeddings for the prompt — the reference input for generation.
original_embeddings = qwen.get_embeddings_from_text(test_texts)

In [26]:
# RoBERTa-side embeddings for the same prompt; their `.input_embeds` are fed to the alignment model.
rb_embeddings = rb.get_embeddings_from_text(test_texts)

In [27]:
# Map the RoBERTa embeddings through the alignment model.
# This is inference only: no_grad() stops autograd from recording a graph (and keeping
# activations alive) for the ~84M-parameter model; the computed values are unchanged.
with torch.no_grad():
    altered_embeddings = alignment_model(rb_embeddings.input_embeds)

In [28]:
# Reference generation: Qwen driven by its own embeddings of the prompt.
qwen.get_generation(original_embeddings)

[' a question from a 5th grade math test\nTo solve the problem \\(12 + 27\\), we can break it down into simpler steps:\n\n1. Add the tens place digits together: \\(10 + 20 = 30\\).\n2. Add the ones place digits together: \\(2 + 7 = 9\\).\n3. Combine the results from steps 1 and 2: \\(30 + 9 = 39\\).\n\nSo, the sum of \\(12 + 27\\) is \\(\\boxed{39}\\).']

In [29]:
# Swap the aligned embeddings into the RoBERTa batch object so it can be handed to Qwen.
# NOTE: this mutates rb_embeddings in place, so the cell is not idempotent — re-running the
# alignment cell afterwards would feed already-aligned embeddings back into the alignment
# model. Re-create rb_embeddings from the text before repeating the experiment.
rb_embeddings.input_embeds = altered_embeddings

In [30]:
# Generation from the aligned RoBERTa embeddings; compare with the reference generation from Qwen's own embeddings.
qwen.get_generation(rb_embeddings)

[' a) 39 b) 40 c) 41 d) 42\n\nTo solve the problem \\(12 + 27\\), we can break it down into simpler steps:\n\n1. Add the ones place digits: \\(2 + 7 = 9\\).\n2. Add the tens place digits: \\(1 + 2 = 3\\).\n3. Combine the results from the tens and ones places: \\(39\\).\n\nSo, the sum of \\(12 + 27\\) is \\(39\\).\n\nTherefore, the correct answer is \\(\\boxed{a}\\).']