In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import transformers
from transformers import BertModel, BertTokenizer
import torch.nn.functional as F



In [2]:
# Define the architecture of your MLP projection layer
class MLPProjection(nn.Module):
    """Stack of linear layers of equal output width, each followed by GELU.

    Args:
        input_size: Feature width accepted by the first linear layer.
        hidden_size: Output width of every linear layer in the stack.
        num_layers: Requested depth. The first layer is always created, so
            the stack never holds fewer than one linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Input width of each layer: only the first one sees `input_size`,
        # every following layer maps hidden_size -> hidden_size.
        layer_in_widths = [input_size] + [hidden_size] * (num_layers - 1)
        self.linear_layers = nn.ModuleList(
            [nn.Linear(in_width, hidden_size) for in_width in layer_in_widths]
        )

        # Shared GELU non-linearity applied after every linear layer
        self.activation = nn.GELU()

    def forward(self, x):
        """Run ``x`` through each linear layer followed by the activation.

        The activation is applied after the last layer as well.
        """
        hidden = x
        for layer in self.linear_layers:
            hidden = self.activation(layer(hidden))
        return hidden
# Define a simple encoder model
class SimpleEncoder(nn.Module):
    """Minimal encoder: one linear layer followed by a ReLU.

    Args:
        input_size: Width of the incoming feature vectors.
        output_size: Width of the encoded vectors; also exposed as the
            ``output_size`` attribute.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.output_size = output_size
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Return ``relu(fc(x))``."""
        return F.relu(self.fc(x))


In [4]:
# Pretrained BERT backbone (the "LLM") and its matching tokenizer
llm_model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Feature widths going into / coming out of the recommendation encoder
input_size_encoder = 50
output_size_encoder = 64

# Recommendation encoder. It keeps its random initialisation unless the
# checkpoint line below is enabled.
encoder_model = SimpleEncoder(input_size_encoder, output_size_encoder)
# encoder_model.load_state_dict(torch.load('path_to_encoder_model.pth'))  # Load pre-trained weights

# Freeze both models: no parameter of the LLM or the encoder receives gradients
llm_model.requires_grad_(False)
encoder_model.requires_grad_(False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:


# Define the MLP projection layer to align the recommendation encoder with the LLM
input_size = encoder_model.output_size  # Output size of the recommendation encoder
hidden_size = output_size_encoder  # Define your desired hidden size
num_layers = 5
batch_size = 128
mlp_projection = MLPProjection(input_size, hidden_size, num_layers)

# Example input and target
system_command = "your_system_command"
user_query = "user_query"
user_features = torch.randn(batch_size, input_size)  # Assuming user features are represented as a tensor
target = torch.randn(batch_size, hidden_size)  # Assuming a random target for demonstration

In [8]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(mlp_projection.parameters(), lr=0.001)

# Training loop
num_epochs = 10
# Define special tokens
CLS_token_id = tokenizer.cls_token_id  # Get the ID of the [CLS] token
SEP_token_id = tokenizer.sep_token_id  # Get the ID of the [SEP] token
for epoch in range(num_epochs):
    optimizer.zero_grad()

    # Pass user features through MLP projection layer
    user_features_aligned = mlp_projection(user_features)

    # Encode user query and concatenate with aligned user features
    encoded_user_query = tokenizer.encode(system_command, user_query, add_special_tokens=True, max_length=512, truncation=True, return_tensors="pt")
    combined_tensor = torch.cat((encoded_user_query, user_features_aligned), dim=1).long()
    # Process inputs through LLM
    llm_outputs = llm_model(combined_tensor)

    # Compute loss
    loss = criterion(user_features_aligned, target)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Print training statistics
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 1 but got size 128 for tensor number 1 in the list.