In [7]:
from peft import get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

import torch
from torch.utils.data import Dataset

## Setup Model

In [8]:
output_dim = 11

# Load your model (initially for classification)
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=output_dim)

# Define LoRA configuration for multivariate regression
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, # sequenceClass as helper
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

# Apply the LoRA PEFT configuration to your model
model = get_peft_model(model, peft_config)

# Modify output dimension for regression
model.config.num_labels = output_dim

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Dataset & Training

In [9]:
# example dataset class
class RandomRegressionDataset(Dataset):
    def __init__(self, size=1000, seq_length=128, output_dim=11):
        self.size = size
        self.seq_length = seq_length
        self.output_dim = output_dim

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        input_ids = torch.randint(0, 30522, (self.seq_length,))  # Random token ids
        attention_mask = torch.ones(self.seq_length)  # Dummy attention mask
        labels = torch.randn(self.output_dim)  # Random regression targets
        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

# create random dataset for training and evaluation
train_dataset = RandomRegressionDataset()
eval_dataset = RandomRegressionDataset()

In [10]:
# custom Trainer to handle the regression loss (MSE)
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        outputs = model(**inputs)
        predictions = outputs.logits
        loss_fn = torch.nn.MSELoss()  # Mean Squared Error Loss
        loss = loss_fn(predictions, labels)
        return (loss, outputs) if return_outputs else loss

# training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",  # Use eval_strategy instead of deprecated evaluation_strategy
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,  # Shortened for testing purposes
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
)

# Trainer setup using CustomTrainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Train the model
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m7shoe[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,1.016,1.011762
2,1.014,1.00235
3,0.9619,0.984955
4,0.9942,1.002096
5,1.0029,1.005605


TrainOutput(global_step=315, training_loss=1.0160121826898485, metrics={'train_runtime': 32.4899, 'train_samples_per_second': 153.894, 'train_steps_per_second': 9.695, 'total_flos': 330080340480000.0, 'train_loss': 1.0160121826898485, 'epoch': 5.0})

## Run Inference

In [11]:
# take a subset of the evaluation dataset for inference
subset_size = 23  # Use a small subset for testing
eval_subset = [eval_dataset[i] for i in range(subset_size)]

# Convert the subset to a batch for inference
input_ids = torch.stack([item['input_ids'] for item in eval_subset])
attention_mask = torch.stack([item['attention_mask'] for item in eval_subset])
labels = torch.stack([item['labels'] for item in eval_subset])

inputs = {
    'input_ids': input_ids,
    'attention_mask': attention_mask
}

# Move inputs to the appropriate device (GPU/CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
inputs = {k: v.to(device) for k, v in inputs.items()}
labels = labels.to(device)

# Run inference
model.eval()
with torch.no_grad():
    outputs = model(**inputs)
    predictions = outputs.logits  # These are the predicted values

In [12]:
# mean Squared Error computation
mse_loss_fn = torch.nn.MSELoss()

# calculate MSE between predictions and true labels
mse = mse_loss_fn(predictions, labels)

print(f"Mean Squared Error on subset: {mse.item()}")

Mean Squared Error on subset: 1.0689997673034668


In [13]:
predictions

tensor([[ 8.3236e-02, -7.4599e-02, -5.5312e-02, -6.8720e-05,  3.2924e-02,
          1.1646e-01,  7.6541e-02,  3.1326e-02, -7.0111e-03,  1.4391e-02,
          6.4919e-02],
        [ 5.5729e-02, -9.6576e-02, -7.5652e-02,  2.7464e-02,  1.7071e-02,
          1.5047e-01,  7.6762e-02,  4.8520e-02, -1.8715e-02,  2.4906e-02,
          6.8290e-02],
        [ 2.2446e-02,  1.8741e-02, -4.6552e-02, -7.7787e-04,  1.5758e-02,
          2.9205e-02, -2.7818e-02,  3.7431e-02,  1.2304e-02,  2.8333e-02,
         -3.7628e-03],
        [ 8.7305e-04,  2.7116e-02, -5.6817e-02, -3.2062e-02,  1.9789e-02,
         -8.7455e-03, -2.3889e-02, -1.6129e-02,  3.3801e-02,  1.3483e-02,
         -1.4406e-02],
        [ 5.9830e-03,  2.5881e-02, -3.5188e-02,  7.4200e-03,  4.3201e-04,
          2.2960e-02, -3.3125e-02,  3.2310e-02, -1.2279e-02, -5.6384e-04,
         -8.7629e-03],
        [ 6.3883e-02, -6.7322e-02, -4.3836e-02,  1.4952e-02, -1.9108e-03,
          1.1069e-01,  5.2055e-02,  5.7284e-02, -2.0236e-02, -1.2038e-0

## Decision Function

In [None]:
def decision_function(predictions:torch.Tensor):
    '''
    Make decision
    '''
    return predictions.argmax(dim=1)

## Store model

In [None]:
# define the path where you want to save the model
model_save_path = "./stored_regression"

# save the fine-tuned model along with LoRA adapters
model.save_pretrained(model_save_path)

# LOADING SCRIPT

In [22]:
from transformers import AutoModelForSequenceClassification
from peft import PeftModel
import torch

# load the base model
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=11)

# load the fine-tuned model with LoRA adapters
model_save_path = '/home/siebenschuh/Projects/dataprep/code/DPO/regr_mvp/stored_regression'
model = PeftModel.from_pretrained(model, model_save_path)

# move the model to the appropriate device if necessary
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def decision_function(predictions:torch.Tensor):
    '''
    Make decision
    '''
    return predictions.argmax(dim=1)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
