# Convert PyTorch wrapper to a Hugging Face model
This step was needed so that the fine-tuned model could be loaded as a standard Hugging Face model and given a new 9-label head in place of the original 28-label head.

## Mount Google Drive

In [1]:
# Attach Google Drive to the Colab runtime; the checkpoint and output paths
# used later in this notebook all live under /content/drive.
from google.colab import drive

drive.mount("/content/drive")


Mounted at /content/drive


## Rebuild training wrapper and load checkpoint

In [5]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from safetensors.torch import load_file  # <-- IMPORTANT

# Name of the pretrained backbone passed to `from_pretrained` when the wrapper is rebuilt.
MODEL_NAME = "microsoft/deberta-v3-large"

# Folder the fine-tuned weights are read from (`model.safetensors` inside it).
# Either the run's root folder or a specific checkpoint subfolder works:
# both contain a model.safetensors file.
CHECKPOINT_DIR = "/content/drive/MyDrive/VibeQ-EIE/models/HF_deberta_goemotions_focal_v1"
# Alternative: point at one specific checkpoint instead of the run root.
# CHECKPOINT_DIR = "/content/drive/MyDrive/VibeQ-EIE/models/HF_deberta_goemotions_focal_v1/checkpoint-10856"

# ---- 1. Recreate the wrapper exactly like you trained ----
class FocalLoss(torch.nn.Module):
    """Element-wise binary focal loss computed on raw logits.

    Each element's binary cross-entropy is scaled by ``(1 - p_t) ** gamma``,
    where ``p_t`` is the sigmoid probability assigned to that element's
    target value, so confidently correct elements contribute less.

    Args:
        gamma: Exponent of the modulating factor; ``0`` reduces to plain BCE.
        reduction: ``"mean"`` or ``"sum"`` to reduce to a scalar. Any other
            value returns the unreduced element-wise loss tensor.
    """

    def __init__(self, gamma=2.0, reduction="mean"):
        super().__init__()
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        """Compute the focal loss of ``logits`` against ``targets``.

        Args:
            logits: Raw (pre-sigmoid) scores.
            targets: Label tensor with the same shape as ``logits``.
                Integer or bool tensors are converted to ``logits.dtype``.

        Returns:
            A scalar tensor for ``"mean"``/``"sum"`` reduction, otherwise a
            tensor with the same shape as ``logits``.
        """
        # Label tensors frequently arrive as int/bool, which
        # binary_cross_entropy_with_logits rejects; floating-point targets
        # are passed through untouched.
        if not torch.is_floating_point(targets):
            targets = targets.to(logits.dtype)

        # `torch.nn.functional` is reachable through `torch`, so no import
        # statement has to run on every forward pass.
        bce_loss = torch.nn.functional.binary_cross_entropy_with_logits(
            logits, targets, reduction="none"
        )
        probs = torch.sigmoid(logits)
        # p_t: probability the model assigns to each element's target value.
        pt = targets * probs + (1 - targets) * (1 - probs)
        focal_factor = (1 - pt) ** self.gamma
        loss = focal_factor * bce_loss

        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        # Any other reduction value: return the per-element loss.
        return loss

class DebertaForMultiLabel(torch.nn.Module):
    """Wrapper holding a Hugging Face sequence classifier plus a focal loss.

    The classifier is registered as ``base_model``, so its parameters sit
    under the ``base_model.`` prefix in this module's state dict. The
    ``loss_fct`` attribute is only stored here; ``forward`` does not apply it.

    Args:
        model_name: Identifier or path given to ``from_pretrained``.
        num_labels: Size of the classification head.
        gamma: Focusing parameter forwarded to ``FocalLoss``.
    """

    def __init__(self, model_name, num_labels, gamma=2.0):
        super().__init__()
        hf_options = {
            "num_labels": num_labels,
            "problem_type": "multi_label_classification",
        }
        self.base_model = AutoModelForSequenceClassification.from_pretrained(
            model_name, **hf_options
        )
        self.loss_fct = FocalLoss(gamma=gamma)

    def forward(self, *args, **kwargs):
        """Pass all arguments to the wrapped model and return its output."""
        outputs = self.base_model(*args, **kwargs)
        return outputs

OLD_NUM_LABELS = 28   # original GoEmotions labels
GAMMA = 1.5           # focal-loss gamma used for the training run

# Rebuild the wrapper with the same head size as the checkpoint so that the
# saved tensor names and shapes line up with this module.
wrapped_model = DebertaForMultiLabel(
    model_name=MODEL_NAME,
    num_labels=OLD_NUM_LABELS,
    gamma=GAMMA,
)

# ---- 2. Load the state dict from model.safetensors ----
state_dict_path = f"{CHECKPOINT_DIR}/model.safetensors"
print("Loading state dict from:", state_dict_path)

state_dict = load_file(state_dict_path)  # safetensors loader

# strict=False so that any key mismatch is reported below rather than raised
# from inside load_state_dict.
missing, unexpected = wrapped_model.load_state_dict(state_dict, strict=False)
print("Missing keys:", missing)
print("Unexpected keys:", unexpected)

# Missing keys mean some weights were NOT restored and still hold whatever
# from_pretrained put there. Stop here so a later cell cannot save a model
# that silently contains those weights.
if missing:
    raise RuntimeError(
        f"Checkpoint did not provide {len(missing)} expected tensor(s); "
        f"first few: {list(missing)[:5]}"
    )


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading state dict from: /content/drive/MyDrive/VibeQ-EIE/models/HF_deberta_goemotions_focal_v1/model.safetensors
Missing keys: []
Unexpected keys: []


## Save clean Hugging Face model

In [6]:
from transformers import AutoTokenizer

CLEAN_SAVE_DIR = "/content/drive/MyDrive/VibeQ-EIE/models/deberta_goemotions_28labels_clean"

# Save only the inner Hugging Face model (not the wrapper), so the output
# folder holds that model's own config and weights.
wrapped_model.base_model.save_pretrained(CLEAN_SAVE_DIR)

# Save the tokenizer next to the model. MODEL_NAME is reused instead of a
# second hard-coded copy of the id, so the tokenizer always matches the
# backbone the wrapper was built from.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.save_pretrained(CLEAN_SAVE_DIR)

print("Saved clean HF model to", CLEAN_SAVE_DIR)


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Saved clean HF model to /content/drive/MyDrive/VibeQ-EIE/models/deberta_goemotions_28labels_clean
