In [1]:
import os
import torch
import torch.nn.functional as F
from huggingface_hub import hf_hub_download, login, HfApi, create_repo
from transformers import RobertaConfig, RobertaPreTrainedModel, RobertaModel, AutoTokenizer, AutoModel
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Repo holding the original raw checkpoint, and the repo the converted
# Transformers-style package is pushed to.
OLD_REPO_ID = "Hidden-States/roberta-base-go-emotions-pt-only"
NEW_REPO_ID = "Hidden-States/roberta-base-go-emotions"
# Prefer the HF_TOKEN environment variable so a real token never has to be
# saved inside the notebook; the placeholder remains as the manual fallback.
HF_TOKEN = os.environ.get("HF_TOKEN") or "paste a valid token here"

In [3]:
# Authenticate this session against the Hugging Face Hub with the configured token.
login(token=HF_TOKEN)
# Hub client; used further down to upload the packaged model folder.
api = HfApi()

In [4]:
# THE CLASS DEFINITIONS
class EmoAxisConfig(RobertaConfig):
    """Configuration for the EmoAxis model.

    Extends ``RobertaConfig`` with two extra fields and pins two RoBERTa
    fields to fixed values.

    Args:
        num_classes: Stored as ``self.num_classes`` (default 28).
        freeze_upto: Stored as ``self.freeze_upto`` (default 0).
        **kwargs: Forwarded to ``RobertaConfig``. ``max_position_embeddings``
            and ``type_vocab_size`` are always overwritten with 514 and 1,
            whatever the caller passed.
    """

    model_type = "emoaxis"

    def __init__(self, num_classes=28, freeze_upto=0, **kwargs):
        # Forced values: any caller-supplied setting for these keys is replaced.
        kwargs.update(max_position_embeddings=514, type_vocab_size=1)
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.freeze_upto = freeze_upto

class EmoAxis(RobertaPreTrainedModel):
    """RoBERTa encoder with masked mean pooling and an MLP classification head.

    ``forward`` returns a tuple ``(text_emb, logits)`` where ``text_emb`` is
    the L2-normalised, mask-weighted mean of the encoder's last hidden state
    and ``logits`` is the output of the MLP head applied to ``text_emb``.
    """

    config_class = EmoAxisConfig

    def __init__(self, config):
        """Build the encoder (without its pooling layer) and the MLP head."""
        super().__init__(config)
        self.roberta = RobertaModel(config, add_pooling_layer=False)
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(config.hidden_size, 512),
            torch.nn.LayerNorm(512),
            torch.nn.GELU(),
            torch.nn.Dropout(0.25),
            torch.nn.Linear(512, config.num_classes)
        )
        self.post_init()

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """Encode the batch and classify the pooled embedding.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Mask used both by the encoder and as pooling
                weights. When omitted, every position of ``input_ids`` is
                treated as a real token.
            **kwargs: Accepted and ignored.

        Returns:
            Tuple ``(text_emb, logits)``.

        Raises:
            ValueError: If both ``input_ids`` and ``attention_mask`` are None.
        """
        if attention_mask is None:
            # The signature advertises attention_mask as optional, but the
            # pooling below needs a real tensor; fall back to an all-ones mask.
            if input_ids is None:
                raise ValueError("EmoAxis.forward requires input_ids when attention_mask is not provided.")
            attention_mask = torch.ones_like(input_ids)

        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)
        last_hidden_state = outputs.hidden_states[-1]

        # Mean over the sequence dimension, counting only unmasked positions.
        mask = attention_mask.unsqueeze(-1).float()
        # The clamp keeps the division finite if a row's mask is all zeros.
        text_emb = (last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
        text_emb = F.normalize(text_emb, p=2, dim=1)
        logits = self.mlp(text_emb)
        return text_emb, logits

In [5]:
# THE FILE STRINGS (For Hub Use)
# The strings in this cell hold the Python source that is written to .py
# files in the package folder and uploaded to Hugging Face with the weights.

# Source text for configuration_emoaxis.py; it repeats the EmoAxisConfig
# class defined earlier in this notebook, plus the import it needs.
config_content = """from transformers import RobertaConfig
class EmoAxisConfig(RobertaConfig):
    model_type = "emoaxis"
    def __init__(self, num_classes=28, freeze_upto=0, **kwargs):
        kwargs["max_position_embeddings"] = 514
        kwargs["type_vocab_size"] = 1
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.freeze_upto = freeze_upto
"""

# Source text for modeling_emoaxis.py, the module shipped to the Hub.
# forward() tolerates a missing attention_mask (all-ones fallback) instead of
# failing on `None.unsqueeze`, matching the optional default in its signature.
modeling_content = """import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import RobertaPreTrainedModel, RobertaModel
from .configuration_emoaxis import EmoAxisConfig

class EmoAxis(RobertaPreTrainedModel):
    config_class = EmoAxisConfig
    def __init__(self, config):
        super().__init__(config)
        self.roberta = RobertaModel(config, add_pooling_layer=False)
        self.mlp = nn.Sequential(
            nn.Linear(config.hidden_size, 512),
            nn.LayerNorm(512),
            nn.GELU(),
            nn.Dropout(0.25),
            nn.Linear(512, config.num_classes)
        )
        self.post_init()
    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        if attention_mask is None:
            # attention_mask is optional in the signature; pool over every position.
            if input_ids is None:
                raise ValueError("EmoAxis.forward requires input_ids when attention_mask is not provided.")
            attention_mask = torch.ones_like(input_ids)
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)
        last_hidden_state = outputs.hidden_states[-1]
        mask = attention_mask.unsqueeze(-1).float()
        text_emb = (last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
        text_emb = F.normalize(text_emb, p=2, dim=1)
        logits = self.mlp(text_emb)
        return text_emb, logits
"""

In [6]:
# EXECUTION & HUB PUSH
local_dir = "./final_package"
os.makedirs(local_dir, exist_ok=True)

# Write the architecture files to the folder
with open(f"{local_dir}/configuration_emoaxis.py", "w") as f: f.write(config_content)
with open(f"{local_dir}/modeling_emoaxis.py", "w") as f: f.write(modeling_content)

# Download and Remap Weights
print("Downloading original weights...")
ckpt_path = hf_hub_download(repo_id=OLD_REPO_ID, filename="model.pt")
checkpoint = torch.load(ckpt_path, map_location="cpu")
new_sd = {k.replace("encoder.encoder.", "roberta.").replace("classifier.mlp.", "mlp."): v
          for k, v in checkpoint["model_state_dict"].items()}

# Initialize and Load
config = EmoAxisConfig(num_classes=28)
model = EmoAxis(config)
model.load_state_dict(new_sd)

# Add auto_map to the config
config.auto_map = {
    "AutoConfig": "configuration_emoaxis.EmoAxisConfig",
    "AutoModel": "modeling_emoaxis.EmoAxis"
}

# Save locally
model.save_pretrained(local_dir)
AutoTokenizer.from_pretrained("roberta-base").save_pretrained(local_dir)

create_repo(repo_id=NEW_REPO_ID, exist_ok=True)
api.upload_folder(folder_path=local_dir, repo_id=NEW_REPO_ID)
print(f"Success! Your model is live on {NEW_REPO_ID}")

Downloading original weights...


model.pt:   0%|          | 0.00/1.49G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...package/model.safetensors:   3%|3         | 16.8MB /  498MB            

Success! Your model is live on Hidden-States/roberta-base-go-emotions


***Checking that AutoModel can correctly load the model from the new repo...***

In [None]:
# Round-trip check: load the tokenizer and model back from the Hub repo
# through the Auto* classes, then move the model to the available device.
print("Loading model from Hub...\n\n")

model_id = "Hidden-States/roberta-base-go-emotions"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code is passed so the repo's own modeling code may be used.
model = AutoModel.from_pretrained(model_id, trust_remote_code=True)

# Use the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = model.to(device)
# Switch to inference mode; as the last expression this also displays the model.
model.eval()