In [2]:
from transformers import AutoModelForMaskedLM #AutoModel also works
# --- ESM++ (small) masked-LM quick start ---
# The checkpoint is loaded with trust_remote_code=True (custom modeling code is fetched from the hub).
model = AutoModelForMaskedLM.from_pretrained(
    'Synthyra/ESMplusplus_small',
    trust_remote_code=True,
)
# The tokenizer is exposed as an attribute of the loaded model.
tokenizer = model.tokenizer

# Two toy protein sequences, padded to a common length and returned as PyTorch tensors.
sequences = ['MPRTEIN', 'MSEQWENCE']
tokenized = tokenizer(
    sequences,
    padding=True,
    return_tensors='pt',
)

# For MLM training: correctly mask input_ids and set the unmasked positions of labels to -100.
# tokenized['labels'] = tokenized['input_ids'].clone()

# Forward pass; pass output_hidden_states=True to also get all hidden states.
output = model(**tokenized)

# Language-modeling logits: (batch_size, seq_len, vocab_size) -> (2, 11, 64)
print(output.logits.shape)
# Last hidden state of the model: (batch_size, seq_len, hidden_size) -> (2, 11, 960)
print(output.last_hidden_state.shape)
# Language-modeling loss, available if labels were passed
print(output.loss)
# All hidden states (in a tuple) if output_hidden_states=True was passed:
# print(output.hidden_states)


import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

# --- FastESM2-650 embedding extraction ---
# Load the masked-LM checkpoint in half precision and put it in eval mode for inference.
model_path = 'Synthyra/FastESM2_650'
model = AutoModelForMaskedLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

sequences = ['MPRTEIN', 'MSEQWENCE']
tokenized = tokenizer(sequences, padding=True, return_tensors='pt')
with torch.no_grad():  # inference only: skip building the autograd graph
    # Request all hidden states and keep the last entry as the per-residue embeddings.
    embeddings = model(**tokenized, output_hidden_states=True).hidden_states[-1]

# Sanity check: one embedding matrix per input sequence.
assert embeddings.shape[0] == len(sequences), "batch dimension should match the number of input sequences"
print(embeddings.shape) # (batch_size, seq_len, hidden_size), (2, 11, 1280)


modeling_esm_plusplus.py:   0%|          | 0.00/36.3k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Synthyra/ESMplusplus_small:
- modeling_esm_plusplus.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


torch.Size([2, 11, 64])
torch.Size([2, 11, 960])
None
torch.Size([2, 11, 1280])


In [1]:
from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model, TaskType
from modeling_esm_plusplus import ESMplusplusConfig, ESMplusplusForSequenceClassification

In [3]:
# Explicit ESM++ (small) configuration for a single-output regression head.
# NOTE: the active load line below does not use this object; only the
# commented-out manual-construction alternative references it.
esmconfig = ESMplusplusConfig(
    vocab_size=64,
    hidden_size=960,
    num_attention_heads=15,
    num_hidden_layers=30,
    num_labels=1,   # regression
    problem_type=None,
    dropout=0.0,
)
### All three of these work
# NOTE(review): the second form does not pass num_labels=1 to from_pretrained —
# confirm it still yields a single-output head before relying on it.
#model = ESMplusplusForSequenceClassification.from_pretrained("Synthyra/ESMplusplus_small", num_labels=1)
#model = ESMplusplusForSequenceClassification(esmconfig).from_pretrained("Synthyra/ESMplusplus_small")
model = AutoModelForSequenceClassification.from_pretrained("Synthyra/ESMplusplus_small", num_labels=1, trust_remote_code=True)

print(model)

peft_config = LoraConfig(
    task_type=TaskType.FEATURE_EXTRACTION,
    inference_mode=False,
    # Adapt the fused QKV projection (the Linear at index 1 of each block's layernorm_qkv).
    target_modules=["layernorm_qkv.1"],
    # The checkpoint has no classifier weights (they are reported as newly initialized on load).
    # LoRA freezes every non-adapter parameter, so without this the random head would never be
    # trained. modules_to_save keeps the head trainable and stores it with the adapter.
    modules_to_save=["classifier"],
)

model = get_peft_model(model, peft_config)
# Report trainable vs. total parameter counts (LoRA adapters plus the classifier head).
model.print_trainable_parameters()

Some weights of ESMplusplusForSequenceClassification were not initialized from the model checkpoint at Synthyra/ESMplusplus_small and are newly initialized: ['classifier.0.bias', 'classifier.0.weight', 'classifier.2.bias', 'classifier.2.weight', 'classifier.3.bias', 'classifier.3.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ESMplusplusForSequenceClassification(
  (embed): Embedding(64, 960)
  (transformer): TransformerStack(
    (blocks): ModuleList(
      (0-29): 30 x UnifiedTransformerBlock(
        (attn): MultiHeadAttention(
          (layernorm_qkv): Sequential(
            (0): LayerNorm((960,), eps=1e-05, elementwise_affine=True)
            (1): Linear(in_features=960, out_features=2880, bias=False)
          )
          (out_proj): Linear(in_features=960, out_features=960, bias=False)
          (q_ln): LayerNorm((960,), eps=1e-05, elementwise_affine=True)
          (k_ln): LayerNorm((960,), eps=1e-05, elementwise_affine=True)
          (rotary): RotaryEmbedding()
        )
        (ffn): Sequential(
          (0): LayerNorm((960,), eps=1e-05, elementwise_affine=True)
          (1): Linear(in_features=960, out_features=5120, bias=False)
          (2): SwiGLU()
          (3): Linear(in_features=2560, out_features=960, bias=False)
        )
      )
    )
    (norm): LayerNorm((960,), eps=1e-05, elem

In [10]:
# Display the PEFT-wrapped model's module tree to inspect which layers received LoRA adapters.
model

PeftModelForFeatureExtraction(
  (base_model): LoraModel(
    (model): ESMplusplusForSequenceClassification(
      (embed): Embedding(64, 960)
      (transformer): TransformerStack(
        (blocks): ModuleList(
          (0-29): 30 x UnifiedTransformerBlock(
            (attn): MultiHeadAttention(
              (layernorm_qkv): Sequential(
                (0): LayerNorm((960,), eps=1e-05, elementwise_affine=True)
                (1): lora.Linear(
                  (base_layer): Linear(in_features=960, out_features=2880, bias=False)
                  (lora_dropout): ModuleDict(
                    (default): Identity()
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=960, out_features=8, bias=False)
                  )
                  (lora_B): ModuleDict(
                    (default): Linear(in_features=8, out_features=2880, bias=False)
                  )
                  (lora_embedding_A): ParameterDict()
            