# Convert MaskedLM model to a SequenceClassification model

In [1]:
import torch
from transformers import AutoModelForMaskedLM, AutoModelForSequenceClassification
from transformers import AutoTokenizer

import os

In [2]:
local_model_base_dir = '../local_models/'

In [3]:
MaskedLM_model_path = local_model_base_dir + 'bert-base-uncased_option1_with_bertram_bt2'
SeqClassification_model_path = local_model_base_dir + 'bert-base-uncased_option1_with_bertram_bt2_SC'

In [4]:
if os.path.exists(SeqClassification_model_path):
    err = f"Error! Directory {SeqClassification_model_path} already exists!"
    raise Exception(err)

## Load as Masked LM and save as SequenceClassification model

In [5]:
print(f"Going to load the MaskedLM model from {MaskedLM_model_path} and convert it into a SequenceClassification \
model and save it at {SeqClassification_model_path}")

Going to load the MaskedLM model from ../local_models/bert-base-uncased_option1_with_bertram_bt2 and convert it into a SequenceClassification model and save it at ../local_models/bert-base-uncased_option1_with_bertram_bt2_SC


In [6]:
# Load the models
model = AutoModelForSequenceClassification.from_pretrained(MaskedLM_model_path)
tokenizer = AutoTokenizer.from_pretrained(MaskedLM_model_path, use_fast=False, truncation=True)

Some weights of the model checkpoint at ../local_models/bert-base-uncased_option1_with_bertram_bt2 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../local_models/be

In [7]:
# Save the models
model.save_pretrained(SeqClassification_model_path)
tokenizer.save_pretrained(SeqClassification_model_path)
print(f"Saved the SequenceClassification model at {SeqClassification_model_path}")

Saved the SequenceClassification model at ../local_models/bert-base-uncased_option1_with_bertram_bt2_SC


## Test the Tokenizer & SequenceClassification model

In [8]:
tokenizer.get_added_vocab()

{'<BERTRAM:IDaboveboardID>': 30522,
 '<BERTRAM:IDaboveparID>': 30523,
 '<BERTRAM:IDacrosstheboardID>': 30524,
 '<BERTRAM:IDaddfueltothefireID>': 30525,
 '<BERTRAM:IDagainstthegrainID>': 30526,
 '<BERTRAM:IDaheadofthecurveID>': 30527,
 '<BERTRAM:IDaheadofthegameID>': 30528,
 '<BERTRAM:IDalittlebirdtoldmeID>': 30529,
 '<BERTRAM:IDallalongID>': 30530,
 '<BERTRAM:IDallbetsareoffID>': 30531,
 '<BERTRAM:IDalldressedupandnowheretogoID>': 30532,
 '<BERTRAM:IDallhellbrokelooseID>': 30533,
 '<BERTRAM:IDalloverbartheshoutingID>': 30534,
 '<BERTRAM:IDalloverthemapID>': 30535,
 '<BERTRAM:IDallovertheplaceID>': 30536,
 '<BERTRAM:IDallovertheshopID>': 30537,
 '<BERTRAM:IDallovertheshowID>': 30538,
 '<BERTRAM:IDalltherageID>': 30539,
 '<BERTRAM:IDallthingsbeingequalID>': 30540,
 '<BERTRAM:IDallwetID>': 30541,
 '<BERTRAM:IDalongaboutID>': 30542,
 '<BERTRAM:IDandallthatjazzID>': 30543,
 '<BERTRAM:IDandthensomeID>': 30544,
 '<BERTRAM:IDanoldflameID>': 30545,
 '<BERTRAM:IDarguethetossID>': 30546,
 '<BERTR

In [9]:
# Test for MWE single-tokens
test_tokens = ['<BERTRAM:IDoffthebeatentrackID>', '<BERTRAM:IDchapterandverseID>', '<BERTRAM:IDjointheclubID>', '<BERTRAM:IDtietheknotID>']

In [10]:
for token in test_tokens:
    assert tokenizer.tokenize(f'This is a {token}')[-1] == token, f"Error:{token} not detected"
    print(tokenizer.tokenize(f'This is a {token}'))

print(f'SUCCESS!! The {SeqClassification_model_path} tokenizer model has been tested!!')

['this', 'is', 'a', '<BERTRAM:IDoffthebeatentrackID>']
['this', 'is', 'a', '<BERTRAM:IDchapterandverseID>']
['this', 'is', 'a', '<BERTRAM:IDjointheclubID>']
['this', 'is', 'a', '<BERTRAM:IDtietheknotID>']
SUCCESS!! The ../local_models/bert-base-uncased_option1_with_bertram_bt2_SC tokenizer model has been tested!!


In [11]:
# TEST THE SequenceClassification model
def get_model_predictions(sents):
    inputs=tokenizer(sents, padding=True, truncation=True, return_tensors="pt")
    outputs = model(**inputs) 
    logits = outputs.logits
    print(logits)
    predictions = torch.argmax(logits, dim=-1)
    return predictions

sents = [f'This {token} is very very bad', f'This is a {token} with very long sentence!']
for token in test_tokens:
    for sent in sents:
        preds = get_model_predictions([sent])
        print(f"Input token:{token} \nPredictions:{preds}")
        print('-'*30)

tensor([[-0.0267, -0.2155]], grad_fn=<AddmmBackward0>)
Input token:<BERTRAM:IDoffthebeatentrackID> 
Predictions:tensor([0])
------------------------------
tensor([[-0.0011, -0.0722]], grad_fn=<AddmmBackward0>)
Input token:<BERTRAM:IDoffthebeatentrackID> 
Predictions:tensor([0])
------------------------------
tensor([[-0.0267, -0.2155]], grad_fn=<AddmmBackward0>)
Input token:<BERTRAM:IDchapterandverseID> 
Predictions:tensor([0])
------------------------------
tensor([[-0.0011, -0.0722]], grad_fn=<AddmmBackward0>)
Input token:<BERTRAM:IDchapterandverseID> 
Predictions:tensor([0])
------------------------------
tensor([[-0.0267, -0.2155]], grad_fn=<AddmmBackward0>)
Input token:<BERTRAM:IDjointheclubID> 
Predictions:tensor([0])
------------------------------
tensor([[-0.0011, -0.0722]], grad_fn=<AddmmBackward0>)
Input token:<BERTRAM:IDjointheclubID> 
Predictions:tensor([0])
------------------------------
tensor([[-0.0267, -0.2155]], grad_fn=<AddmmBackward0>)
Input token:<BERTRAM:IDtiethekn