In [1]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
# Checkpoint identifiers, kept in one place so they are easy to change.
MODEL_NAME = "prajjwal1/bert-tiny"
STATE_PATH = 'models/best_model_state.bin'

# Load the tokenizer and the base architecture with a 2-class head.
# from_pretrained() creates the classifier head with fresh weights (see the
# "newly initialized" warning in the cell output); the fine-tuned values are
# supplied by load_state_dict() below.
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# Load saved model state.
# map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only
# machine; weights_only=True restricts unpickling to tensors/primitive
# containers instead of executing arbitrary pickled code.
state_dict = torch.load(STATE_PATH, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)

# Switch to inference mode (disables dropout); as the last expression of the
# cell this also displays the model summary.
model.eval()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load('models/best_model_state.bin'))


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-1): 2 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=128, out_features=128, bias=True)
              (key): Linear(in_features=128, out_features=128, bias=True)
              (value): Linear(in_features=128, out_features=128, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=128, out_features=128, bias=True)
              (LayerNorm): LayerNorm((128,), eps=1e-1

In [2]:
def prepare_input_consistent(doc, summ, tokenizer, max_length=512):
    """Encode a document/summary pair as one fixed-length model input.

    The sequence layout is ``[CLS] doc [SEP] summ [SEP]``, right-padded with
    the tokenizer's pad id up to ``max_length``. No ``token_type_ids`` are
    returned.

    If the pair does not fit, the *document* is shortened first so that the
    summary and the closing ``[SEP]`` are preserved. (A plain slice of the
    concatenated ids would silently drop the summary whenever the document
    alone fills ``max_length``.) The summary is only shortened when it would
    otherwise take more than half of the available room while the document
    also overflows. Pairs that already fit are encoded unchanged.

    Args:
        doc: Source document text.
        summ: Candidate summary text.
        tokenizer: Object providing ``tokenize``, ``convert_tokens_to_ids``
            and the ``cls_token_id`` / ``sep_token_id`` / ``pad_token_id``
            attributes.
        max_length: Total sequence length, special tokens and padding
            included.

    Returns:
        dict with ``"input_ids"`` and ``"attention_mask"``; each is a
        ``torch.long`` tensor holding a single row of ``max_length`` entries
        (mask is 1 for real tokens, 0 for padding).
    """
    # Tokenize the document and summary
    doc_tokens = tokenizer.tokenize(doc)
    summ_tokens = tokenizer.tokenize(summ)

    # Room left for content once [CLS] and the two [SEP] are reserved.
    budget = max(max_length - 3, 0)

    # The summary keeps whatever the document leaves free, and never less
    # than half the budget; the document then receives the remainder.
    summ_keep = min(len(summ_tokens), max(budget - len(doc_tokens), budget // 2))
    doc_keep = min(len(doc_tokens), budget - summ_keep)

    combined_tokens = (
        [tokenizer.cls_token_id]
        + tokenizer.convert_tokens_to_ids(doc_tokens[:doc_keep])
        + [tokenizer.sep_token_id]
        + tokenizer.convert_tokens_to_ids(summ_tokens[:summ_keep])
        + [tokenizer.sep_token_id]
    )

    # Safety net: only shortens anything when max_length < 3, i.e. when even
    # the special tokens do not fit.
    combined_tokens = combined_tokens[:max(max_length, 0)]

    attention_mask = [1] * len(combined_tokens)

    # Padding
    pad_length = max_length - len(combined_tokens)
    if pad_length > 0:
        combined_tokens += [tokenizer.pad_token_id] * pad_length
        attention_mask += [0] * pad_length

    return {
        "input_ids": torch.tensor([combined_tokens], dtype=torch.long),
        "attention_mask": torch.tensor([attention_mask], dtype=torch.long)
    }



def predict(text, summary, model, tokenizer):
    """Classify a (document, summary) pair and print the result.

    The pair is encoded with ``prepare_input_consistent`` (fixed length 512),
    moved to the device the model's parameters live on, and passed through
    the model without gradient tracking.

    This function does not change the model's train/eval mode; call
    ``model.eval()`` beforehand so dropout is disabled.

    Args:
        text: Source document text.
        summary: Candidate summary text to score against ``text``.
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Tokenizer forwarded to ``prepare_input_consistent``.

    Returns:
        Tuple ``(prediction, probabilities)``: the arg-max class index as a
        Python int, and the softmax over the logits along dim 1 as a tensor.
    """
    inputs = prepare_input_consistent(text, summary, tokenizer, max_length=512)

    # The encoder always builds CPU tensors; move them next to the model's
    # weights so a model placed on an accelerator does not fail with a
    # device-mismatch error. A parameter-less model leaves them where they are.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        inputs = {name: tensor.to(first_param.device) for name, tensor in inputs.items()}

    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1)
        # Single-row batch, so .item() yields the one predicted class index.
        prediction = torch.argmax(probabilities, dim=1).item()

    print(f"Prediction: {prediction}")
    print(f"Probabilities: {probabilities}")

    return prediction, probabilities



In [None]:
# Demo inputs: one source article, a summary that follows the article, and a
# deliberately fabricated summary that contradicts it.
text = "Wasps have announced the appointment of Lee Blackett as their new backs coach for next season. The 32-year-old will move to the Aviva Premiership side from Championship club Rotherham, where he is currently head coach. He will replace former Wales fly-half Stephen Jones, who is returning to the Llanelli-based Scarlets in a coaching capacity. Wasps have announced the appointment of Lee Blackett as their new backs coach for next season . Wasps rugby director Dai Young said: 'We always knew the time would come when Stephen would want to return to Wales with his young family. He has done a great job for us. 'Lee is one of the brightest young coaches in Britain, who has won admiration throughout the game for how he has turned Rotherham Titans into one of the strongest teams outside of the Premiership. 'He has impressed knowledgeable onlookers by his attention to detail, enthusiasm and professionalism, and his proven ability to get the best out of the players he coaches.' Blackett (left) pictured playing for Leeds in 2010 as he is tackled by Guillaume Bousses (centre)"
summary = "Lee Blackett will move to the Aviva Premiership side next season . The 32-year-old is currently head coach at Championship club Rotherham .Blackett will replace former Wales fly-half Stephen Jones ."

summary_fake = "Lee Blackett will move to the English Super League side next season. The 42-year-old is currently CEO at Premier League football club Manchester City. Blackett will replace former England captain David Beckham."

# Score both candidates against the same article; each call prints its own
# prediction and probabilities. The names below end up holding the result of
# the second (fabricated-summary) call.
prediction, probabilities = predict(text=text, summary=summary, model=model, tokenizer=tokenizer)
prediction, probabilities = predict(text=text, summary=summary_fake, model=model, tokenizer=tokenizer)

Prediction: 0
Probabilities: tensor([[0.9835, 0.0165]])
Prediction: 1
Probabilities: tensor([[0.2393, 0.7607]])
