In [1]:
import argparse
import logging
import math
import os
import random

import datasets
from datasets import load_dataset, load_metric
from torch.utils.data.dataloader import DataLoader
from tqdm.auto import tqdm

import transformers
from accelerate import Accelerator
from transformers import (
    AdamW,
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    PretrainedConfig,
    SchedulerType,
    default_data_collator,
    get_scheduler,
    set_seed,
)
from transformers.utils.versions import require_version

# My custom model
from models import BertForSequenceClassification
from models import DeeBertForSequenceClassification
from models import BertConfig
import torch
import matplotlib.pyplot as plt

# Test my BertForSequenceClassification

In [2]:
# Single place to change the checkpoint: the tokenizer and the model both load from this
# name/path, so editing it here keeps the two in sync.
PRETRAINED_PATH = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_PATH)
# Project-local BertConfig (imported from `models`). The two thresholds presumably control
# the early-exit decisions -- TODO confirm their exact meaning in models.py.
config = BertConfig(exit_port_threshold=0.1, entropy_threshold=0.2)
model = BertForSequenceClassification.from_pretrained(PRETRAINED_PATH, config=config)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [3]:
# Tokenize one sentence into a PyTorch batch padded/truncated to exactly 128 tokens.
inputs = tokenizer(
    ["Hello, my dog is cute and I am the biggest person in the world"],
    max_length=128,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)
# One label for the single-sentence batch, shaped (1, 1).
labels = torch.tensor([[1]])

In [5]:
# First phase: labelled forward pass through the model's default `forward`.
outputs = model(labels=labels, **inputs)

In [8]:
# Second phase: same inputs and labels, routed through the model's `exit_forward` method.
outputs = model.exit_forward(labels=labels, **inputs)

In [14]:
# Display the first entry of `model.entropy_label`. In the recorded run this is a 2x2 tensor
# still attached to the autograd graph (grad_fn=<CatBackward>).
# NOTE(review): the columns look like (entropy, label) pairs -- confirm against models.py.
model.entropy_label[0]

tensor([[0.6593, 1.0000],
        [0.6593, 1.0000]], grad_fn=<CatBackward>)

In [None]:
# Switch the model to evaluation mode before running inference. `eval()` returns the module
# itself, so the trailing `;` stops the notebook from echoing the entire module repr.
model.eval();

In [None]:
# Tokenize a second sentence for the inference path, padded/truncated to exactly 128 tokens.
inputs = tokenizer(
    ["Hello, my dog is cute and I am the biggest person in the world Yo yo"],
    max_length=128,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)
# One label shaped (1, 1).
# NOTE(review): the inference call in the next cell does not take `labels`, so this tensor
# is not consumed there.
labels = torch.tensor([[1]])

In [None]:
# Inference through the model's `exit_inference_forward`. No labels are passed and nothing is
# back-propagated from this call, so run it under torch.no_grad() to avoid building an
# autograd graph for the forward pass.
with torch.no_grad():
    outputs = model.exit_inference_forward(**inputs)

In [None]:
# Display `model.stop_layers` after the `exit_inference_forward` call.
# NOTE(review): presumably records the layer at which each input exited -- confirm in models.py.
model.stop_layers

In [None]:
# Display the `logits` field of the most recent `outputs` (from `exit_inference_forward`).
outputs.logits

# Test DeeBert

In [None]:
# Single place to change the checkpoint: the tokenizer and the DeeBert model both load from
# this name/path, so editing it here keeps the two in sync.
PRETRAINED_PATH = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_PATH)
# Project-local BertConfig (imported from `models`); only the entropy threshold is set here.
# TODO confirm in models.py how DeeBert uses `entropy_threshold`.
config = BertConfig(entropy_threshold=0.5)
model = DeeBertForSequenceClassification.from_pretrained(PRETRAINED_PATH, config=config)

In [None]:
# Tokenize the DeeBert test sentence into a PyTorch batch padded/truncated to 128 tokens.
inputs = tokenizer(
    ["Hello, my dog is cute and I am the biggest person in the world haha, Excuse me"],
    max_length=128,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)
# One label (class 0) for the single-sentence batch, shaped (1, 1).
labels = torch.tensor([[0]])

In [None]:
# First phase: labelled forward pass through the DeeBert model's default `forward`.
outputs = model(labels=labels, **inputs)

In [None]:
# Second phase: same inputs and labels, routed through `exit_inference_forward`.
# NOTE(review): the BertForSequenceClassification section used `exit_forward` for its second
# phase and called `exit_inference_forward` without labels -- confirm this is the intended
# method for DeeBert.
outputs = model.exit_inference_forward(labels=labels, **inputs)

In [None]:
# Display `model.stop_layers` for the DeeBert model after the `exit_inference_forward` call.
# NOTE(review): presumably records the layer at which each input exited -- confirm in models.py.
model.stop_layers