# This notebook reports the architecture (layers, hidden size, attention heads) and the number of parameters of each model

In [1]:
import sys,os
import json
# Put the parent of the current working directory first on sys.path so the
# `src` package imported below can be resolved.
project_root = os.path.split(os.getcwd())[0]
sys.path.insert(0, project_root)

from src.load_data import *
from src.preprocessing import *
from src.model_setup import *
from src.training import *
from src.file_utils import *
from transformers import set_seed

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_and_setup_model(task="cb", model_name="haisongzhang/roberta-tiny-cased", max_length=128):
    """Build the model for ``task`` from the ``model_name`` checkpoint.

    The task's data is loaded and encoded with the checkpoint's tokenizer;
    the label count derived from the encoded data is then passed, together
    with that data, to ``setup_model``.

    Args:
        task: Task identifier handed to ``load_hf_dataset`` and ``get_encoding``.
        model_name: Checkpoint identifier handed to ``get_tokenizer`` and
            ``setup_model``.
        max_length: Forwarded unchanged to ``preprocess_dataset``
            (presumably the maximum token sequence length -- confirm there).

    Returns:
        The object returned by ``setup_model``.
    """
    raw_data = load_hf_dataset(task)
    # The tokenizer follows the requested checkpoint, not a fixed architecture.
    tok = get_tokenizer(model_name)
    # Encoding routine specific to this task.
    encode_fn = get_encoding(task)
    encoded = preprocess_dataset(raw_data, encode_fn, tok, max_length)
    # The label count sizes the model head and is computed from the encoded data.
    return setup_model(model_name, get_label_count(encoded), encoded)

In [3]:
# RoBERTa-tiny checkpoint, set up on the default task of load_and_setup_model.
roberta_tiny_model = load_and_setup_model(model_name="haisongzhang/roberta-tiny-cased")
# Total number of elements across all tensors yielded by parameters().
roberta_tiny_params = sum(p.numel() for p in roberta_tiny_model.parameters())
# Print model configuration
print(f"Model Layers: {roberta_tiny_model.config.num_hidden_layers}")
print(f"Hidden Size: {roberta_tiny_model.config.hidden_size}")
print(f"Number of Heads: {roberta_tiny_model.config.num_attention_heads}")
# Label the count the same way the BERT cells do so the reports line up.
print(f"Parameters: {roberta_tiny_params}")

Model Layers: 4
Hidden Size: 512
Number of Heads: 8
27982336


In [4]:
# BERT-tiny checkpoint (L-2 / H-128 / A-2 in the checkpoint name).
bert_tiny_model = load_and_setup_model(
    model_name="google/bert_uncased_L-2_H-128_A-2"
)
# Total number of elements across all tensors yielded by parameters().
bert_tiny_params = sum(param.numel() for param in bert_tiny_model.parameters())
# Emit the four-line summary with a single newline-separated print call.
print(
    f"Model Layers: {bert_tiny_model.config.num_hidden_layers}",
    f"Hidden Size: {bert_tiny_model.config.hidden_size}",
    f"Number of Heads: {bert_tiny_model.config.num_attention_heads}",
    f"Parameters: {bert_tiny_params}",
    sep="\n",
)

Map: 100%|██████████| 56/56 [00:00<00:00, 1259.93 examples/s]


Model Layers: 2
Hidden Size: 128
Number of Heads: 2
Parameters: 4385920


In [6]:
# BERT-small checkpoint (L-4 / H-512 / A-8 in the checkpoint name).
bert_small_model = load_and_setup_model(
    model_name="google/bert_uncased_L-4_H-512_A-8"
)
# Total number of elements across all tensors yielded by parameters().
bert_small_params = sum(param.numel() for param in bert_small_model.parameters())
# The ratio line reads roberta_tiny_params, so the RoBERTa-tiny cell must
# have been run before this one.
print(
    f"Model Layers: {bert_small_model.config.num_hidden_layers}",
    f"Hidden Size: {bert_small_model.config.hidden_size}",
    f"Number of Heads: {bert_small_model.config.num_attention_heads}",
    f"Parameters: {bert_small_params}",
    f"Ratio of parameters between bert small and roberta tiny: {bert_small_params/roberta_tiny_params}",
    sep="\n",
)

Model Layers: 4
Hidden Size: 512
Number of Heads: 8
Parameters: 28763648
Ratio of parameters between bert small and roberta tiny: 1.027921614549979
