In [None]:
!pip install -q --upgrade transformers torch torchvision torchaudio
!pip install -q tokenizers==0.13.3
!pip install -q bitsandbytes transformers accelerate gradio thread6

In [None]:
# If you already know which type of model you want to use, you can use the class that defines its architecture directly. Let’s take a look at how this works with a BERT model.

In [5]:
from transformers import BertConfig, BertModel, AutoModel

In [2]:
# Start from a BertConfig created with no arguments (library defaults)
config = BertConfig()

# Instantiate the architecture described by that config
model = BertModel(config=config)

In [3]:
# The 'BertConfig' contains many attributes that are used to build the model.
# Leaving it as the last expression of the cell displays those attributes.
config

BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.33.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

The model as it is above is not pretrained: its weights are randomly initialized. It can be used in this state, but it will output gibberish. This is why we use the 'from_pretrained' method, which loads weights that have already been trained.

## Definitions
#### "hidden_size": Defines the size of the "hidden_states" vector

#### "num_hidden_layers": Defines the number of layers the Transformer model has

In [6]:
# Load a pretrained model. NOTE: this rebinds the 'model' variable created earlier.
# Architecture-specific alternative, kept for reference:
#model = BertModel.from_pretrained("bert-base-cased")

# Prefer AutoModel over BertModel: it selects the architecture from the checkpoint, so the code keeps working if the checkpoint changes.
model = AutoModel.from_pretrained("bert-base-cased")

# BertConfig is not used here: building a model from a config alone gives an untrained model, which is only useful if you intend to train it yourself.

# The identifier can be any model on the Model Hub. With BertModel (rather than AutoModel) it must be compatible with the BERT architecture.

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

In [10]:
# A pretrained (or fine-tuned) model can be written to disk with the save_pretrained() method.
# The argument is the directory you want to save it to.
# NOTE(review): this is an absolute, machine-specific path — confirm it is valid and writable in your environment.
model.save_pretrained("/notebooks/NLP_huggingface/Chapter_2")

# The above saves 2 files to the directory you chose: a 'json' file and a 'bin' file.
# NOTE(review): presumably the json holds the config and the bin holds the weights; newer transformers
# releases may write 'model.safetensors' instead of the 'bin' file — verify against the installed version.

##### Transformer models can only process numbers — the numbers that the tokenizer generates.

##### Let's see how that's done below.

In [2]:
# Example raw text inputs: one string per sequence in the batch
sequences = ["Hello!", "Cool.", "Nice!"]

# Hard-coded token IDs standing in for a tokenizer's output: one inner list per sequence above.
# Every row has the same length (4) and the same first (101) and last (102) ID.
# NOTE(review): these IDs look like they come from the bert-base-uncased vocabulary, whereas the model
# loaded in this notebook is "bert-base-cased" — confirm, or regenerate them with the matching tokenizer.
encoded_sequences = [
    [101, 7592, 999, 102],
    [101, 4658, 1012, 102],
    [101, 3835, 999, 102],
]

##### The tokenizer converts the words into numbers, which are typically called "input IDs".

##### The above is a list of encoded sequences, i.e. a list of lists. Tensors only accept rectangular shapes (think matrices), so we can easily
##### convert the above into a tensor since it is already rectangular: every inner list has the same length.

In [3]:
import torch

# Convert the list of lists into a single tensor (one row per encoded sequence).
# This works directly because every inner list has the same length.
model_inputs = torch.tensor(encoded_sequences)

In [8]:
# Run the batch of input IDs through the model and display what it returns.
# This is inference only, so autograd is switched off: no computation graph is
# built for the forward pass, which saves memory and time.
with torch.no_grad():
    output = model(model_inputs)
output

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 4.4496e-01,  4.8276e-01,  2.7797e-01,  ..., -5.4032e-02,
           3.9394e-01, -9.4770e-02],
         [ 2.4943e-01, -4.4093e-01,  8.1772e-01,  ..., -3.1917e-01,
           2.2992e-01, -4.1172e-02],
         [ 1.3668e-01,  2.2518e-01,  1.4502e-01,  ..., -4.6915e-02,
           2.8224e-01,  7.5566e-02],
         [ 1.1789e+00,  1.6738e-01, -1.8187e-01,  ...,  2.4671e-01,
           1.0441e+00, -6.1970e-03]],

        [[ 3.6436e-01,  3.2464e-02,  2.0258e-01,  ...,  6.0111e-02,
           3.2451e-01, -2.0995e-02],
         [ 7.1866e-01, -4.8725e-01,  5.1740e-01,  ..., -4.4012e-01,
           1.4553e-01, -3.7545e-02],
         [ 3.3223e-01, -2.3271e-01,  9.4877e-02,  ..., -2.5268e-01,
           3.2172e-01,  8.1079e-04],
         [ 1.2523e+00,  3.5754e-01, -5.1320e-02,  ..., -3.7840e-01,
           1.0526e+00, -5.6255e-01]],

        [[ 2.4042e-01,  1.4718e-01,  1.2110e-01,  ...,  7.6062e-02,
           3.3564e-01,  2