In [1]:
import warnings
warnings.filterwarnings('ignore')
from transformers import BertForMaskedLM, BertTokenizer

# Explore pretrained LLM pranav-s/MaterialsBERT

In [2]:
tokenizer = BertTokenizer.from_pretrained('pranav-s/MaterialsBERT')
model = BertForMaskedLM.from_pretrained('pranav-s/MaterialsBERT')

## Part 1: Tokenizer

### Text input

In [None]:
text = "machine learning interatomic potentials"

### Converts a string in a sequence of tokens, using the tokenizer.
The tokens are either words or subwords. 

**interatomic** isn’t in the model vocabulary, so it’s been split into **inter**, **ato** and **mic**.

To indicate those tokens are not separate words but parts of the same word, a **##** prefix is added 

In [None]:
encoded_sequence = tokenizer.tokenize(text)
print(encoded_sequence)

### Tokens can be converted into IDs understandable by the model. 
This can be done by directly feeding the sentence to the tokenizer

In [None]:
inputs = tokenizer(text)
encoded_sequence = inputs["input_ids"]
print(encoded_sequence)

### ID sequence can be decoded

In [None]:
decoded_sequence = tokenizer.decode(encoded_sequence)
print(decoded_sequence)

## Part 2: Model

In [4]:
model.num_parameters()

109514298

### Tonkenizer returns PyTorch inputs

In [None]:
encoded_input = tokenizer(text, return_tensors='pt')
print(encoded_input.keys())

### Model outputs logits

In [None]:
output = model(**encoded_input)
print(output.logits)
print(output.logits.shape)

In [None]:
print(encoded_input['input_ids'][0])

In [5]:
model.num_parameters

<bound method ModuleUtilsMixin.num_parameters of BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (Layer