In [20]:
import torch
import torch.nn as nn
from transformers import BertGenerationPreTrainedModel, BertTokenizer, BertModel, BertForSequenceClassification

In [21]:
# Load the tokenizer first and the BertModel second; both are resolved from
# the same 'bert-base-uncased' checkpoint name.
tokenizer, model = (
    loader.from_pretrained('bert-base-uncased')
    for loader in (BertTokenizer, BertModel)
)

In [22]:
# Show the model's module tree, then the tokenizer's printed representation,
# each on its own line.
print(model, tokenizer, sep='\n')

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
  

In [23]:
# Sample sentence to encode.
text = 'Pytorch is love!! It makes using pretrained models so easy!!.'

# Ask the tokenizer for PyTorch tensors, with padding enabled and truncation
# capped at 512 tokens.
encode_options = dict(padding=True, return_tensors='pt', truncation=True, max_length=512)
inputs = tokenizer(text, **encode_options)
print(inputs)

{'input_ids': tensor([[  101,  1052, 22123,  2953,  2818,  2003,  2293,   999,   999,  2009,
          3084,  2478,  3653, 23654,  2098,  4275,  2061,  3733,   999,   999,
          1012,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


In [24]:
import torch.mps


# Prefer Apple's Metal (MPS) backend when it is usable; otherwise fall back to
# CUDA and finally the CPU, so the cell does not crash on machines without MPS.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# The model and every input tensor must live on the same device.
model = model.to(device)
inputs = {key: value.to(device) for key, value in inputs.items()}

# Forward pass through BERT. This is inference only, so autograd tracking is
# disabled to avoid building a graph that is never used.
with torch.no_grad():
    outputs = model(**inputs)

# Extract hidden states or pooler output
last_hidden_states = outputs.last_hidden_state  # Shape: [batch_size, sequence_length, hidden_size]
pooled_output = outputs.pooler_output  # Shape: [batch_size, hidden_size]

print(last_hidden_states.shape)
print(pooled_output.shape)

torch.Size([1, 22, 768])
torch.Size([1, 768])


In [25]:
# Position of the largest entry in the pooled output. No `dim` is given, so
# the index refers to the flattened tensor.
print(torch.argmax(pooled_output))

tensor(284, device='mps:0')


In [35]:
# Second setup: BERT with a two-label sequence-classification head, followed by
# a separate tokenizer instance loaded from the same checkpoint name.
# NOTE: the loader warns that classifier.weight / classifier.bias are newly
# initialized, so this model needs fine-tuning before its predictions are meaningful.
model2, tokenizer2 = (
    BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2),
    BertTokenizer.from_pretrained('bert-base-uncased'),
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [36]:
text = "I really hate my mindspaace today !!!!"

# Tokenize the text with the tokenizer that was loaded together with model2
# and return PyTorch tensors
input_eval = tokenizer2(text, return_tensors="pt", truncation=True, padding=True, max_length=32)

# Inference only: skip gradient tracking for the forward pass
with torch.no_grad():
    outputs_eval = model2(**input_eval)

# Convert the output logits to probabilities over the two labels
predictions = torch.nn.functional.softmax(outputs_eval.logits, dim=-1)

# Pick the most probable label along the class axis for the single sentence in
# the batch, rather than taking an argmax over the flattened tensor.
predicted_index = predictions.argmax(dim=-1)[0].item()

# Display the sentiments
# NOTE(review): model2's classification head is freshly initialized (see the
# warning printed when it was loaded), so until it is fine-tuned this label is
# effectively arbitrary, and the "index 1 = positive" mapping is only a convention
# of this notebook.
predicted_label = 'positive' if predicted_index > 0 else 'negative'
print(f"Text: {text}\nSentiment: {predicted_label}")

Text: I really hate my mindspaace today !!!!
Sentiment: positive
