In [2]:
import torch
from transformers import BertModel, BertTokenizer

# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
bert_model = BertModel.from_pretrained(model_name)

# Define your classifier model using BERT as feature extractor
class BERTClassifier(torch.nn.Module):
    def __init__(self, bert_model, num_classes):
        super(BERTClassifier, self).__init__()
        self.bert_model = bert_model
        self.classifier = torch.nn.Linear(bert_model.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        # Pass input through BERT model
        outputs = self.bert_model(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output

        # Pass pooled output through linear layer for classification
        logits = self.classifier(pooled_output)
        probabilities = torch.softmax(logits, dim=1)
        return probabilities

# Example usage
num_classes = 5  # Number of output classes

model = BERTClassifier(bert_model, num_classes)
input_text = ["# 1 Bleed Value MEL inoperative.  Aircraft limited to FL310.  At FL310 we got a #2 Bleed Valve Fault.  As per ECAM tried to reset it.  It would not reset.  Advised Center that we lost the ability to Pressurize and requested a descent to 10;000 ft.  Flew direct to ZZZ at 9;000 feet to an uneventful landing.#2 Bleed fault with no reset.   #1 Bleed Valve MEL inop.  Fix the Bleed Value when it breaks and do not MEL it."]  # Example input text
input_ids = tokenizer(input_text, truncation=True, padding=True, return_tensors='pt')['input_ids']
attention_mask = tokenizer(input_text, truncation=True, padding=True, return_tensors='pt')['attention_mask']

output_probs = model(input_ids, attention_mask)
print(output_probs)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tensor([[0.2030, 0.2346, 0.1150, 0.2954, 0.1520]], grad_fn=<SoftmaxBackward0>)


In [5]:
import torch
from transformers import BertModel, BertTokenizer

# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
bert_model = BertModel.from_pretrained(model_name)

# Define your classifier model using BERT as feature extractor
class BERTClassifier(torch.nn.Module):
    def __init__(self, bert_model, num_classes):
        super(BERTClassifier, self).__init__()
        self.bert_model = bert_model
        self.classifier = torch.nn.Linear(bert_model.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        # Pass input through BERT model
        outputs = self.bert_model(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output

        # Pass pooled output through linear layer for classification
        logits = self.classifier(pooled_output)
        probabilities = torch.softmax(logits, dim=1)
        return probabilities

# Example usage for fine-tuning
num_classes = 5  # Number of output classes
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = BERTClassifier(bert_model, num_classes).to(device)
input_text = ["Example input text"]  # Example input text
labels = torch.tensor([0])  # Example labels
input_ids = tokenizer(input_text, truncation=True, padding=True, return_tensors='pt')['input_ids'].to(device)
attention_mask = tokenizer(input_text, truncation=True, padding=True, return_tensors='pt')['attention_mask'].to(device)
labels = labels.expand(input_ids.size(0)).to(device)  # Expand the target tensor to match batch size

# Set up optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()

# Training loop
model.train()
num_epochs = 3

for epoch in range(num_epochs):
    optimizer.zero_grad()

    # Forward pass
    outputs = model(input_ids, attention_mask)
    loss = criterion(outputs, labels)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    print(f"Epoch: {epoch + 1}, Loss: {loss.item()}")

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(input_ids, attention_mask)
    probabilities = torch.softmax(outputs, dim=1)
    predicted_class = torch.argmax(probabilities, dim=1)
    print(f"Predicted class: {predicted_class.item()}, Probabilities: {probabilities}")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 1, Loss: 1.5227749347686768
Epoch: 2, Loss: 1.4760149717330933
Epoch: 3, Loss: 1.4523686170578003
Predicted class: 0, Probabilities: tensor([[0.2402, 0.1911, 0.2002, 0.1866, 0.1819]])


In [None]:
# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(input_ids, attention_mask)
    probabilities = torch.softmax(outputs.squeeze(), dim=1)
    predicted_class = torch.argmax(probabilities, dim=1)
    print(f"Predicted class: {predicted_class.item()}, Probabilities: {probabilities}")