In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Single source of truth for the fine-tuned checkpoint directory, so the model
# weights and the tokenizer files are always loaded from the same location.
MODEL_DIR = '../sentiment_analysis_backend/Models/Model_2'

model = BertForSequenceClassification.from_pretrained(MODEL_DIR)
tokenizer = BertTokenizer.from_pretrained(MODEL_DIR)
# Put the network in evaluation mode for inference. Kept as the cell's last
# expression, so the notebook also displays the module structure it returns.
model.eval()

  from .autonotebook import tqdm as notebook_tqdm


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [None]:
from torch.nn.functional import softmax

def predict_with_confidence(text: str):
    """Classify ``text`` and report the probability of the winning class.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        text: The input string to classify.

    Returns:
        A ``(prediction, confidence)`` tuple: the index of the highest-scoring
        class and the softmax probability assigned to it (both plain Python
        numbers).
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        class_logits = model(**encoded).logits

    # Turn raw logits into a probability distribution over classes.
    class_probs = softmax(class_logits, dim=-1)
    best_class = torch.argmax(class_probs, dim=-1).item()

    # Row 0 is read explicitly: the result describes the first (only) input.
    return best_class, class_probs[0, best_class].item()

# Example usage: classify one short utterance and print the predicted class
# index together with its softmax confidence, rounded to two decimals.
# `text`, `predicted_class` and `confidence` become notebook-level variables.
text = """HUH, uh, ok, I'm sorry?"""

predicted_class, confidence = predict_with_confidence(text)
print(f"Predicted class: {predicted_class}, Confidence: {confidence:.2f}")

Predicted class: 0, Confidence: 0.91


In [18]:
def explain_with_gradients(text, threshold=0.0):
    """Attribute the model's prediction for ``text`` to its input tokens.

    Uses a gradient-times-input scheme: the predicted class logit is
    differentiated with respect to the input embeddings, and each token is
    scored by the L2 norm of ``gradient * embedding``. Scores are then divided
    by their sum over *all* tokens (including ``[CLS]`` and ``[SEP]``), so the
    contributions reported after those two markers are filtered out may sum
    to less than 1.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects. Only the
    first sequence of the tokenized batch is explained.

    Args:
        text: The input string to classify and explain.
        threshold: Tokens are reported only if their normalized score is
            strictly greater than this value.

    Returns:
        A dict with keys ``"text"`` (the input), ``"predicted_class"`` (index
        of the highest logit), ``"confidence"`` (softmax probability of that
        class) and ``"important_words"`` (a list of
        ``{"word": token, "contribution": score}`` dicts in token order; the
        entries are tokenizer tokens, not necessarily whole words).
    """
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Force grad mode on so the function also works when the caller is inside
    # a torch.no_grad() block.
    with torch.enable_grad():
        # Detach the embeddings from the embedding table so they become a leaf
        # tensor we can differentiate with respect to.
        embeddings = model.get_input_embeddings()(input_ids).detach().requires_grad_(True)

        # Forward pass using embeddings instead of token ids
        outputs = model(inputs_embeds=embeddings, attention_mask=attention_mask)
        logits = outputs.logits
        predicted_class = torch.argmax(logits, dim=1).item()
        confidence = torch.softmax(logits, dim=1)[0, predicted_class].item()

        # Differentiate only with respect to the embeddings. Unlike
        # .backward(), this does not accumulate gradients into the model's
        # parameters, so repeated calls leave the model state untouched.
        (embedding_grads,) = torch.autograd.grad(logits[0, predicted_class], embeddings)

    # The scoring arithmetic itself does not need to be tracked by autograd.
    with torch.no_grad():
        # Token importance = norm of gradient * embedding, per token
        importance_scores = torch.norm(embedding_grads[0] * embeddings[0], dim=-1)

        # Normalize scores for interpretability; skip when every score is zero
        # to avoid dividing by zero and producing NaNs.
        total = importance_scores.sum()
        if total > 0:
            importance_scores = importance_scores / total

    # Recover the token strings that correspond to each score
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])

    # Keep tokens above the threshold, dropping the special boundary markers
    important_words = [
        {"word": token, "contribution": score}
        for token, score in zip(tokens, importance_scores.tolist())
        if score > threshold and token not in ("[CLS]", "[SEP]")
    ]

    return {
        "text": text,
        "predicted_class": predicted_class,
        "confidence": confidence,
        "important_words": important_words,
    }

# Example usage: explain one sentence, reporting only tokens whose normalized
# contribution exceeds 0.01. Note that this rebinds the notebook-level `text`
# variable and prints the whole result dict as plain text.
text = "Not gonna lie David that was AWESOME!"
result = explain_with_gradients(text, threshold=0.01)
print(result)


{'text': 'Not gonna lie David that was AWESOME!', 'predicted_class': 1, 'confidence': 0.9130285978317261, 'important_words': [{'word': 'not', 'contribution': 0.08270232379436493}, {'word': 'gonna', 'contribution': 0.20245884358882904}, {'word': 'lie', 'contribution': 0.30110064148902893}, {'word': 'david', 'contribution': 0.1406608670949936}, {'word': 'that', 'contribution': 0.03422898054122925}, {'word': 'was', 'contribution': 0.028362615033984184}, {'word': 'awesome', 'contribution': 0.10779783874750137}, {'word': '!', 'contribution': 0.03372906893491745}]}
