# Imports

In [1]:
!pip install protobuf==3.20.3
# !pip install transformers-interpret
!pip install bertviz

Collecting protobuf==3.20.3
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.33.0
    Uninstalling protobuf-6.33.0:
      Successfully uninstalled protobuf-6.33.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
opentelemetry-proto 1.37.0 requires protobuf<7.0,>=5.0, but you have protobuf 3.20.3 which is incompatible.
onnx 1.18.0 requires protobuf>=4.25.1, but you have protobuf 3.20.3 which is incompatible.
a2a-sdk 0.3.10 requires p

In [2]:
import torch
import shap
# from transformers_interpret import SequenceClassificationExplainer
from bertviz import head_view, model_view


import transformers
from datasets import load_from_disk

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
from tqdm import tqdm
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import networkx as nx

import pickle

In [3]:
torch.manual_seed(42)
np.random.seed(42)

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on device: {device}")

Running on device: cuda


# Dataset

In [5]:
dataset = load_from_disk("/kaggle/input/erisk25/erisk_processed/erisk_processed")
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'user'],
        num_rows: 25263
    })
    val: Dataset({
        features: ['text', 'label', 'user'],
        num_rows: 2807
    })
    test: Dataset({
        features: ['text', 'label', 'user'],
        num_rows: 3115
    })
})

# Model

In [6]:
model_path = '/kaggle/input/disorbert-finetuned-models/pytorch/default/1/finetuned-models/bert-base-cased-finetuned/checkpoint-5530'
tokenizer = transformers.AutoTokenizer.from_pretrained(model_path, truncation=True, max_length=512)

config = transformers.AutoConfig.from_pretrained(model_path)
config.output_attentions = True
config.output_hidden_states = True

model = transformers.AutoModelForSequenceClassification.from_pretrained(model_path, config=config, device_map='auto')

2025-11-23 19:28:39.484359: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763926119.670808      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763926119.723612      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [7]:
tokenizer

BertTokenizerFast(name_or_path='/kaggle/input/disorbert-finetuned-models/pytorch/default/1/finetuned-models/bert-base-cased-finetuned/checkpoint-5530', vocab_size=28996, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [8]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [9]:
text = "I feel hopeless and I want to give up."
encoding = tokenizer(
    text,
    truncation=True,
    max_length=510,
    padding=True,
    return_tensors='pt'
).to('cuda:0')
outputs = model(**encoding)

attention = outputs.attentions



In [10]:
def visualize_all_bertviz(text, model, tokenizer, base_filename="attention"):
    """
    Generates and displays:
    - Head View
    - Model View
    Also saves them as separate HTML files.
    """

    # --------------------
    # Tokenization
    # --------------------
    encoding = tokenizer(
        text,
        truncation=True,
        max_length=510,
        padding=True,
        return_tensors='pt'
    ).to(model.device)

    # --------------------
    # Forward Pass + Attentions
    # --------------------
    outputs = model(**encoding, output_attentions=True)
    
    if outputs.attentions is None:
        raise ValueError("Model did not return attentions. Load model with output_attentions=True.")

    attention = outputs.attentions
    tokens = tokenizer.convert_ids_to_tokens(encoding['input_ids'][0])

    # --------------------
    # Generate Views
    # --------------------
    print("\nGenerating Head View...")
    head_html = head_view(attention, tokens)

    print("Generating Model View...")
    model_html = model_view(attention, tokens)

    print("\nVisualization Complete!")


In [11]:
text = "I feel hopeless and I want to give up."
visualize_all_bertviz(
    text,
    model, tokenizer,
)


Generating Head View...


<IPython.core.display.Javascript object>

Generating Model View...


<IPython.core.display.Javascript object>


Visualization Complete!


In [12]:
test_texts = list(dataset['test']['text'])
len(test_texts), test_texts[0]

(3115,
 "Itachi uchiha not that I am a huge fan but man his story hits hard 1. Naruto Shipudden / OG Naruto 2. Attack on Titan 3. Saiki 4. Berserk (manga) 5.Demon Slayer / jujutsu kaisen Hashirama hit pics I have watches 6 of them including Naruto both parts but I am in cbse Dead space 2 when I was a kid Very bad tier Come on guys you all know it's Itachi uchiha One piece and Naruto fans be hating you Real life oututsuki Go to a country where dental care is cheap like India and get your teeth fixed it will cost you less than usual high prices I can fix my teeth under 400 dollors Bro they all contain same amount of DNA so even any one of them have been fused you will be still you but XX or XY sperm have been fused you would have a different gender Naruto-kun Well guys we don't lose wars like america Mob psycho 100 is shit and boring Bro told the truth and don't lie we all know that ep 1071 was at peak hype and the hype died at 1072 ep WOW loyal fans Your Twitter was attacked by T-ELON m

In [13]:
def get_global_attention_impact(model, tokenizer, text_list, batch_size=16):
    word_attention_scores = {}
    
    model.eval()
    
    # Process in batches for speed
    for i in tqdm(range(0, len(text_list), batch_size)):
        batch_texts = text_list[i : i + batch_size]
        
        # Tokenize
        inputs = tokenizer(batch_texts, return_tensors='pt', padding=True, truncation=True, max_length=512).to(model.device)
        
        with torch.no_grad():
            outputs = model(**inputs, output_attentions=True)
        
        # Get attention from the LAST layer (Layer 11 for Base model)
        # Shape: [Batch, Heads, Seq_Len, Seq_Len]
        last_layer_attention = outputs.attentions[-1]
        
        # Average across all 12 heads to get a general "importance"
        # Shape: [Batch, Seq_Len, Seq_Len]
        avg_attention = last_layer_attention.mean(dim=1)
        
        # Look at the first token (index 0 which is [CLS]) looking at all other tokens
        # Shape: [Batch, Seq_Len]
        cls_attention = avg_attention[:, 0, :]
        
        input_ids = inputs['input_ids']
        
        # Loop through batch
        for j in range(len(batch_texts)):
            # Get the attention scores for this sentence
            scores = cls_attention[j]
            ids = input_ids[j]
            
            # Map scores to words
            tokens = tokenizer.convert_ids_to_tokens(ids)
            
            for token, score in zip(tokens, scores):
                # Clean token
                clean_token = token.replace("##", "")
                
                # Skip specials
                if clean_token in ["[CLS]", "[SEP]", "[PAD]"]:
                    continue
                
                if clean_token not in word_attention_scores:
                    word_attention_scores[clean_token] = {'total_attn': 0.0, 'frequency': 0}
                
                word_attention_scores[clean_token]['total_attn'] += score.item()
                word_attention_scores[clean_token]['frequency'] += 1

    return word_attention_scores

In [14]:
attn_stats = get_global_attention_impact(model, tokenizer, test_texts)

# 2. Convert to DataFrame
df_attn = pd.DataFrame.from_dict(attn_stats, orient='index').reset_index()
df_attn.columns = ['Word', 'Total_Attention', 'Frequency']

# 3. Find words the model "looks at" the most
top_attended_words = df_attn.sort_values(by='Total_Attention', ascending=False).head(20)

print(top_attended_words)

100%|██████████| 195/195 [01:02<00:00,  3.12it/s]


    Word  Total_Attention  Frequency
9      a        30.538996      16587
353  she        29.868196       1621
7      i        28.191125      24893
19     .        26.172583      38083
217    ,        25.892798      24067
218    r        24.440288       2311
84     '        24.338969      20513
758  her        23.242508       1464
46     s        23.181393      12885
81   you        21.843289       9813
158  the        21.055235      20273
566    ?        19.433110       5305
193   he        17.704263       2630
31     t        17.204641       8891
592    l        17.194713       2717
57     p        16.461473       2610
881   lo        15.584413       1396
123    o        15.533507       2838
68     c        14.951634       2232
70     e        14.885199       2980
