# Imports

In [1]:
!pip install protobuf==3.20.3
!pip install transformers-interpret

Collecting protobuf==3.20.3
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.33.0
    Uninstalling protobuf-6.33.0:
      Successfully uninstalled protobuf-6.33.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
opentelemetry-proto 1.37.0 requires protobuf<7.0,>=5.0, but you have protobuf 3.20.3 which is incompatible.
onnx 1.18.0 requires protobuf>=4.25.1, but you have protobuf 3.20.3 which is incompatible.
a2a-sdk 0.3.10 requires p

In [2]:
import torch
import shap
from transformers_interpret import SequenceClassificationExplainer

import transformers
from datasets import load_from_disk

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import pickle

2025-11-23 20:22:03.523402: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763929323.709233      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763929323.766034      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
torch.manual_seed(42)
np.random.seed(42)

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on device: {device}")

Running on device: cuda


# Dataset

In [5]:
dataset = load_from_disk("/kaggle/input/erisk25/erisk_processed/erisk_processed")
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'user'],
        num_rows: 25263
    })
    val: Dataset({
        features: ['text', 'label', 'user'],
        num_rows: 2807
    })
    test: Dataset({
        features: ['text', 'label', 'user'],
        num_rows: 3115
    })
})

# Model

In [6]:
model_path = '/kaggle/input/disorbert-finetuned-models/pytorch/default/1/finetuned-models/bert-base-cased-finetuned/checkpoint-5530'
tokenizer = transformers.AutoTokenizer.from_pretrained(model_path, truncation=True, max_length=512)
model = transformers.AutoModelForSequenceClassification.from_pretrained(model_path, device_map='auto')

In [7]:
tokenizer

BertTokenizerFast(name_or_path='/kaggle/input/disorbert-finetuned-models/pytorch/default/1/finetuned-models/bert-base-cased-finetuned/checkpoint-5530', vocab_size=28996, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [8]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [9]:
explainer = SequenceClassificationExplainer(model, tokenizer)
explainer.n_steps = 8
explainer.internal_batch_size = 1
explainer

<transformers_interpret.explainers.text.sequence_classification.SequenceClassificationExplainer at 0x79b29c3cdb10>

In [10]:
test_texts = list(dataset['test']['text'])
len(test_texts), test_texts[0]

(3115,
 "Itachi uchiha not that I am a huge fan but man his story hits hard 1. Naruto Shipudden / OG Naruto 2. Attack on Titan 3. Saiki 4. Berserk (manga) 5.Demon Slayer / jujutsu kaisen Hashirama hit pics I have watches 6 of them including Naruto both parts but I am in cbse Dead space 2 when I was a kid Very bad tier Come on guys you all know it's Itachi uchiha One piece and Naruto fans be hating you Real life oututsuki Go to a country where dental care is cheap like India and get your teeth fixed it will cost you less than usual high prices I can fix my teeth under 400 dollors Bro they all contain same amount of DNA so even any one of them have been fused you will be still you but XX or XY sperm have been fused you would have a different gender Naruto-kun Well guys we don't lose wars like america Mob psycho 100 is shit and boring Bro told the truth and don't lie we all know that ep 1071 was at peak hype and the hype died at 1072 ep WOW loyal fans Your Twitter was attacked by T-ELON m

In [11]:
text = "I have been staying in bed all day and feel empty."
word_attributions = explainer(text)

In [12]:
explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (1.00),LABEL_1,2.15,[CLS] i have been staying in bed all day and feel empty . [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (1.00),LABEL_1,2.15,[CLS] i have been staying in bed all day and feel empty . [SEP]
,,,,


In [13]:
MAX_LEN = 510

def truncate_text(text, max_tokens=MAX_LEN):
    tokens = tokenizer.tokenize(text)
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
    return tokenizer.convert_tokens_to_string(tokens)

In [14]:
def cleanup():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

In [15]:
def get_global_impact_ig(explainer, text_list):
    global_word_scores = {}

    for text in tqdm(test_texts):
        text = truncate_text(text)
        result = explainer(text)
        cleanup() 

        for word, score in result:
            word = word.replace("##", "")

            if word in ["[CLS]", "[SEP]", "[PAD]"]:
                continue

            if word not in global_word_scores:
                global_word_scores[word] = {"total_score": 0.0, "frequency": 0}

            global_word_scores[word]["total_score"] += score
            global_word_scores[word]["frequency"] += 1

    for key, value in global_word_scores.items():
        global_word_scores[key]['avg_score'] = value['total_score'] / value['frequency'] if value['frequency'] > 8 else 0

    return global_word_scores

In [16]:
word_stats = get_global_impact_ig(explainer, test_texts)

df_ig = pd.DataFrame.from_dict(word_stats, orient="index").reset_index()
df_ig.columns = ["Word", "Total_Attribution", "Frequency", "Average_Attribution"]

# Sort and print top depression indicators
top_depression_words = df_ig.sort_values(by="Average_Attribution", ascending=False).head(20)
print(top_depression_words)

100%|██████████| 3115/3115 [21:43<00:00,  2.39it/s]

              Word  Total_Attribution  Frequency  Average_Attribution
2416        icidal           4.469277         17             0.262899
5428          lone           3.772235         20             0.188612
3046      pressive           1.561531         11             0.141957
2372     substance           2.208905         16             0.138057
3389           xin           1.459004         11             0.132637
2553     treatment           6.070323         46             0.131964
3039          mani           1.709778         13             0.131521
4772       confess           1.339847         11             0.121804
2296    prescribed           2.187816         19             0.115148
151            mob           1.136275         10             0.113628
1732       teenage           1.442037         13             0.110926
9959     intensity           1.300465         12             0.108372
1673          tomb           0.974791          9             0.108310
7135          lica  


