loading the model

In [1]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.preprocessing import LabelEncoder
import pandas as pd
# Encode target labels
label_encoder = LabelEncoder()
df = pd.read_csv("claims_final.csv", quotechar='"')
df["SeverityEncoded"] = label_encoder.fit_transform(df["SeverityLabel"])


# Local directory where model was saved
model_save_path = "Finetuned-roBERTa/roberta_severity_model"

# Load tokenizer and model from local folder
tokenizer = RobertaTokenizer.from_pretrained(model_save_path)
model = RobertaForSequenceClassification.from_pretrained(model_save_path)

print("✅ Model and tokenizer loaded from local directory.")


  from .autonotebook import tqdm as notebook_tqdm


✅ Model and tokenizer loaded from local directory.


prediction function

In [2]:
import torch
import numpy as np

def predict_severity(text):
    # Tokenize the input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=1).item()

    # Decode numeric prediction to label
    return label_encoder.inverse_transform([prediction])[0]


testing examples

In [3]:
custom_text1 = "my eye got stabbed"
print(custom_text1)
print("Prediction:", predict_severity(custom_text1))
custom_text2 = "i am a 50 year old man. i had a car accident and fractured my right arm and leg."
print(custom_text2)
print("Prediction:", predict_severity(custom_text2))
custom_text3 = "my friend pulled my hair and ran away.it gave me a headache but i was fine"
print(custom_text3)
print("Prediction:", predict_severity(custom_text3))
custom_text4 = "i am 30 years old. i am a married woman. yesterday i fell off the stairs. the back of my head really hurts "
print(custom_text4)
print("Prediction:", predict_severity(custom_text4))



my eye got stabbed


Prediction: Medium
i am a 50 year old man. i had a car accident and fractured my right arm and leg.
Prediction: High
my friend pulled my hair and ran away.it gave me a headache but i was fine
Prediction: Medium
i am 30 years old. i am a married woman. yesterday i fell off the stairs. the back of my head really hurts 
Prediction: High


In [4]:
custom_text5 = "my hand got stuck in the factory machinery, i took it out but my few fingers are injured"
print(custom_text5)
print("Prediction:", predict_severity(custom_text5))
custom_text6 = "my hand got stuck in the factory machinery, i took it out and its fine now"
print(custom_text6)
print("Prediction:", predict_severity(custom_text6))
custom_text7 = "my hand got stuck in the factory machinery,its bluish and swollen"
print(custom_text7)
print("Prediction:", predict_severity(custom_text7))


my hand got stuck in the factory machinery, i took it out but my few fingers are injured
Prediction: Medium
my hand got stuck in the factory machinery, i took it out and its fine now
Prediction: Low
my hand got stuck in the factory machinery,its bluish and swollen
Prediction: High


In [5]:
custom_text8 = "I am 18 years old. I slipped and hurt my foot but im fine now"
print(custom_text8)
print("Prediction:", predict_severity(custom_text8))
custom_text9 = "I am 30 years old. I slipped and hurt my foot and it slightly hurts"
print(custom_text9)
print("Prediction:", predict_severity(custom_text9))
custom_text10 = "I am 58 years old. I slipped and hurt my foot and it really hurts"
print(custom_text10)
print("Prediction:", predict_severity(custom_text10))


I am 18 years old. I slipped and hurt my foot but im fine now
Prediction: Medium
I am 30 years old. I slipped and hurt my foot and it slightly hurts
Prediction: Medium
I am 58 years old. I slipped and hurt my foot and it really hurts
Prediction: Medium


In [12]:
custom_text11 = "I am a cancer patient and i am having trouble breathing"
print(custom_text11)
print("Prediction:", predict_severity(custom_text11))
custom_text12 = "i got a heart attack"
print(custom_text12)
print("Prediction:", predict_severity(custom_text12))
custom_text13 = "I am a 29-year-old male working full-time. I usually work about 40 hours every week and I have no dependents. I’m single and currently live alone. The incident occurred on 17/08/2016, and I reported it the next day on Wed. While carrying a small box in the storeroom, I slightly twisted my wrist. It wasn’t too painful at first, just a mild strain, but I reported it to be on the safe side. I continued working for the rest of the day without much trouble. My weekly wage is around 500. I consider this a minor issue,but it did need to be documented. "
print(custom_text13)
print("Prediction:", predict_severity(custom_text13))


I am a cancer patient and i am having trouble breathing
Prediction: High
i got a heart attack
Prediction: Medium
I am a 29-year-old male working full-time. I usually work about 40 hours every week and I have no dependents. I’m single and currently live alone. The incident occurred on 17/08/2016, and I reported it the next day on Wed. While carrying a small box in the storeroom, I slightly twisted my wrist. It wasn’t too painful at first, just a mild strain, but I reported it to be on the safe side. I continued working for the rest of the day without much trouble. My weekly wage is around 500. I consider this a minor issue,but it did need to be documented. 
Prediction: Low


applying shap

In [6]:
model.eval()  # Set model to evaluation mode

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.3, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

testing on 1 example

In [7]:
from transformers import pipeline
import shap

# Build the pipeline
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, return_all_scores=True)

# Wrap with SHAP
explainer = shap.Explainer(pipe)

# Sample input text
text = ["I slipped on wet stairs and hurt my lower back. I’m in severe pain."]

# Get SHAP values
shap_values = explainer(text)


Device set to use cpu
PartitionExplainer explainer: 2it [01:10, 70.21s/it]               


In [8]:
shap.plots.text(shap_values[0])


prediction function with shap

In [9]:
import torch
import shap
import matplotlib.pyplot as plt

def predict_severity(text):
    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Get prediction from model
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=1).item()

    # Decode numeric prediction to label
    predicted_label = label_encoder.inverse_transform([prediction])[0]

    # Define SHAP-compatible prediction function
    def predict_proba(texts):
        # Convert any array or non-string input to list of strings
        str_texts = [str(t) for t in texts]
        encodings = tokenizer(str_texts, return_tensors="pt", padding=True, truncation=True, max_length=128)
        with torch.no_grad():
            output = model(**encodings)
            probs = torch.nn.functional.softmax(output.logits, dim=1)
        return probs.detach().numpy()

    # Create SHAP explainer (using default masker)
    explainer = shap.Explainer(predict_proba, tokenizer)

    # Compute SHAP values
    shap_values = explainer([text])

    return predicted_label, shap_values


In [10]:
label, shap_values = predict_severity("I fractured my leg and can’t walk")
shap.plots.text(shap_values[0])  # To display the SHAP explanation
print("Predicted Severity:", label)


Predicted Severity: High


In [11]:
import shap
import matplotlib.pyplot as plt

# List of custom texts
custom_texts = [
    "my eye got stabbed",
    "i am a 50 year old man. i had a car accident and fractured my right arm and leg.",
    "my friend pulled my hair and ran away.it gave me a headache but i was fine",
    "i am 30 years old. i am a married woman. yesterday i fell off the stairs. the back of my head really hurts",
    "my hand got stuck in the factory machinery, i took it out but my few fingers are injured",
    "my hand got stuck in the factory machinery, i took it out and its fine now",
    "my hand got stuck in the factory machinery,its bluish and swollen",
    "I am 18 years old. I slipped and hurt my foot but im fine now",
    "I am 30 years old. I slipped and hurt my foot and it slightly hurts",
    "I am 58 years old. I slipped and hurt my foot and it really hurts",
    "I am a cancer patient and i am having trouble breathing",
    "i got a heart attack",
    "I am a 29-year-old male working full-time. I usually work about 40 hours every week and I have no dependents. I’m single and currently live alone. The incident occurred on 17/08/2016, and I reported it the next day on Wed. While carrying a small box in the storeroom, I slightly twisted my wrist. It wasn’t too painful at first, just a mild strain, but I reported it to be on the safe side. I continued working for the rest of the day without much trouble. My weekly wage is around 500. I consider this a minor issue,but it did need to be documented."
]

# Iterate through custom inputs
for i, text in enumerate(custom_texts, 1):
    print(f"\n🔹 Custom Text {i}:\n{text}")
    label, shap_values = predict_severity(text)
    print("🔸 Prediction:", label)

    # Display SHAP explanation
    print("🔍 SHAP Explanation:")
    shap.plots.text(shap_values[0])  # Automatically uses matplotlib backend





🔹 Custom Text 1:
my eye got stabbed
🔸 Prediction: Medium
🔍 SHAP Explanation:



🔹 Custom Text 2:
i am a 50 year old man. i had a car accident and fractured my right arm and leg.


KeyboardInterrupt: 

In [15]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import torch
from lime.lime_text import LimeTextExplainer
import numpy as np


def predict_severity_with_lime(text):
    # Tokenize and predict
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=1).item()

    # Decode numeric prediction to label
    predicted_label = label_encoder.inverse_transform([prediction])[0]

    # Define LIME-compatible prediction function
    def predict_proba(texts):
        encodings = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=128)
        with torch.no_grad():
            output = model(**encodings)
            probs = torch.nn.functional.softmax(output.logits, dim=1)
        return probs.detach().numpy()

    # Create LIME explainer
    class_names = list(label_encoder.classes_)
    explainer = LimeTextExplainer(class_names=class_names)

    # Get LIME explanation
    explanation = explainer.explain_instance(
        text_instance=text,
        classifier_fn=predict_proba,
        num_features=5  # top N tokens to display
    )

    return predicted_label, explanation


In [16]:
label, exp = predict_severity_with_lime("my hand got stuck in the machine and is swollen")
print("Predicted severity:", label)
print("Top contributing words:", exp.as_list())


Predicted severity: Medium
Top contributing words: [(np.str_('stuck'), -0.2928933236868946), (np.str_('swollen'), -0.28185865011346656), (np.str_('machine'), 0.09361513744866376), (np.str_('is'), -0.061717022936473236), (np.str_('in'), -0.049317127163199644)]


In [17]:
label, exp = predict_severity_with_lime("i got stabbed in my eye and its bleeding too much")
print("Predicted severity:", label)
print("Top contributing words:", exp.as_list())


Predicted severity: High
Top contributing words: [(np.str_('bleeding'), -0.09006256247011625), (np.str_('stabbed'), -0.08774724013186441), (np.str_('too'), -0.04594569777266365), (np.str_('got'), 0.03837262838777274), (np.str_('eye'), -0.028198799604058467)]


In [18]:
label, exp = predict_severity_with_lime("i am having severe pain in my chest and i think i might be having a heart attack")
print("Predicted severity:", label)
print("Top contributing words:", exp.as_list())


KeyboardInterrupt: 