In [None]:
#============
# 1. Mounting Google Drive & Installing Dependencies
#============
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [1]:
!pip install afinn transformers datasets evaluate openpyxl shap

Collecting afinn
  Downloading afinn-0.1.tar.gz (52 kB)
[?25l     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/52.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m52.6/52.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m84.1/84.1 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: afinn
  Building wheel for afinn (setup.py) ... [?25l[?25hdone
  Created wheel for afinn: filename=afinn-0.1-

In [None]:
#============# 2. Extracting SWMH Dataset from ZIP/TAR#============
path_zip = "/content/drive/MyDrive/Mansoor_Share/Datasets/6476179.zip"
!unzip -o "$path_zip" -d "/content/"
!tar -xvzf /content/swmh.tar.gz -C /content/

Archive:  /content/drive/MyDrive/Mansoor_Share/Datasets/6476179.zip
 extracting: /content/swmh.tar.gz    
swmh/
swmh/train.csv
swmh/test.csv
swmh/val.csv


In [None]:
# ========
# 3. Imports
#============
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import re
import os
import zipfile
from afinn import Afinn
from datasets import Dataset
from transformers import AutoTokenizer, AutoModel, AutoConfig, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, f1_score, classification_report
from safetensors.torch import load_file
import shap
import matplotlib.pyplot as plt

In [None]:
#============
# 4. Preprocessing Function
#============
def preprocess_data(csv_path):
    df = pd.read_csv(csv_path)

    def clean_text(text):
        text = str(text)
        text = re.sub(r"http\S+", "", text)
        text = re.sub(r"[^A-Za-z0-9\s.,!?']", " ", text)
        text = re.sub(r"\s+", " ", text).strip()
        return text.lower()

    df["text"] = df["text"].apply(clean_text)

    label2id = {label: i for i, label in enumerate(sorted(df["label"].unique()))}
    id2label = {v: k for k, v in label2id.items()}
    df["label_id"] = df["label"].map(label2id)

    af = Afinn()
    def compute_emotion_score(text):
        tokens = text.split()
        scores = [abs(af.score(tok)) / 5.0 for tok in tokens if af.score(tok) < 0]
        return float(np.mean(scores)) if scores else 0.0

    df["emotion_score"] = df["text"].apply(compute_emotion_score)
    return df, label2id, id2label


In [None]:
#============
# 5. RobertaSWMHClassifier Model Definition
#============
class RobertaSWMHClassifier(nn.Module):
    def __init__(self, num_labels, temperature=0.07):
        super().__init__()
        self.config = AutoConfig.from_pretrained("roberta-large", num_labels=num_labels)
        self.encoder = AutoModel.from_pretrained("roberta-large", config=self.config)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Sequential(
            nn.Linear(self.config.hidden_size, 128),
            nn.GELU(),
            nn.LayerNorm(128),
            nn.Dropout(0.2),
            nn.GELU(),
            nn.Linear(128, num_labels)
        )
        self.regressor = nn.Linear(self.config.hidden_size, 1)
        self.proj = nn.Sequential(
            nn.Linear(self.config.hidden_size, 128),
            nn.GELU(),
            nn.Linear(128, 64)
        )
        self.ce_loss = nn.CrossEntropyLoss()
        self.mse_loss = nn.MSELoss()
        self.temperature = temperature

    def contrastive_loss(self, features, labels):
        features = F.normalize(features, dim=1)
        sim = torch.matmul(features, features.T) / self.temperature
        labels = labels.unsqueeze(1)
        mask = torch.eq(labels, labels.T).float()
        mask.fill_diagonal_(0)
        log_prob = sim - torch.log(torch.exp(sim).sum(dim=1, keepdim=True))
        pos_log_prob = (mask * log_prob).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
        return -pos_log_prob.mean()

    def forward(self, input_ids=None, attention_mask=None, labels=None, emotion_score=None):
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.dropout(outputs.last_hidden_state[:, 0])
        logits = self.classifier(pooled)
        emo_pred = self.regressor(pooled).squeeze(-1)
        proj_vec = self.proj(pooled)
        loss = None
        if labels is not None:
            ce = self.ce_loss(logits, labels.long())
            scl = self.contrastive_loss(proj_vec, labels.long())
            mse = self.mse_loss(emo_pred, emotion_score.float()) if emotion_score is not None else 0
            loss = ce + 0.3 * mse + 0.1 * scl
        return {
            "loss": loss,
            "logits": logits,
            "emotion_pred": emo_pred,
            "proj_vec": proj_vec
        }

#============
# 6. Evaluation Function
#============
def evaluate_model(csv_path, weights_path, num_labels, from_zip=True, out_excel="test_predictions.xlsx"):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    df_test, label2id, id2label = preprocess_data(csv_path)
    ds_test = Dataset.from_pandas(df_test[["text", "label_id", "emotion_score"]])

    tokenizer = AutoTokenizer.from_pretrained("roberta-large")
    def tokenize_fn(batch):
        tok = tokenizer(batch["text"], truncation=True, padding="max_length", max_length=512)
        tok["labels"] = batch["label_id"]
        tok["emotion_score"] = batch["emotion_score"]
        return tok

    tokenized_test = ds_test.map(tokenize_fn, batched=True)
    tokenized_test.set_format(type="torch", columns=["input_ids", "attention_mask", "labels", "emotion_score"])

    model = RobertaSWMHClassifier(num_labels).to(device)

    if from_zip:
        ckpt_dir = "/content/tmp_ckpt"
        os.makedirs(ckpt_dir, exist_ok=True)
        with zipfile.ZipFile(weights_path, 'r') as zip_ref:
            zip_ref.extractall(ckpt_dir)
        model_path = os.path.join(ckpt_dir, "model.safetensors")
        state_dict = load_file(model_path)
        model.load_state_dict(state_dict, strict=False)
    else:
        state_dict = torch.load(weights_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)

    model.eval()

    args = TrainingArguments(
        output_dir="./results",
        per_device_eval_batch_size=8,
        report_to="none",
        logging_strategy="no",
        save_strategy="no",
        eval_strategy="no"
    )

    trainer = Trainer(
        model=model,
        args=args,
        data_collator=lambda batch: {
            "input_ids": torch.stack([x["input_ids"] for x in batch]),
            "attention_mask": torch.stack([x["attention_mask"] for x in batch]),
            "labels": torch.tensor([x["labels"] for x in batch]),
            "emotion_score": torch.tensor([x["emotion_score"] for x in batch])
        }
    )

    preds = trainer.predict(tokenized_test)
    logits = preds.predictions[0]
    y_pred = np.argmax(logits, axis=-1)
    y_true = preds.label_ids

    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average="macro")
    print(f"‚úÖ Accuracy: {acc:.4f}")
    print(f"‚úÖ Macro F1: {f1:.4f}")
    print("\nClassification Report:")

    # Fix label mismatch issue
    unique_ids = sorted(set(y_true))
    target_names = [id2label[i] for i in unique_ids]
    print(classification_report(y_true, y_pred, labels=unique_ids, target_names=target_names))

    df_test["predicted_label_id"] = y_pred
    df_test["predicted_label"] = df_test["predicted_label_id"].map(id2label)
    df_test["true_label"] = df_test["label_id"].map(id2label)
    df_test[["text", "true_label", "predicted_label"]].to_excel(out_excel, index=False)
    print(f"üì¶ Saved predictions to {out_excel}")

    return model, out_excel, id2label


In [None]:
#============
# 7. SHAP Analysis Function
#============

class WrappedModel:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.model.eval()
        self.device = next(model.parameters()).device

    def __call__(self, texts):
        if isinstance(texts, pd.Series):
            texts = texts.tolist()
        texts = [str(t) for t in texts]

        encoded = self.tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
        input_ids = encoded["input_ids"].to(self.device)
        attention_mask = encoded["attention_mask"].to(self.device)

        with torch.no_grad():
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs["logits"]
            probs = torch.nn.functional.softmax(logits, dim=-1)
        return probs.cpu().numpy()

def save_shap_to_excel(shap_values, texts, df_sample, label2id, output_file="shap_output.xlsx"):
    rows = []
    correct_rows = []
    incorrect_rows = []

    for i, text in enumerate(texts):
        true_label = df_sample.iloc[i]["true_label"]
        pred_label = df_sample.iloc[i]["predicted_label"]
        pred_label_id = label2id[pred_label]

        token_list = shap_values.data[i]
        shap_list = shap_values.values[i][:, pred_label_id]

        token_value_pairs = list(zip(token_list, shap_list))
        top_20 = sorted(token_value_pairs, key=lambda x: abs(x[1]), reverse=True)[:20]
        tokens, values = zip(*top_20)

        row = {
            "text": text,
            "true_label": true_label,
            "predicted_label": pred_label,
            "is_correct": true_label == pred_label,
            "top_10_tokens": ", ".join(tokens),
            "top_10_shap_values": ", ".join([f"{v:.4f}" for v in values])
        }

        rows.append(row)

        if true_label == pred_label:
            correct_rows.append(row)
        else:
            incorrect_rows.append(row)

    df_all = pd.DataFrame(rows)
    df_correct = pd.DataFrame(correct_rows)
    df_incorrect = pd.DataFrame(incorrect_rows)

    # Save to a single Excel with 3 sheets
    with pd.ExcelWriter(output_file) as writer:
        df_all.to_excel(writer, sheet_name="ALL", index=False)
        df_correct.to_excel(writer, sheet_name="CORRECT", index=False)
        df_incorrect.to_excel(writer, sheet_name="INCORRECT", index=False)

    print(f"üìä SHAP saved: {output_file}")
    print(f"   ‚úî Correct rows: {len(df_correct)}")
    print(f"   ‚úñ Incorrect rows: {len(df_incorrect)}")


def run_shap_analysis(model, test_csv_path, label2id, sample_size=50, title="SHAP Analysis"):
    import shap
    import matplotlib.pyplot as plt

    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained("roberta-large")
    wrapped_model = WrappedModel(model, tokenizer)

    df = pd.read_excel(test_csv_path)
    df_sample = df.sample(sample_size, random_state=42)

    texts = [str(t) for t in df_sample["text"].tolist()]
    explainer = shap.Explainer(wrapped_model, shap.maskers.Text(tokenizer), output_names=list(label2id.keys()))
    shap_values = explainer(texts)

    correct_rows = df_sample[df_sample["true_label"] == df_sample["predicted_label"]]
    incorrect_rows = df_sample[df_sample["true_label"] != df_sample["predicted_label"]]

    correct_idx = correct_rows.index[0] if not correct_rows.empty else None
    incorrect_idx = incorrect_rows.index[0] if not incorrect_rows.empty else None

    if correct_idx is not None and correct_idx < len(shap_values.values):
        print("‚úÖ Correct Prediction Example:")
        shap_idx_correct = df_sample.index.get_loc(correct_idx)
        shap.plots.text(shap_values[shap_idx_correct], max_display=20)
        plt.title(f"{title} ‚Äî Correct")

    if incorrect_idx is not None and incorrect_idx < len(shap_values.values):
        print("‚ùå Incorrect Prediction Example:")
        shap_idx_incorrect = df_sample.index.get_loc(incorrect_idx)
        shap.plots.text(shap_values[shap_idx_incorrect], max_display=20)
        plt.title(f"{title} ‚Äî Incorrect")

    # Save all SHAP values to Excel
    safe_title = title.replace(" ", "_").replace("‚Äî", "-")
    output_file = f"/content/drive/MyDrive/SHAP_Results/{safe_title}_SHAP_Analysis.xlsx"
    save_shap_to_excel(shap_values, texts, df_sample, label2id, output_file=output_file)

    # save_shap_to_excel(shap_values, texts, df_sample, output_file=output_file)


In [None]:
#============
# 8. Set Paths
#============
RMHD_weights = '/content/drive/MyDrive/Mansoor_Share/weights/RMHDCombined8_roberta_large_constructive_learning_affinnemotion_scl_v1.zip'
SWMH_weights = '/content/drive/MyDrive/Mansoor_Share/weights/pytorch_modelroberta_emotion_wloss_constructive.bin'
RMHD_kaggle_weights = '/content/drive/MyDrive/Mansoor_Share/weights/rmhdkaggle_pytorch_modelroberta_emotion_wloss_constructive.bin'

test_rmhd    = "/content/drive/MyDrive/Mansoor_Share/Datasets/RMHD_combined/RMHD_combinedsorted_test.csv"
test_swmh    =  "/content/swmh/test.csv"
test_rmhd_kaggle   =  "/content/drive/MyDrive/Mansoor_Share/Datasets/RMHD_Kaggle/test.csv"



In [None]:
#============
# 9. Evaluate Models (in-domain only)
#============
# SWMH
model_swmh, file_swmh, swmh_id2label = evaluate_model(test_swmh, SWMH_weights, num_labels=5, from_zip=False, out_excel="SWMH_predictions.xlsx")

# RMHD
model_rmhd, file_rmhd, rmhd_id2label = evaluate_model(test_rmhd, RMHD_weights, num_labels=8, from_zip=True, out_excel="RMHD_predictions.xlsx")

# RMHD Kaggle
model_kaggle, file_kaggle, kaggle_id2label = evaluate_model(test_rmhd_kaggle, RMHD_kaggle_weights, num_labels=5, from_zip=False, out_excel="RMHDKaggle_predictions.xlsx")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/10883 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.7395
‚úÖ Macro F1: 0.7488

Classification Report:
                   precision    recall  f1-score   support

     self.Anxiety       0.85      0.81      0.83      1911
self.SuicideWatch       0.68      0.74      0.71      2018
     self.bipolar       0.89      0.74      0.81      1493
  self.depression       0.68      0.74      0.71      3774
  self.offmychest       0.73      0.65      0.69      1687

         accuracy                           0.74     10883
        macro avg       0.77      0.74      0.75     10883
     weighted avg       0.75      0.74      0.74     10883

üì¶ Saved predictions to SWMH_predictions.xlsx


Map:   0%|          | 0/14410 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.8700
‚úÖ Macro F1: 0.7913

Classification Report:
               precision    recall  f1-score   support

         ADHD       0.93      0.94      0.93      1917
      Anxiety       0.81      0.79      0.80      2725
          BPD       0.90      0.67      0.77       915
      Bipolar       0.85      0.58      0.69       211
   Depression       0.89      0.96      0.92      7261
HealthAnxiety       0.85      0.70      0.77       246
         PTSD       0.82      0.74      0.78       262
SocialAnxiety       0.72      0.63      0.67       873

     accuracy                           0.87     14410
    macro avg       0.85      0.75      0.79     14410
 weighted avg       0.87      0.87      0.87     14410

üì¶ Saved predictions to RMHD_predictions.xlsx


Map:   0%|          | 0/38878 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.6965
‚úÖ Macro F1: 0.7091

Classification Report:
              precision    recall  f1-score   support

     anxiety       0.83      0.83      0.83      5896
  depression       0.60      0.65      0.63     10682
      lonely       0.72      0.71      0.71      4305
mentalhealth       0.70      0.61      0.65      8319
suicidewatch       0.72      0.73      0.73      9676

    accuracy                           0.70     38878
   macro avg       0.71      0.71      0.71     38878
weighted avg       0.70      0.70      0.70     38878

üì¶ Saved predictions to RMHDKaggle_predictions.xlsx


In [None]:
#============
# 10. Run SHAP Analyses
#============
run_shap_analysis(model_swmh, file_swmh, {v: k for k, v in swmh_id2label.items()}, title="SWMH Model SHAP")
run_shap_analysis(model_rmhd, file_rmhd, {v: k for k, v in rmhd_id2label.items()}, title="RMHD Model SHAP")
run_shap_analysis(model_kaggle, file_kaggle, {v: k for k, v in kaggle_id2label.items()}, title="RMHD Kaggle Model SHAP")



  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:   6%|‚ñå         | 3/50 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  12%|‚ñà‚ñè        | 6/50 [00:27<03:31,  4.81s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  14%|‚ñà‚ñç        | 7/50 [00:33<03:51,  5.38s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  20%|‚ñà‚ñà        | 10/50 [00:46<02:43,  4.08s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  24%|‚ñà‚ñà‚ñç       | 12/50 [00:56<02:47,  4.42s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  28%|‚ñà‚ñà‚ñä       | 14/50 [01:07<02:42,  4.53s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  30%|‚ñà‚ñà‚ñà       | 15/50 [01:15<03:24,  5.83s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 22/50 [01:46<01:52,  4.00s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 23/50 [01:54<02:15,  5.02s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (694 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 24/50 [02:06<03:06,  7.17s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 25/50 [02:14<03:04,  7.38s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 30/50 [02:37<01:32,  4.64s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 32/50 [02:45<01:12,  4.05s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 33/50 [02:57<01:49,  6.43s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 36/50 [03:09<01:01,  4.40s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 37/50 [03:18<01:17,  5.93s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 40/50 [03:31<00:43,  4.37s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 42/50 [03:44<00:40,  5.01s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 44/50 [03:53<00:27,  4.64s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 46/50 [04:09<00:23,  5.93s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 47/50 [04:16<00:19,  6.36s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 48/50 [04:23<00:13,  6.66s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [04:37<00:00,  6.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 51it [04:43,  5.92s/it]


üìä SHAP saved: /content/drive/MyDrive/SHAP_Results/SWMH_Model_SHAP_SHAP_Analysis.xlsx
   ‚úî Correct rows: 42
   ‚úñ Incorrect rows: 8


  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  10%|‚ñà         | 5/50 [00:16<01:36,  2.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  12%|‚ñà‚ñè        | 6/50 [00:22<02:51,  3.90s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  16%|‚ñà‚ñå        | 8/50 [00:37<03:54,  5.58s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  20%|‚ñà‚ñà        | 10/50 [00:53<04:23,  6.60s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  26%|‚ñà‚ñà‚ñå       | 13/50 [01:09<03:22,  5.48s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  32%|‚ñà‚ñà‚ñà‚ñè      | 16/50 [01:21<02:22,  4.20s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  40%|‚ñà‚ñà‚ñà‚ñà      | 20/50 [01:40<01:58,  3.95s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 22/50 [01:48<01:51,  3.97s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 25/50 [02:01<01:40,  4.01s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 27/50 [02:13<01:48,  4.72s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 28/50 [02:20<02:01,  5.52s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 29/50 [02:29<02:14,  6.41s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 31/50 [02:39<01:46,  5.61s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 34/50 [02:55<01:19,  4.98s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 35/50 [03:01<01:18,  5.22s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 38/50 [03:15<00:53,  4.49s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 42/50 [03:31<00:31,  3.91s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 43/50 [03:38<00:32,  4.64s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 48/50 [04:02<00:08,  4.41s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (1064 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 49/50 [04:15<00:07,  7.03s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 51it [04:28,  5.60s/it]


üìä SHAP saved: /content/drive/MyDrive/SHAP_Results/RMHD_Model_SHAP_SHAP_Analysis.xlsx
   ‚úî Correct rows: 47
   ‚úñ Incorrect rows: 3


PartitionExplainer explainer:  18%|‚ñà‚ñä        | 9/50 [00:30<02:31,  3.70s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  26%|‚ñà‚ñà‚ñå       | 13/50 [00:47<02:10,  3.52s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  34%|‚ñà‚ñà‚ñà‚ñç      | 17/50 [01:01<01:32,  2.81s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  38%|‚ñà‚ñà‚ñà‚ñä      | 19/50 [01:10<01:51,  3.59s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 21/50 [01:19<01:55,  3.97s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (1261 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 22/50 [01:33<03:13,  6.90s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 25/50 [01:46<02:00,  4.84s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 27/50 [02:02<02:26,  6.35s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 28/50 [02:13<02:50,  7.73s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 32/50 [02:35<01:37,  5.40s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 35/50 [02:46<00:58,  3.91s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 38/50 [02:58<00:44,  3.67s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 41/50 [03:16<00:43,  4.86s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 46/50 [03:32<00:10,  2.64s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 51it [03:49,  4.77s/it]


üìä SHAP saved: /content/drive/MyDrive/SHAP_Results/RMHD_Kaggle_Model_SHAP_SHAP_Analysis.xlsx
   ‚úî Correct rows: 34
   ‚úñ Incorrect rows: 16
