In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#============# 2. Extracting SWMH Dataset from ZIP/TAR#============
path_zip = "/content/drive/MyDrive/Mansoor_Share/Datasets/6476179.zip"
!unzip -o "$path_zip" -d "/content/"
!tar -xvzf /content/swmh.tar.gz -C /content/

Archive:  /content/drive/MyDrive/Mansoor_Share/Datasets/6476179.zip
 extracting: /content/swmh.tar.gz    
swmh/
swmh/train.csv
swmh/test.csv
swmh/val.csv


In [3]:
!pip install afinn transformers datasets evaluate openpyxl


Collecting afinn
  Downloading afinn-0.1.tar.gz (52 kB)
[?25l     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/52.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m52.6/52.6 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m84.1/84.1 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: afinn
  Building wheel for afinn (setup.py) ... [?25l[?25hdone
  Created wheel for afinn: filename=afinn-0.1-

In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import re
import zipfile
import os
from afinn import Afinn
from datasets import Dataset
from transformers import AutoTokenizer, AutoModel, AutoConfig, Trainer
from sklearn.metrics import accuracy_score, f1_score, classification_report


def preprocess_data(csv_path):
    """
    Load and preprocess the test data: clean text, compute emotion score, encode labels.
    """
    df = pd.read_csv(csv_path)

    def clean_text(text):
        text = str(text)
        text = re.sub(r"http\S+", "", text)
        text = re.sub(r"[^A-Za-z0-9\s.,!?']", " ", text)
        text = re.sub(r"\s+", " ", text).strip()
        return text.lower()

    df["text"] = df["text"].apply(clean_text)

    label2id = {label: i for i, label in enumerate(sorted(df["label"].unique()))}
    id2label = {v: k for k, v in label2id.items()}
    df["label_id"] = df["label"].map(label2id)

    af = Afinn()

    def compute_emotion_score(text):
        tokens = text.split()
        scores = [abs(af.score(tok)) / 5.0 for tok in tokens if af.score(tok) < 0]
        return float(np.mean(scores)) if scores else 0.0

    df["emotion_score"] = df["text"].apply(compute_emotion_score)
    return df, label2id, id2label


In [5]:
from transformers import AutoTokenizer, AutoModel, AutoConfig, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score, classification_report
from safetensors.torch import load_file
import zipfile, os
import numpy as np

def evaluate_model(csv_path, weights_path, num_labels, from_zip=True, out_excel="test_predictions.xlsx"):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Preprocess data
    df_test, label2id, id2label = preprocess_data(csv_path)
    ds_test = Dataset.from_pandas(df_test[["text", "label_id", "emotion_score"]])

    tokenizer = AutoTokenizer.from_pretrained("roberta-large")
    def tokenize_fn(batch):
        tok = tokenizer(batch["text"], truncation=True, padding="max_length", max_length=512)
        tok["labels"] = batch["label_id"]
        tok["emotion_score"] = batch["emotion_score"]
        return tok

    tokenized_test = ds_test.map(tokenize_fn, batched=True)
    tokenized_test.set_format(type="torch", columns=["input_ids", "attention_mask", "labels", "emotion_score"])

    # Model
    class RobertaSWMHClassifier(nn.Module):
        def __init__(self, num_labels, temperature=0.07):
            super().__init__()
            self.config = AutoConfig.from_pretrained("roberta-large", num_labels=num_labels)
            self.encoder = AutoModel.from_pretrained("roberta-large", config=self.config)
            self.dropout = nn.Dropout(0.3)
            self.classifier = nn.Sequential(
                nn.Linear(self.config.hidden_size, 128),
                nn.GELU(),
                nn.LayerNorm(128),
                nn.Dropout(0.2),
                nn.GELU(),
                nn.Linear(128, num_labels)
            )
            self.regressor = nn.Linear(self.config.hidden_size, 1)
            self.proj = nn.Sequential(
                nn.Linear(self.config.hidden_size, 128),
                nn.GELU(),
                nn.Linear(128, 64)
            )
            self.ce_loss = nn.CrossEntropyLoss()
            self.mse_loss = nn.MSELoss()
            self.temperature = temperature

        def contrastive_loss(self, features, labels):
            features = F.normalize(features, dim=1)
            sim = torch.matmul(features, features.T) / self.temperature
            labels = labels.unsqueeze(1)
            mask = torch.eq(labels, labels.T).float()
            mask.fill_diagonal_(0)
            log_prob = sim - torch.log(torch.exp(sim).sum(dim=1, keepdim=True))
            pos_log_prob = (mask * log_prob).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
            return -pos_log_prob.mean()

        def forward(self, input_ids=None, attention_mask=None, labels=None, emotion_score=None):
            outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
            pooled = self.dropout(outputs.last_hidden_state[:, 0])
            logits = self.classifier(pooled)
            emo_pred = self.regressor(pooled).squeeze(-1)
            proj_vec = self.proj(pooled)
            loss = None
            if labels is not None:
                ce = self.ce_loss(logits, labels.long())
                scl = self.contrastive_loss(proj_vec, labels.long())
                mse = self.mse_loss(emo_pred, emotion_score.float()) if emotion_score is not None else 0
                loss = ce + 0.3 * mse + 0.1 * scl
            return {
                "loss": loss,
                "logits": logits,
                "emotion_pred": emo_pred,
                "proj_vec": proj_vec
            }

    model = RobertaSWMHClassifier(num_labels).to(device)

    # Load weights (zip = safetensors / else = .bin)
    if from_zip:
        ckpt_dir = "/content/tmp_ckpt"
        os.makedirs(ckpt_dir, exist_ok=True)
        with zipfile.ZipFile(weights_path, 'r') as zip_ref:
            zip_ref.extractall(ckpt_dir)
        model_path = os.path.join(ckpt_dir, "model.safetensors")
        if not os.path.exists(model_path):
            raise FileNotFoundError("‚ùå No model.safetensors found in ZIP.")
        state_dict = load_file(model_path)
        model.load_state_dict(state_dict, strict=False)
    else:
        state_dict = torch.load(weights_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)

    model.eval()

    args = TrainingArguments(
        output_dir="./results",
        per_device_eval_batch_size=8,
        report_to="none",
        logging_strategy="no",
        save_strategy="no",
        eval_strategy="no"
    )

    trainer = Trainer(
        model=model,
        args=args,
        data_collator=lambda batch: {
            "input_ids": torch.stack([x["input_ids"] for x in batch]),
            "attention_mask": torch.stack([x["attention_mask"] for x in batch]),
            "labels": torch.tensor([x["labels"] for x in batch]),
            "emotion_score": torch.tensor([x["emotion_score"] for x in batch])
        }
    )

    preds = trainer.predict(tokenized_test)
    logits = preds.predictions[0]
    y_pred = np.argmax(logits, axis=-1)
    y_true = preds.label_ids

    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average="macro")
    print(f"‚úÖ Accuracy: {acc:.4f}")
    print(f"‚úÖ Macro F1: {f1:.4f}")
    print("\nClassification Report:")
    # Get only the labels that actually exist in y_true
    unique_ids = sorted(set(y_true))
    target_names = [id2label[i] for i in unique_ids]

    print(classification_report(y_true, y_pred, labels=unique_ids, target_names=target_names))

    # print(classification_report(y_true, y_pred, target_names=list(label2id.keys())))

    df_test["predicted_label_id"] = y_pred
    df_test["predicted_label"] = df_test["predicted_label_id"].map(id2label)
    df_test["true_label"] = df_test["label_id"].map(id2label)
    df_test[["text", "true_label", "predicted_label"]].to_excel(out_excel, index=False)
    print(f"üì¶ Saved predictions to {out_excel}")


In [9]:
RMHD_weights = '/content/drive/MyDrive/Mansoor_Share/weights/RMHDCombined8_roberta_large_constructive_learning_affinnemotion_scl_v1.zip'
SWMH_weights = '/content/drive/MyDrive/Mansoor_Share/weights/pytorch_modelroberta_emotion_wloss_constructive.bin'
RMHD_kaggle_weights = '/content/drive/MyDrive/Mansoor_Share/weights/rmhdkaggle_pytorch_modelroberta_emotion_wloss_constructive.bin'

test_rmhd    = "/content/drive/MyDrive/Mansoor_Share/Datasets/RMHD_combined/RMHD_combinedsorted_test.csv"
test_swmh    =  "/content/swmh/test.csv"
test_rmhd_kaggle   =  "/content/drive/MyDrive/Mansoor_Share/Datasets/RMHD_Kaggle/test.csv"

In [10]:
# SWMH EVALAUTION
evaluate_model(
    csv_path = test_swmh,
    weights_path= SWMH_weights,
    num_labels=5,
    from_zip=False,
    out_excel="SWMH_predictions.xlsx"
)



Map:   0%|          | 0/10883 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.7395
‚úÖ Macro F1: 0.7488

Classification Report:
                   precision    recall  f1-score   support

     self.Anxiety       0.85      0.81      0.83      1911
self.SuicideWatch       0.68      0.74      0.71      2018
     self.bipolar       0.89      0.74      0.81      1493
  self.depression       0.68      0.74      0.71      3774
  self.offmychest       0.73      0.65      0.69      1687

         accuracy                           0.74     10883
        macro avg       0.77      0.74      0.75     10883
     weighted avg       0.75      0.74      0.74     10883

üì¶ Saved predictions to SWMH_predictions.xlsx


In [11]:
# RMHD DATASET
evaluate_model(
    csv_path=test_rmhd,
    weights_path=RMHD_weights,
    num_labels=8,
    from_zip=True,
    out_excel="RMHD_predictions.xlsx"
)

Map:   0%|          | 0/14410 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.8700
‚úÖ Macro F1: 0.7913

Classification Report:
               precision    recall  f1-score   support

         ADHD       0.93      0.94      0.93      1917
      Anxiety       0.81      0.79      0.80      2725
          BPD       0.90      0.67      0.77       915
      Bipolar       0.85      0.58      0.69       211
   Depression       0.89      0.96      0.92      7261
HealthAnxiety       0.85      0.70      0.77       246
         PTSD       0.82      0.74      0.78       262
SocialAnxiety       0.72      0.63      0.67       873

     accuracy                           0.87     14410
    macro avg       0.85      0.75      0.79     14410
 weighted avg       0.87      0.87      0.87     14410

üì¶ Saved predictions to RMHD_predictions.xlsx


In [12]:
# RMHD DATASET
evaluate_model(
    csv_path=test_rmhd_kaggle,
    weights_path=RMHD_kaggle_weights,
    num_labels=5,
    from_zip=False,
    out_excel="RMHDKaggle_predictions.xlsx"
)

Map:   0%|          | 0/38878 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.6965
‚úÖ Macro F1: 0.7091

Classification Report:
              precision    recall  f1-score   support

     anxiety       0.83      0.83      0.83      5896
  depression       0.60      0.65      0.63     10682
      lonely       0.72      0.71      0.71      4305
mentalhealth       0.70      0.61      0.65      8319
suicidewatch       0.72      0.73      0.73      9676

    accuracy                           0.70     38878
   macro avg       0.71      0.71      0.71     38878
weighted avg       0.70      0.70      0.70     38878

üì¶ Saved predictions to RMHDKaggle_predictions.xlsx


CROSS DATA EVALUATION


In [13]:
import pandas as pd

# === LABEL MAPPING DEFINITIONS ===

SWMH_LABELS = {
    "self.Anxiety",
    "self.SuicideWatch",
    "self.bipolar",
    "self.depression",
    "self.offmychest"
}

RMHD_TO_SWMH = {
    "Depression": "self.depression",
    "Anxiety": "self.Anxiety",
    "Bipolar": "self.bipolar",
}

RMHD_KAGGLE_TO_SWMH = {
    "depression": "self.depression",
    "anxiety": "self.Anxiety",
    "suicidewatch": "self.SuicideWatch",
}

SWMH_TO_RMHD = {
    "self.depression": "Depression",
    "self.Anxiety": "Anxiety",
    "self.bipolar": "Bipolar",
}

SWMH_TO_RMHD_KAGGLE = {
    "self.depression": "depression",
    "self.Anxiety": "anxiety",
    "self.SuicideWatch": "suicidewatch",
}

RMHD_KAGGLE_TO_RMHD = {
    "depression" : "Depression",
    "anxiety"    : "Anxiety",
}

RMHD_TO_RMHD_KAGGLE = {
    "Depression": "depression" ,
    "Anxiety": "anxiety"    ,
}

# === LABEL MAPPING FUNCTION ===

def map_labels(df, source, target):
    if source == target:
        return df

    if source == "RMHD" and target == "SWMH":
        mapping = RMHD_TO_SWMH
    elif source == "RMHD_KAGGLE" and target == "SWMH":
        mapping = RMHD_KAGGLE_TO_SWMH
    elif source == "SWMH" and target == "RMHD":
        mapping = SWMH_TO_RMHD
    elif source == "SWMH" and target == "RMHD_KAGGLE":
        mapping = SWMH_TO_RMHD_KAGGLE
    elif source == "RMHD_KAGGLE" and target == "RMHD":
        mapping = RMHD_KAGGLE_TO_RMHD
    elif source == "RMHD" and target == "RMHD_KAGGLE":
        mapping = RMHD_TO_RMHD_KAGGLE
    else:
        raise ValueError(f"Unsupported mapping: {source} ‚Üí {target}")

    df = df.copy()
    df["mapped_label"] = df["label"].map(mapping)
    df = df.dropna(subset=["mapped_label"])
    df["label"] = df["mapped_label"]
    df = df.drop(columns=["mapped_label"])
    return df


In [14]:
#============
#============
RMHD_weights = '/content/drive/MyDrive/Mansoor_Share/weights/RMHDCombined8_roberta_large_constructive_learning_affinnemotion_scl_v1.zip'
SWMH_weights = '/content/drive/MyDrive/Mansoor_Share/weights/pytorch_modelroberta_emotion_wloss_constructive.bin'
RMHD_kaggle_weights = '/content/drive/MyDrive/Mansoor_Share/weights/rmhdkaggle_pytorch_modelroberta_emotion_wloss_constructive.bin'

test_rmhd    = "/content/drive/MyDrive/Mansoor_Share/Datasets/RMHD_combined/RMHD_combinedsorted_test.csv"
test_swmh    =  "/content/swmh/test.csv"
test_rmhd_kaggle   =  "/content/drive/MyDrive/Mansoor_Share/Datasets/RMHD_Kaggle/test.csv"



In [15]:
# === CROSS EVALUATION CALLS ===

# 1. SWMH model on RMHD
df_rmhd = pd.read_csv(test_rmhd)
df_rmhd_mapped = map_labels(df_rmhd, source="RMHD", target="SWMH")
df_rmhd_mapped.to_csv("RMHD_mapped_to_SWMH.csv", index=False)
evaluate_model(
    csv_path="RMHD_mapped_to_SWMH.csv",
    weights_path=SWMH_weights,
    num_labels=5,
    from_zip=False,
    out_excel="SWMHmodel_on_RMHDmapped.xlsx"
)

# 2. SWMH model on RMHD Kaggle
df_rk = pd.read_csv(test_rmhd_kaggle)
df_rk_mapped = map_labels(df_rk, source="RMHD_KAGGLE", target="SWMH")
df_rk_mapped.to_csv("RMHDKaggle_mapped_to_SWMH.csv", index=False)
evaluate_model(
    csv_path="RMHDKaggle_mapped_to_SWMH.csv",
    weights_path=SWMH_weights,
    num_labels=5,
    from_zip=False,
    out_excel="SWMHmodel_on_RMHDKagglemapped.xlsx"
)


Map:   0%|          | 0/10197 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.2438
‚úÖ Macro F1: 0.1853

Classification Report:
                 precision    recall  f1-score   support

   self.Anxiety       0.91      0.85      0.88      2725
   self.bipolar       0.00      0.02      0.01       211
self.depression       0.39      0.02      0.04      7261

      micro avg       0.59      0.24      0.35     10197
      macro avg       0.43      0.30      0.31     10197
   weighted avg       0.52      0.24      0.26     10197

üì¶ Saved predictions to SWMHmodel_on_RMHDmapped.xlsx


Map:   0%|          | 0/26254 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.4541
‚úÖ Macro F1: 0.3278

Classification Report:
                   precision    recall  f1-score   support

     self.Anxiety       0.92      0.83      0.87      5896
self.SuicideWatch       0.74      0.70      0.72      9676
  self.depression       0.49      0.02      0.04     10682

        micro avg       0.80      0.45      0.58     26254
        macro avg       0.72      0.52      0.55     26254
     weighted avg       0.68      0.45      0.48     26254

üì¶ Saved predictions to SWMHmodel_on_RMHDKagglemapped.xlsx


In [16]:

# 3. RMHD model on SWMH
df_swmh = pd.read_csv(test_swmh)
df_swmh_to_rmhd = map_labels(df_swmh, source="SWMH", target="RMHD")
df_swmh_to_rmhd.to_csv("SWMH_mapped_to_RMHD.csv", index=False)
evaluate_model(
    csv_path="SWMH_mapped_to_RMHD.csv",
    weights_path=RMHD_weights,
    num_labels=8,
    from_zip=True,
    out_excel="RMHDmodel_on_SWMHmapped.xlsx"
)

# 4. RMHD model on RMHD Kaggle
df_rk_to_rmhd = map_labels(df_rk, source="RMHD_KAGGLE", target="RMHD")
df_rk_to_rmhd.to_csv("RMHDKaggle_mapped_to_RMHD.csv", index=False)
evaluate_model(
    csv_path="RMHDKaggle_mapped_to_RMHD.csv",
    weights_path=RMHD_weights,
    num_labels=8,
    from_zip=True,
    out_excel="RMHDmodel_on_RMHDKagglemapped.xlsx"
)


Map:   0%|          | 0/7178 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.0178
‚úÖ Macro F1: 0.0111

Classification Report:
              precision    recall  f1-score   support

     Anxiety       0.16      0.01      0.03      1911
     Bipolar       0.05      0.05      0.05      1493
  Depression       0.32      0.01      0.01      3774

   micro avg       0.07      0.02      0.03      7178
   macro avg       0.18      0.02      0.03      7178
weighted avg       0.22      0.02      0.02      7178

üì¶ Saved predictions to RMHDmodel_on_SWMHmapped.xlsx


Map:   0%|          | 0/16578 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.0200
‚úÖ Macro F1: 0.0085

Classification Report:
              precision    recall  f1-score   support

     Anxiety       0.35      0.02      0.04      5896
  Depression       0.04      0.02      0.03     10682

   micro avg       0.07      0.02      0.03     16578
   macro avg       0.20      0.02      0.03     16578
weighted avg       0.15      0.02      0.03     16578

üì¶ Saved predictions to RMHDmodel_on_RMHDKagglemapped.xlsx


In [17]:

# 5. RMHD Kaggle model on SWMH
df_swmh_to_rk = map_labels(df_swmh, source="SWMH", target="RMHD_KAGGLE")
df_swmh_to_rk.to_csv("SWMH_mapped_to_RMHDKaggle.csv", index=False)
evaluate_model(
    csv_path="SWMH_mapped_to_RMHDKaggle.csv",
    weights_path=RMHD_kaggle_weights,
    num_labels=5,
    from_zip=False,
    out_excel="RMHDKagglemodel_on_SWMHmapped.xlsx"
)

# 6. RMHD Kaggle model on RMHD
df_rmhd_to_rk = map_labels(df_rmhd, source="RMHD", target="RMHD_KAGGLE")
df_rmhd_to_rk.to_csv("RMHD_mapped_to_RMHDKaggle.csv", index=False)
evaluate_model(
    csv_path="RMHD_mapped_to_RMHDKaggle.csv",
    weights_path=RMHD_kaggle_weights,
    num_labels=5,
    from_zip=False,
    out_excel="RMHDKagglemodel_on_RMHDmapped.xlsx"
)


Map:   0%|          | 0/7703 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.5092
‚úÖ Macro F1: 0.3168

Classification Report:
              precision    recall  f1-score   support

     anxiety       0.91      0.78      0.84      1911
  depression       0.80      0.63      0.70      3774
suicidewatch       0.12      0.02      0.04      2018

   micro avg       0.78      0.51      0.62      7703
   macro avg       0.61      0.48      0.53      7703
weighted avg       0.65      0.51      0.56      7703

üì¶ Saved predictions to RMHDKagglemodel_on_SWMHmapped.xlsx


Map:   0%|          | 0/9986 [00:00<?, ? examples/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Accuracy: 0.6891
‚úÖ Macro F1: 0.3278

Classification Report:
              precision    recall  f1-score   support

     anxiety       0.91      0.83      0.87      2725
  depression       0.97      0.64      0.77      7261

   micro avg       0.95      0.69      0.80      9986
   macro avg       0.94      0.73      0.82      9986
weighted avg       0.96      0.69      0.80      9986

üì¶ Saved predictions to RMHDKagglemodel_on_RMHDmapped.xlsx
