In [1]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

import numpy as np
import pandas as pd

from torch.utils.data import Dataset, DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
train_df = pd.read_csv("./data/reshuffle_train.tsv", delimiter='\t')
dev_df = pd.read_csv("./data/reshuffle_dev.tsv", delimiter='\t')
test_df = pd.read_csv("./data/reshuffle_test.tsv", delimiter='\t')

In [4]:
train_df

Unnamed: 0.1,Unnamed: 0,PID,text,label
0,1292,dev_pid_1293,A plea for help : The title is true. I need he...,moderate
1,5821,train_pid_5822,When being rational is more dangerous : [removed],moderate
2,1134,dev_pid_1135,Does it get any easier!? : Just looking to hea...,moderate
3,1575,train_pid_1576,I feel like gravity applies to me tenfold : Im...,moderate
4,5288,train_pid_5289,Anyone else not get invited anywhere for New Y...,moderate
...,...,...,...,...
11636,2428,dev_pid_2429,Not Sure If I Have Depression and Need Some He...,not depression
11637,597,dev_pid_598,As my depression gets worse I feel more and mo...,moderate
11638,3773,train_pid_3774,"27M all my friends, peers are moving with thei...",moderate
11639,1606,test_pid_1607,depression and dating are ruining my life : So...,moderate


In [5]:
train_df['label'] = train_df['label'].apply(lambda x: {'not depression': 0, 'moderate': 1, 'severe': 1}.get(x))
test_df['label'] = test_df['label'].apply(lambda x: {'not depression': 0, 'moderate': 1, 'severe': 1}.get(x))
dev_df['label'] = dev_df['label'].apply(lambda x: {'not depression': 0, 'moderate': 1, 'severe': 1}.get(x))

In [6]:
train_df

Unnamed: 0.1,Unnamed: 0,PID,text,label
0,1292,dev_pid_1293,A plea for help : The title is true. I need he...,1
1,5821,train_pid_5822,When being rational is more dangerous : [removed],1
2,1134,dev_pid_1135,Does it get any easier!? : Just looking to hea...,1
3,1575,train_pid_1576,I feel like gravity applies to me tenfold : Im...,1
4,5288,train_pid_5289,Anyone else not get invited anywhere for New Y...,1
...,...,...,...,...
11636,2428,dev_pid_2429,Not Sure If I Have Depression and Need Some He...,0
11637,597,dev_pid_598,As my depression gets worse I feel more and mo...,1
11638,3773,train_pid_3774,"27M all my friends, peers are moving with thei...",1
11639,1606,test_pid_1607,depression and dating are ruining my life : So...,1


In [7]:
roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
MAX_LEN = 256
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4

In [8]:
class DepressionDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        '''
        Dataset for labelling sentences with depression.
        '''
        super().__init__()
        self.dataframe = dataframe
        self.inputs = dataframe['text']
        self.outputs = dataframe['label']
        self.tokenizer = tokenizer
        self.max_len = max_len


    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        text = self.inputs.iloc[idx]
        label = self.outputs.iloc[idx]

        inputs = self.tokenizer.encode_plus(
        text,
        None,
        add_special_tokens=True,
        max_length=self.max_len,
        return_token_type_ids=True,
        padding='max_length',
        truncation=True
        )
        
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
        'ids': torch.tensor(ids, dtype=torch.long),
        'mask': torch.tensor(mask, dtype=torch.long),
        'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
        'targets': torch.tensor(label, dtype=torch.float32)
        }

In [9]:
class RobertaCNNClassifier(nn.Module):
    def __init__(self, model_name="roberta-base", num_filters=128, kernel_size=3, dropout=0.3):
        super().__init__()
        self.roberta = AutoModel.from_pretrained(model_name)
        self.hidden_size = self.roberta.config.hidden_size  # usually 768

        self.conv1d = nn.Conv1d(in_channels=self.hidden_size,
                                out_channels=num_filters,
                                kernel_size=kernel_size,
                                padding=1)  # keep same length

        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveMaxPool1d(1)  # output shape: (B, num_filters, 1)
        self.dropout = nn.Dropout(dropout)

        self.fc = nn.Linear(num_filters, 1)  # binary classification

    def forward(self, input_ids, attention_mask):
        # Encode with RoBERTa
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state  # (B, T, H)

        x = last_hidden_state.permute(0, 2, 1)  # (B, H, T)
        x = self.conv1d(x)                      # (B, num_filters, T)
        x = self.relu(x)
        x = self.pool(x)                        # (B, num_filters, 1)
        x = x.squeeze(2)                        # (B, num_filters)

        x = self.dropout(x)
        logits = self.fc(x)                     # (B, 1)
        return logits

In [10]:
train_dataset = DepressionDataset(train_df, roberta_tokenizer, MAX_LEN)
dev_dataset = DepressionDataset(dev_df, roberta_tokenizer, MAX_LEN)
test_dataset = DepressionDataset(test_df, roberta_tokenizer, MAX_LEN)

In [24]:
train_dataloader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
dev_dataloader = DataLoader(dev_dataset, batch_size=VALID_BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=VALID_BATCH_SIZE)

In [12]:
from sklearn.metrics import accuracy_score, f1_score

def train_model(model, train_loader, val_loader, optimizer, scheduler, device, epochs=3):
    criterion = nn.BCEWithLogitsLoss()
    model.to(device)

    for epoch in range(epochs):
        print(f"\nEpoch {epoch + 1}/{epochs}")

        model.train()
        train_loss = 0
        all_preds, all_labels = [], []

        for batch in train_loader:
            input_ids = batch['ids'].to(device)
            attention_mask = batch['mask'].to(device)
            labels = batch['targets'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask).squeeze(1)  # (B)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

            train_loss += loss.item()
            preds = (torch.sigmoid(outputs) > 0.5).int()
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        train_acc = accuracy_score(all_labels, all_preds)
        train_f1 = f1_score(all_labels, all_preds)

        print(f"Train Loss: {train_loss / len(train_loader):.4f} | Acc: {train_acc:.4f} | F1: {train_f1:.4f}")

        # --- Validation ---
        model.eval()
        val_loss = 0
        val_preds, val_labels = [], []

        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['ids'].to(device)
                attention_mask = batch['mask'].to(device)
                labels = batch['targets'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask).squeeze(1)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                preds = (torch.sigmoid(outputs) > 0.5).int()
                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())

        val_acc = accuracy_score(val_labels, val_preds)
        val_f1 = f1_score(val_labels, val_preds)

        print(f"Val Loss: {val_loss / len(val_loader):.4f} | Acc: {val_acc:.4f} | F1: {val_f1:.4f}")

In [13]:
from transformers import get_scheduler

num_epochs = 3
model = RobertaCNNClassifier()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
scheduler = get_scheduler("linear", optimizer=optimizer,
                          num_warmup_steps=0,
                          num_training_steps=len(dev_dataloader) * num_epochs)

train_model(model, train_dataloader, dev_dataloader, optimizer, scheduler, device, epochs=3)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/3
Train Loss: 0.4627 | Acc: 0.7943 | F1: 0.8683
Val Loss: 0.4074 | Acc: 0.8189 | F1: 0.8845

Epoch 2/3
Train Loss: 0.3401 | Acc: 0.8555 | F1: 0.9039
Val Loss: 0.3950 | Acc: 0.8229 | F1: 0.8835

Epoch 3/3
Train Loss: 0.3287 | Acc: 0.8606 | F1: 0.9071
Val Loss: 0.3950 | Acc: 0.8229 | F1: 0.8835


In [14]:
torch.save(model.state_dict(), "roberta_cnn_depression.pt")

In [15]:
# loading the model
# model = RobertaCNNClassifier()  # must match original architecture
# model.load_state_dict(torch.load("roberta_cnn_depression.pt"))
# model.to(device)
# model.eval()  # set to inference mode

In [18]:
def predict_depression(text, model, tokenizer, max_len=256, device=device):
    # Tokenize input text
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_token_type_ids=True,
        return_attention_mask=True,
        return_tensors='pt'  # returns PyTorch tensors directly
    )

    # Move to device
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    # Inference
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        prob = torch.sigmoid(logits).item()  # Convert tensor to scalar

    # Convert probability to binary label
    label = 1 if prob > 0.5 else 0
    return label, prob

In [43]:
# text = "I do not feel joy, happiness or fulfilment in the things I used to love."
# text = "That movie was so depressing, it made me want to crawl into bed and cry — but wow, what a powerful story."
text = "I didn’t feel joy, happiness or fulfilment in the hobbies I used to love — until I found new passions that reignited my spark."

model.eval()
label, prob = predict_depression(text, model, roberta_tokenizer)

print(f"Prediction: {label} (probability of depression = {prob:.4f})")


Prediction: 0 (probability of depression = 0.4290)


In [27]:
def evaluate_model(model, test_loader, device=device):
    model.eval()
    model.to(device)

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['ids'].to(device)
            attention_mask = batch['mask'].to(device)
            labels = batch['targets'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask).squeeze(1)
            probs = torch.sigmoid(outputs)
            preds = (probs > 0.5).long()

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    print(f"\nTest Accuracy: {acc:.4f}")
    print(f"Test F1 Score : {f1:.4f}")

    return all_preds, all_labels

In [28]:
all_preds, all_labels = evaluate_model(model, test_dataloader, device)


Test Accuracy: 0.8269
Test F1 Score : 0.8860
