# **Fine-Tuning T5-small for Mental Health Explanations**

Step 1: Data Preprocessing

In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [48]:
# Load the dataset
data = pd.read_csv('survey.csv')

In [49]:
# Drop unnecessary columns and handle missing values
data = data.drop(columns=["Unnamed: 0"])
data = data.dropna(subset=["statement"])

In [50]:
# Format data for T5: Input as "explain: {statement}" and Output as "{status}"
data["input"] = "explain: " + data["statement"]
data["output"] = data["status"]

In [51]:
# Split into training and validation sets
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

In [52]:
# Save processed data
train_data.to_csv("train_data.csv", index=False)
val_data.to_csv("val_data.csv", index=False)

Step 2: Load Pretrained Model and Tokenizer

In [53]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [55]:
# Load the tokenizer and model
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

Step 3: Prepare the Data for Fine-Tuning

In [56]:
from torch.utils.data import Dataset, DataLoader
import torch

In [57]:
# Define a custom dataset class
class MentalHealthDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        input_text = self.data.iloc[idx]["input"]
        output_text = self.data.iloc[idx]["output"]

        # Tokenize input and output
        input_encodings = self.tokenizer(
            input_text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        output_encodings = self.tokenizer(
            output_text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            "input_ids": input_encodings["input_ids"].squeeze(),
            "attention_mask": input_encodings["attention_mask"].squeeze(),
            "labels": output_encodings["input_ids"].squeeze(),
        }

In [58]:
# Load the preprocessed data
train_data = pd.read_csv("train_data.csv")
val_data = pd.read_csv("val_data.csv")

In [59]:
# Create datasets
train_dataset = MentalHealthDataset(train_data, tokenizer)
val_dataset = MentalHealthDataset(val_data, tokenizer)

In [60]:
# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

Step 4: Fine-Tune the T5-Small Model

In [14]:
from transformers import AdamW
from tqdm import tqdm

In [15]:
# Define optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)



In [16]:
# Set model to training mode
model.train()

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [17]:
# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [18]:
# Early Stopping Parameters
patience = 3  # Number of epochs to wait for improvement before stopping
best_val_loss = float('inf')
epochs_no_improve = 0

# Training loop
epochs = 10
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    epoch_loss = 0

    # Training phase
    model.train()  # Set model to training mode
    for batch in tqdm(train_loader):
        # Move batch to device
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        # Forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        epoch_loss += loss.item()

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch Loss: {epoch_loss:.4f}")

    # Validation phase
    model.eval()  # Set model to evaluation mode
    val_loss = 0
    with torch.no_grad():
        for val_batch in val_loader:
            val_input_ids = val_batch["input_ids"].to(device)
            val_attention_mask = val_batch["attention_mask"].to(device)
            val_labels = val_batch["labels"].to(device)

            val_outputs = model(input_ids=val_input_ids, attention_mask=val_attention_mask, labels=val_labels)
            val_loss += val_outputs.loss.item()

    print(f"Validation Loss: {val_loss:.4f}")

    # Early Stopping check
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        print("Validation loss improved, saving model...")
        # You could also save the model checkpoint here
        # torch.save(model.state_dict(), 'best_model.pth')
    else:
        epochs_no_improve += 1
        print(f"Validation loss did not improve for {epochs_no_improve} epochs.")

    if epochs_no_improve >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        break

Epoch 1/10


  0%|          | 0/2634 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
100%|██████████| 2634/2634 [12:20<00:00,  3.56it/s]


Epoch Loss: 623.1001
Validation Loss: 5.8816
Validation loss improved, saving model...
Epoch 2/10


100%|██████████| 2634/2634 [12:11<00:00,  3.60it/s]


Epoch Loss: 26.1228
Validation Loss: 4.4101
Validation loss improved, saving model...
Epoch 3/10


100%|██████████| 2634/2634 [12:11<00:00,  3.60it/s]


Epoch Loss: 19.0607
Validation Loss: 3.4212
Validation loss improved, saving model...
Epoch 4/10


100%|██████████| 2634/2634 [12:12<00:00,  3.60it/s]


Epoch Loss: 15.3922
Validation Loss: 2.9740
Validation loss improved, saving model...
Epoch 5/10


100%|██████████| 2634/2634 [12:12<00:00,  3.60it/s]


Epoch Loss: 13.4152
Validation Loss: 2.7304
Validation loss improved, saving model...
Epoch 6/10


100%|██████████| 2634/2634 [12:12<00:00,  3.60it/s]


Epoch Loss: 12.1575
Validation Loss: 2.5754
Validation loss improved, saving model...
Epoch 7/10


100%|██████████| 2634/2634 [12:12<00:00,  3.60it/s]


Epoch Loss: 11.2121
Validation Loss: 2.4673
Validation loss improved, saving model...
Epoch 8/10


100%|██████████| 2634/2634 [12:12<00:00,  3.60it/s]


Epoch Loss: 10.3995
Validation Loss: 2.3840
Validation loss improved, saving model...
Epoch 9/10


100%|██████████| 2634/2634 [12:12<00:00,  3.60it/s]


Epoch Loss: 9.9178
Validation Loss: 2.3702
Validation loss improved, saving model...
Epoch 10/10


100%|██████████| 2634/2634 [12:12<00:00,  3.60it/s]


Epoch Loss: 9.4434
Validation Loss: 2.3290
Validation loss improved, saving model...


Step 5: Evaluate the Model

In [19]:
from sklearn.metrics import classification_report

In [20]:
# Set model to evaluation mode
model.eval()

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [21]:
# Generate predictions
all_labels = []
all_preds = []

In [22]:
with torch.no_grad():
    for batch in tqdm(val_loader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=128)

        # Decode predictions and labels
        preds = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        labels = tokenizer.batch_decode(batch["labels"], skip_special_tokens=True)

        all_preds.extend(preds)
        all_labels.extend(labels)

100%|██████████| 659/659 [02:55<00:00,  3.76it/s]


In [23]:
# Print classification report
print(classification_report(all_labels, all_preds))

                      precision    recall  f1-score   support

             Anxiety       0.86      0.86      0.86       755
             Bipolar       0.77      0.85      0.81       527
          Depression       0.76      0.76      0.76      3016
              Normal       0.95      0.95      0.95      3308
Personality disorder       0.67      0.65      0.66       237
              Stress       0.70      0.76      0.73       536
            Suicidal       0.72      0.69      0.70      2158

            accuracy                           0.81     10537
           macro avg       0.78      0.79      0.78     10537
        weighted avg       0.81      0.81      0.81     10537



In [26]:
# Save the fine-tuned model
model.save_pretrained("t5_mental_health_model")
tokenizer.save_pretrained("t5_mental_health_model")

('t5_mental_health_model/tokenizer_config.json',
 't5_mental_health_model/special_tokens_map.json',
 't5_mental_health_model/spiece.model',
 't5_mental_health_model/added_tokens.json')