In [None]:
# -------------------------------
# 1️⃣ Uninstall conflicting versions
# -------------------------------
!pip uninstall -y torch torchtext torchvision torchaudio numpy

# -------------------------------
# 2️⃣ Install compatible versions
# For vanilla RNN + IMDB
# torch 2.3.0, torchtext 0.18.0, torchvision/torchaudio matching
# numpy 1.26.4 (avoids PyTorch errors)
# -------------------------------
!pip install torch==2.3.0 torchtext==0.18.0 torchvision==0.18.0 torchaudio==2.3.0 numpy==1.26.4 --quiet

# -------------------------------
# 3️⃣ Restart runtime (required to load new versions)
# -------------------------------
import os
os.kill(os.getpid(), 9)  # This forces Colab to restart


Found existing installation: torch 2.3.0
Uninstalling torch-2.3.0:
  Successfully uninstalled torch-2.3.0
Found existing installation: torchtext 0.18.0
Uninstalling torchtext-0.18.0:
  Successfully uninstalled torchtext-0.18.0
Found existing installation: torchvision 0.18.0
Uninstalling torchvision-0.18.0:
  Successfully uninstalled torchvision-0.18.0
Found existing installation: torchaudio 2.3.0
Uninstalling torchaudio-2.3.0:
  Successfully uninstalled torchaudio-2.3.0
Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 whi

In [1]:
import math, random
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator



In [3]:
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

In [42]:
df = pd.read_csv('mental.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,statement,status
0,0,oh my gosh,Anxiety
1,1,"trouble sleeping, confused mind, restless hear...",Anxiety
2,2,"All wrong, back off dear, forward doubt. Stay ...",Anxiety
3,3,I've shifted my focus to something else but I'...,Anxiety
4,4,"I'm restless and restless, it's been a month n...",Anxiety


In [43]:
df['status'].unique()

array(['Anxiety', 'Normal', 'Depression', 'Suicidal', 'Stress', 'Bipolar',
       'Personality disorder'], dtype=object)

In [44]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [45]:
texts = df['statement'].astype(str).tolist()
labels = df['status'].astype(str).tolist()

In [46]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)
num_classes = len(label_encoder.classes_)
print("Classes:", label_encoder.classes_)

Classes: ['Anxiety' 'Bipolar' 'Depression' 'Normal' 'Personality disorder' 'Stress'
 'Suicidal']


In [47]:
x_train, x_test, y_train, y_test = train_test_split(texts, y, test_size = 0.2, random_state = RANDOM_SEED, stratify = labels)


In [48]:
print(f'Train : {len(x_train)}, Test : {len(x_test)}')

Train : 42434, Test : 10609


In [49]:
tokenizer = get_tokenizer('basic_english')

def yield_tokens(data_iter):
  for text in data_iter:
    yield tokenizer(text)

In [50]:
vocab = build_vocab_from_iterator(yield_tokens(x_train), specials = ['<unk>', '<pad>'])
vocab.set_default_index(vocab['<unk>'])
PAD_IDX = vocab['<pad>']
VOCAB_SIZE = len(vocab)
print('Vocab size:', VOCAB_SIZE)

Vocab size: 64279


In [51]:
class TextDataset(Dataset):
  def __init__(self, texts, labels, vocab, tokenizer):
    self.texts = texts
    self.labels = labels
    self.vocab = vocab
    self.tokenizer = tokenizer

  def __len__(self):
    return len(self.texts)

  def __getitem__(self, idx):
    tokens = self.tokenizer(self.texts[idx])
    ids = [self.vocab[t] for t in tokens]
    return torch.tensor(ids, dtype = torch.long), torch.tensor(self.labels[idx], dtype = torch.long)

In [52]:
max_len = 60

In [68]:
def collate_fn(batch):
  texts, labels = zip(*batch)
  texts = [t[:max_len] for t in texts]
  padded = pad_sequence(texts, batch_first=True, padding_value = PAD_IDX)
  if padded.size(1) < max_len:
    pad_extra = torch.full((padded.size(0), max_len - padded.size(1)), PAD_IDX, dtype = torch.long)
    padded = torch.cat([padded, pad_extra], dim =1)
  labels = torch.stack(labels)
  key_padding_mask = (padded == PAD_IDX)
  return padded, labels, key_padding_mask

train_ds = TextDataset(x_train, y_train, vocab, tokenizer)
test_ds = TextDataset(x_test, y_test, vocab, tokenizer)

train_loader = DataLoader(train_ds, batch_size = 16, shuffle = True, collate_fn = collate_fn)
test_loader = DataLoader(test_ds, batch_size = 16, shuffle = False, collate_fn = collate_fn)

In [69]:
class PositionalEncoding(nn.Module):
  def __init__(self, d_model, max_len = 5000):
    super().__init__()
    pe = torch.zeros(max_len, d_model)
    position = torch.arange(0, max_len).unsqueeze(1).float()
    div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    pe = pe.unsqueeze(0)
    self.register_buffer('pe', pe)

  def forward(self, x):
    return x + self.pe[:, :x.size(1), :]


In [70]:
class TransformerEncoder(nn.Module):
  def __init__(self, embed_dim, num_heads, ff_hidden_dim, dropout):
    super().__init__()
    self.attn = nn.MultiheadAttention(embed_dim, num_heads, dropout = dropout, batch_first=True)
    self.ff = nn.Sequential(
        nn.Linear(embed_dim, ff_hidden_dim),
        nn.ReLU(),
        nn.Linear(ff_hidden_dim, embed_dim),
    )
    self.norm1 = nn.LayerNorm(embed_dim)
    self.norm2 = nn.LayerNorm(embed_dim)
    self.dropout = nn.Dropout(dropout)

  def forward(self, x, key_padding_mask=None):
    attn_out, _ = self.attn(x, x, x, key_padding_mask = key_padding_mask)
    x = self.norm1(x + self.dropout(attn_out))
    ff_out = self.ff(x)
    x = self.norm2(x + self.dropout(ff_out))
    return x

In [71]:
class MiniTransformer(nn.Module):
  def __init__(self, vocab_size, embed_dim, num_heads, ff_hidden_dim, num_layers, num_classes, pad_idx, dropout = 0.2):
    super().__init__()
    self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx= pad_idx)
    self.pos_enc = PositionalEncoding(embed_dim)
    self.layers = nn.ModuleList([
        TransformerEncoder(embed_dim, num_heads, ff_hidden_dim, dropout)
        for _ in range(num_layers)
    ])
    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(embed_dim, num_classes)

  def forward(self, x, key_padding_mask = None):
    x = self.embedding(x)
    x = self.pos_enc(x)
    for layer in self.layers:
      x = layer(x, key_padding_mask)
    mask = ~key_padding_mask
    pooled = (x * mask.unsqueeze(-1)).sum(1) / mask.sum(1, keepdim = True)
    return self.fc(self.dropout(pooled))

In [84]:
Embed_dim = 128
Num_heads = 8
FF_hidden = 256
Num_layers = 3
Lr = 1e-3
epochs = 10

In [85]:
model = MiniTransformer(VOCAB_SIZE, Embed_dim, Num_heads, FF_hidden, Num_layers, num_classes, pad_idx=PAD_IDX).to(device)

In [86]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = Lr)

In [87]:
print('Training on :', device)
for epoch in range(epochs + 1):
  model.train()
  total_loss = 0
  train_preds, train_true = [], []
  for x, y, mask in train_loader:
    x, y, mask = x.to(device), y.to(device), mask.to(device)
    optimizer.zero_grad()
    logits = model(x, mask)
    loss = loss_fn(logits, y)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
    preds = logits.argmax(1).cpu().numpy()
    train_preds.extend(preds)
    train_true.extend(y.cpu().numpy())
  avg_loss = total_loss / len(train_loader)
  train_acc = accuracy_score(train_preds, train_true)
  print(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {train_acc:.4f}')

Training on : cuda
Epoch 0 | Train Loss: 0.8655 | Train Accuracy: 0.6568
Epoch 1 | Train Loss: 0.7158 | Train Accuracy: 0.7142
Epoch 2 | Train Loss: 0.6670 | Train Accuracy: 0.7340
Epoch 3 | Train Loss: 0.6343 | Train Accuracy: 0.7495
Epoch 4 | Train Loss: 0.6131 | Train Accuracy: 0.7608
Epoch 5 | Train Loss: 0.5865 | Train Accuracy: 0.7698
Epoch 6 | Train Loss: 0.5721 | Train Accuracy: 0.7762
Epoch 7 | Train Loss: 0.5609 | Train Accuracy: 0.7831
Epoch 8 | Train Loss: 0.5445 | Train Accuracy: 0.7887
Epoch 9 | Train Loss: 0.5466 | Train Accuracy: 0.7878
Epoch 10 | Train Loss: 0.5439 | Train Accuracy: 0.7902


In [88]:
model.eval()
all_preds, all_true = [], []
with torch.no_grad():
  for x, y, mask in test_loader:
    x, y, mask = x.to(device), y.to(device), mask.to(device)
    logits = model(x, mask)
    preds = logits.argmax(1).cpu().numpy()
    all_preds.extend(preds)
    all_true.extend(y.cpu().numpy())

print('\nTest Accuracy : ', accuracy_score(all_true, all_preds))
print('\nClassification Report : ', classification_report(all_true, all_preds))


Test Accuracy :  0.718823640305401

Classification Report :                precision    recall  f1-score   support

           0       0.71      0.71      0.71       778
           1       0.68      0.68      0.68       575
           2       0.72      0.58      0.64      3081
           3       0.88      0.92      0.90      3270
           4       0.83      0.22      0.34       240
           5       0.44      0.51      0.47       534
           6       0.58      0.73      0.65      2131

    accuracy                           0.72     10609
   macro avg       0.69      0.62      0.63     10609
weighted avg       0.73      0.72      0.72     10609



In [89]:
def predict(model, text):
  model.eval()
  toks = tokenizer(text)
  ids = torch.tensor([vocab[t] for t in toks], dtype = torch.long).unsqueeze(0).to(device)
  if ids.size(1) < max_len:
    pad = torch.full((1, max_len - ids.size(1)), PAD_IDX, dtype = torch.long).to(device)
    ids = torch.cat([ids, pad], dim = 1)
  mask = (ids == PAD_IDX)
  with torch.no_grad():
    out = model(ids, mask)
    pred = out.argmax(1).item()
  return label_encoder.inverse_transform([pred])[0]


In [96]:
samples = [
    "I am feeling so lonely",
    "I've been so restless since this morning",
    "I wish you all the best as well, from the bottom of my heart."
]

for s in samples:
  print(f'{s} --> {predict(model, s)}')

I am feeling so lonely --> Suicidal
I've been so restless since this morning --> Anxiety
I wish you all the best as well, from the bottom of my heart. --> Normal
