In [None]:
! git clone https://github.com/usmaann/Depression_Severity_Dataset.git

In [None]:
! pip install torch transformers numpy pandas matplotlib tensorflow nltk emoji ekphrasis scikit-learn numpy

In [None]:
import re
import nltk
import emoji
import torch
import numpy as np
from ekphrasis.classes.segmenter import Segmenter
from ekphrasis.classes.spellcorrect import SpellCorrector
import transformers
import tensorflow as tf
import pandas as pd
from torch import nn
from torch.nn import Linear, Softmax, ReLU, CrossEntropyLoss
from torch.nn.functional import one_hot
from torch.optim import Adam
from transformers import AutoModel, AutoTokenizer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [None]:
# Load the Reddit posts from the cloned dataset directory (path is relative to the
# notebook's working directory). The file's unnamed first column is loaded as
# "Unnamed: 0" next to the "text" and "label" columns.
depression_df = pd.read_csv("Depression_Severity_Dataset/Reddit_depression_dataset.csv")
depression_df.head()

Unnamed: 0,text,label
0,"He said he had not felt that way before, sugge...",mild
1,"Hey there r/assistance, Not sure if this is th...",minimum
2,My mom then hit me with the newspaper and it s...,minimum
3,"until i met my new boyfriend, he is amazing, h...",mild
4,October is Domestic Violence Awareness Month a...,moderate


In [None]:
# Split the frame into the raw post texts (X) and their severity labels (Y).
X, Y = depression_df['text'], depression_df['label']

In [None]:
Y.value_counts()

label
minimum     2587
moderate     394
mild         290
severe       282
Name: count, dtype: int64

In [None]:
Y.values

array(['mild', 'minimum', 'minimum', ..., 'mild', 'minimum', 'minimum'],
      dtype=object)

In [None]:
# Severity label -> integer class id; ids increase from 'minimum' to 'severe'.
label_map = {'minimum': 0, 'mild': 1, 'moderate': 2, 'severe': 3}

In [None]:
# Translate every string label into its integer class id via label_map.
Y_encoded = [label_map[severity] for severity in Y]

In [None]:
# Preview only the first few ids instead of dumping the whole list.
Y_encoded[:10]

In [None]:
# Integer class ids as a 1-D tensor (one entry per post).
Y_tensor = torch.tensor(Y_encoded)

In [None]:
# Fix the number of columns to the number of known classes; without num_classes,
# one_hot infers it from the largest id present in the data.
Y_one_hot = one_hot(Y_tensor, num_classes=len(label_map))

In [None]:
Y_one_hot[0], Y[0]

(tensor([0, 1, 0, 0]), 'mild')

In [None]:
# scikit-learn encoder used below as a second way of turning labels into ids.
labelEncoder = LabelEncoder()

In [None]:
# Encode the string labels as integer ids, then expand them into one-hot rows.
# The labels are read from the DataFrame column rather than from `Y`, because `Y`
# is overwritten here: re-running the cell on the one-hot matrix would fail.
# NOTE: LabelEncoder numbers classes in sorted order (mild=0, minimum=1,
# moderate=2, severe=3), which differs from `label_map` (minimum=0, mild=1, ...),
# so the columns of `Y` and of `Y_one_hot` are not in the same order.
Y = labelEncoder.fit_transform(depression_df['label'])
Y = to_categorical(Y)

In [None]:
Y.shape

(3553, 4)

## **Preprocess text**

In [None]:
# Take the first post as a working example for the preprocessing steps below.
post = depression_df['text'][0]
# ekphrasis helpers; constructing them downloads/loads word statistics on first use
# (see the log output of this cell).
segmenter = Segmenter()
spellCorrector = SpellCorrector(corpus="english")

Word statistics files not found!
Downloading... done!
Unpacking... done!
Reading english - 1grams ...
generating cache file for faster loading...
reading ngrams /root/.ekphrasis/stats/english/counts_1grams.txt
Reading english - 2grams ...
generating cache file for faster loading...
reading ngrams /root/.ekphrasis/stats/english/counts_2grams.txt


  regexes = {k.lower(): re.compile(self.expressions[k]) for k, v in


Reading english - 1grams ...


In [None]:
# Fetch the Punkt data needed by nltk.sent_tokenize, which is called further down.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
def preprocess_post(text):
  """Clean one post and return the cleaned string.

  Steps, in order: ekphrasis segmentation, emoji -> text names, removal of
  hashtags, removal of URLs / e-mail-like tokens / digit runs, and collapsing
  any letter repeated three or more times down to two.
  """
  # Spell correction stays disabled, as before.
  # text = spellCorrector.correct(text)
  segmented = segmenter.segment(text)
  without_emoji = emoji.demojize(segmented)
  without_hashtags = re.sub(r'#\w+', '', without_emoji)
  without_noise = re.sub(r'http\S+|www\S+|\S+@\S+|\d+', '', without_hashtags)
  return re.sub(r'([a-zA-Z])\1{2,}', r'\1\1', without_noise)


post = preprocess_post(post)
# Split the cleaned post into sentences.
sentences = nltk.sent_tokenize(post)


In [None]:
sentences

['he said he had not felt that way before, suggeted i go rest and so ..trigger ahead if youi\'re a hypocondriac like me: i decide to look up "feelings of doom" in hopes of maybe getting sucked into some rabbit hole of ludicrous conspiracy, a stupid "are you psychic" test or new age b.s., something i could even laugh at down the road.',
 'no, i ended up reading that this sense of doom can be indicative of various health ailments; one of which i am prone to..  so on top of my "doom" to my gloom..i am now f\'n worried about my heart.',
 'i do happen to have a physical in   hours.']

In [None]:
len(sentences)

3

## **Semantic Encoder**

In [None]:
# Semantic branch: pretrained BERT (base, uncased) and its matching tokenizer.
semantic_encoder_model = AutoModel.from_pretrained("bert-base-uncased")
semantic_encoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

## **Sentiment Encoder**

In [None]:
# Sentiment branch: DistilBERT (base, uncased) and its matching tokenizer.
# NOTE(review): this is the generic pretrained checkpoint, not one fine-tuned for
# sentiment -- confirm whether a sentiment-specific checkpoint was intended.
sentiment_encoder_model = AutoModel.from_pretrained("distilbert-base-uncased")
sentiment_encoder_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
# post = depression_df['text'][13]
# Tokenise the cleaned post once per encoder. Truncate to 512 tokens, the longest
# sequence these checkpoints accept, so long posts do not fail inside the model.
semantic_tokens = semantic_encoder_tokenizer(
    post, return_tensors='pt', truncation=True, max_length=512)
sentiment_tokens = sentiment_encoder_tokenizer(
    post, return_tensors='pt', truncation=True, max_length=512)

# Run both encoders; each returns a model-output object, unpacked in the next cell.
semantic_embeddings = semantic_encoder_model(**semantic_tokens)

sentiment_embeddings = sentiment_encoder_model(**sentiment_tokens)


In [None]:
# Keep only the per-token hidden states. getattr makes the cell safe to re-run:
# once the names already hold tensors they are left unchanged instead of raising
# AttributeError.
semantic_embeddings = getattr(semantic_embeddings, "last_hidden_state", semantic_embeddings)
sentiment_embeddings = getattr(sentiment_embeddings, "last_hidden_state", sentiment_embeddings)

In [None]:
semantic_embeddings.shape, sentiment_embeddings.shape

(torch.Size([1, 156, 768]), torch.Size([1, 156, 768]))

## **Sentiment Guided Transformer**

In [None]:
# 8-head attention over 768-d token vectors. The encoder outputs are laid out as
# (batch, seq_len, hidden) = (1, 156, 768); batch_first=True is required for that
# layout. Without it the module treats dim 0 as the sequence axis and would attend
# over a length-1 sequence, i.e. every token would only attend to itself.
self_attn = nn.MultiheadAttention(768, 8, dropout=0.1, batch_first=True)

In [None]:
# Self-attention on the sentiment embeddings (same tensor as query, key and value).
sentiment_attention, attention_score = self_attn(sentiment_embeddings, sentiment_embeddings, sentiment_embeddings)

In [None]:
# Self-attention on the semantic embeddings, using the same `self_attn` module.
semantic_attention, semantic_score = self_attn(semantic_embeddings, semantic_embeddings, semantic_embeddings)

In [None]:
semantic_attention.shape, sentiment_attention.shape

(torch.Size([1, 156, 768]), torch.Size([1, 156, 768]))

In [None]:
# Cross attention: query = semantic_attention, key = semantic_attention,
# value = sentiment_attention. It reuses the same `self_attn` module (and weights)
# as the two self-attention passes.
# NOTE(review): key and value come from different streams here; confirm this is
# the intended co-attention wiring (it relies on both sequences having equal length).
co_attention, co_attention_score = self_attn(semantic_attention, semantic_attention, sentiment_attention)

In [None]:
co_attention.shape

torch.Size([1, 156, 768])

## **Supervised Severity-Aware Contrastive Learning**

In [None]:
def soft_attention(encoder_outputs, linear=None):
  """Pool a sequence of token vectors into one context vector.

  Args:
    encoder_outputs: tensor of shape (batch, seq_len, hidden).
    linear: optional projection layer mapping hidden -> hidden. Pass a layer that
      lives outside this function to reuse (and be able to train) its weights.
      When omitted, a new randomly initialised layer is created on every call,
      sized from the last dimension of ``encoder_outputs``.

  Returns:
    (context_vector, attention_weights) where ``context_vector`` has shape
    (batch, hidden) and ``attention_weights`` has shape (batch, seq_len, hidden).
    The weights are normalised over the sequence axis, separately per feature.
  """
  if linear is None:
    # Size the layer from the input instead of assuming 768 features.
    hidden_size = encoder_outputs.size(-1)
    linear = Linear(hidden_size, hidden_size)

  # Compute attention scores
  energy = torch.tanh(linear(encoder_outputs))

  # Compute attention weights (softmax across the tokens, dim=1)
  attention_weights = torch.softmax(energy, dim=1)

  # Compute weighted sum of encoder outputs
  context_vector = torch.sum(encoder_outputs * attention_weights, dim=1)

  return context_vector, attention_weights

In [None]:
# Pool the semantic self-attention output into a single vector per post.
# No layer is passed in, so this call builds its own freshly initialised Linear.
context_vector_semantic, attention_weights_semantic = soft_attention(semantic_attention)

In [None]:
# Pool the co-attention output the same way; this is the sentiment-side vector.
context_vector_sentiment, attention_weights = soft_attention(co_attention)

In [None]:
# Join the two context vectors along the feature axis.
combined_features = torch.cat([context_vector_semantic, context_vector_sentiment], dim=1)

In [None]:
combined_features.shape

torch.Size([1, 1536])

In [None]:
class NeuralNetwork(nn.Module):
  """Three-layer feed-forward classifier head.

  Args:
    first_neurons: size of the input feature vector.
    n_hidden_neurons: width of both hidden layers.
    n_classes: number of output classes (defaults to the 4 severity levels).

  ``forward`` returns raw logits, which is what ``CrossEntropyLoss`` expects (it
  applies log-softmax itself). Use ``predict_proba`` to get class probabilities.
  """

  def __init__(self, first_neurons, n_hidden_neurons, n_classes=4):
    super().__init__()
    self.fc1 = Linear(first_neurons, n_hidden_neurons)
    self.activ1 = ReLU()
    self.fc2 = Linear(n_hidden_neurons, n_hidden_neurons)
    self.activ2 = ReLU()
    self.fc3 = Linear(n_hidden_neurons, n_classes)
    # Explicit dim: normalise over the class axis (an implicit dim is deprecated
    # and emits a warning).
    self.softmax = Softmax(dim=-1)

  def forward(self, x):
    """Return unnormalised class scores of shape (..., n_classes)."""
    x = self.fc1(x)
    x = self.activ1(x)

    x = self.fc2(x)
    x = self.activ2(x)

    # No softmax here: applying it before CrossEntropyLoss would normalise twice.
    return self.fc3(x)

  def predict_proba(self, x):
    """Return class probabilities (softmax over the logits from ``forward``)."""
    return self.softmax(self(x))

In [None]:
# Classifier head: 1536 inputs (the two concatenated 768-d context vectors) and
# 512 hidden units.
model = NeuralNetwork(1536, 512)
loss_fn = CrossEntropyLoss()
# Only the classifier's parameters are given to Adam; `self_attn` and the Linear
# layer created inside `soft_attention` are not part of this optimizer.
optimizer = Adam(model.parameters(), lr=0.001)

In [None]:
print(combined_features.shape)
print(model.fc1.weight.shape)

torch.Size([1, 1536])
torch.Size([512, 1536])


In [None]:
print(model)

NeuralNetwork(
  (fc1): Linear(in_features=1536, out_features=512, bias=True)
  (activ1): ReLU()
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (activ2): ReLU()
  (fc3): Linear(in_features=512, out_features=4, bias=True)
  (softmax): Softmax(dim=None)
)


In [None]:
# combined_features is already 2-D ([1, 1536]), so Flatten leaves its shape
# unchanged. The later cells shown here keep using `combined_features` directly.
flatten = nn.Flatten()
combined_features_flattened = flatten(combined_features)

In [None]:
combined_features_flattened.shape

torch.Size([1, 1536])

In [None]:
# Clear stale gradients, then run the classifier. Call the module itself rather
# than .forward() so that nn.Module's call machinery (hooks) is not bypassed.
optimizer.zero_grad()
preds = model(combined_features)

  return self._call_impl(*args, **kwargs)


In [None]:
preds.shape, Y_one_hot[0].shape

(torch.Size([1, 4]), torch.Size([4]))

In [None]:
preds, Y_one_hot[0]

(tensor([[0.2348, 0.2504, 0.2569, 0.2579]], grad_fn=<SoftmaxBackward0>),
 tensor([0, 1, 0, 0]))

In [None]:
# preds has shape (1, 4), so the target must carry a matching batch dimension.
# CrossEntropyLoss accepts class indices of shape (batch,): Y_tensor[:1] is the
# id of the first post's label, shape (1,). The unbatched one-hot vector
# Y_one_hot[0] (shape (4,)) was read as a batch of four targets and raised ValueError.
loss = loss_fn(preds, Y_tensor[:1])

ValueError: Expected input batch_size (1) to match target batch_size (4).