# Voting System

The fine-tuned classification model can be found [here](https://drive.google.com/file/d/107vHAbHNqG05WlDmNSzV7m9vET72AVe_/view?usp=sharing).

In [1]:
!pip install transformers datasets evaluate accelerate sentencepiece



In [2]:
import torch
from transformers import AutoTokenizer, AutoModel, pipeline
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from sklearn.metrics import f1_score, accuracy_score
from ast import literal_eval
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Resolve where the data and the fine-tuned checkpoint live.
# On Google Colab they are read from the mounted Drive; anywhere else they are
# resolved relative to the current working directory.
try:
    from google.colab import drive
except ImportError:
    # Not running on Colab: use local relative paths.
    # Only a missing `google.colab` triggers the fallback -- a failed Drive
    # mount is a real error and should surface instead of silently switching
    # to local paths that probably do not exist.
    project_root = ''
    # Only defined for local runs, as before; not used by the visible cells.
    masked_model_path = 'masked_model/'
else:
    drive.mount('/content/gdrive')
    project_root = '/content/gdrive/MyDrive/advanced-ml-project/'

train_path = project_root + 'data/train.tsv'
test_path = project_root + 'data/test.tsv'
dev_path = project_root + 'data/dev.tsv'
emotion_train_path = project_root + 'data/train_emotion.csv'
emotion_test_path = project_root + 'data/test_emotion.csv'
emotion_dev_path = project_root + 'data/dev_emotion.csv'

classification_model_path = project_root + 'bert-depression-detection.pth'

# Use the first GPU when one is available, otherwise run on CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

Mounted at /content/gdrive


device(type='cuda', index=0)

## Load Data

In [3]:
# Lookup tables between class names and the integer ids used as targets.
# The position of a name in the tuple is its class id.
label2idx = {name: idx for idx, name in enumerate(('moderate', 'not depression', 'severe'))}
idx2label = {idx: name for name, idx in label2idx.items()}

In [4]:
# Read the test split (tab-separated despite the .csv extension).
test = pd.read_csv(emotion_test_path, sep='\t')

# `emotion_scores` is stored as the text form of a Python list; parse it back.
test['emotion_scores'] = test['emotion_scores'].apply(literal_eval)
# Replace the class name with its integer id (KeyError on an unknown name).
test['label'] = test['label'].apply(label2idx.__getitem__)

print('Length of Data:', test.shape[0])
print(test['label'].value_counts())
test.head(10)

Length of Data: 3245
0    2169
1     848
2     228
Name: label, dtype: int64


Unnamed: 0,PID,text,label,emotion_scores
0,test_pid_1,Im scared : This is it. I lie to myself every ...,0,"[5.375104904174805, -0.8492379784584045, -1.99..."
1,test_pid_2,New to this but just wanted to vent : I just f...,0,"[3.0481295585632324, 2.1657772064208984, -2.13..."
2,test_pid_3,I’m sad : It’s kinda always been an issue. I w...,0,"[-0.5275790691375732, -0.36453449726104736, -1..."
3,test_pid_4,Lonely but not alone. : All of my immediately ...,0,"[4.492433071136475, 0.433444082736969, -2.2141..."
4,test_pid_5,This year has been trash. : I dont know why I’...,0,"[0.8373550772666931, 1.134714961051941, -1.501..."
5,test_pid_6,Needed to yell into the void : I'm a pos. I'm ...,0,"[5.4630208015441895, -1.3103712797164917, -1.7..."
6,test_pid_7,I don’t know why I’m here anymore : I feel lik...,0,"[-1.5854066610336304, -1.2980746030807495, -1...."
7,test_pid_8,i dont want to be here anymore : i dont wanna ...,0,"[2.5347487926483154, 0.692678689956665, -2.158..."
8,test_pid_9,Antidepressants : Do antidepressants help if y...,0,"[-0.4418915808200836, -1.912025809288025, -2.0..."
9,test_pid_10,TMS : My doctor wants me to do TMS for my depr...,0,"[-3.599724531173706, 1.7751812934875488, 1.598..."


In [5]:
# Model inputs: one row per post holding (text, emotion_scores); targets: class ids.
X = test[['text', 'emotion_scores']].to_numpy()
y = test['label'].to_numpy()
for array in (X, y):
    print(array.shape)

(3245, 2)
(3245,)


In [6]:
class MyDataSet(Dataset):
    """Dataset that serves one post at a time in the form every model needs.

    Args:
        X: indexable of rows where ``row[0]`` is the raw text and ``row[1]``
            the list of extra (emotion) feature values.
        y: indexable of integer class ids, aligned with ``X``.
        max_len: length every tokenized sequence is padded/truncated to.
    """

    def __init__(self, X, y, max_len=512):
        self.X = X
        self.y = y
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        self.max_len = max_len

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return a dict with the raw text, the masked prompt, the tokenized
        inputs, the extra features and the label of sample ``index``."""
        prompt = "The level of depression in this tweet is {}.\n{}"
        text = self.X[index][0]
        features = self.X[index][1]
        label = self.y[index]
        encoding = self.tokenizer(text, padding='max_length', truncation=True, return_tensors='pt', max_length=self.max_len)

        # The tokenizer output for a single string carries a leading batch
        # axis of size 1. Drop exactly that axis: a bare `.squeeze()` would
        # also remove the sequence axis when max_len == 1.
        return {
            'text': text,
            'masked_text': prompt.format('<mask>', text),
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'token_type_ids': encoding['token_type_ids'].squeeze(0),
            # Explicit float32 so the features can be concatenated with the
            # encoder output regardless of how the scores were stored.
            'features': torch.tensor(features, dtype=torch.float32),
            'label': torch.tensor(label),
        }

test_data = MyDataSet(X, y)
# Evaluation only: iterate in dataset order so every run (and every
# evaluation cell) sees the same batches. Shuffling does not change the
# aggregate metrics, it only makes per-sample predictions harder to trace.
test_loader = DataLoader(test_data, batch_size=5, shuffle=False)

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

## Build Model

### MaskedLM model

In [7]:
# Fill-mask pipeline used to score candidate words at the `<mask>` position
# of the prompt built by the dataset.
mask_filler = pipeline(
    task="fill-mask",
    model='kwang123/MaskedLM-roberta-large',
    device=device,
    batch_size=16,
)

config.json:   0%|          | 0.00/675 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

### Classification model

In [8]:
class DepressionClassifier(nn.Module):
    """Pretrained encoder plus a linear head over [first-token state ; extra features].

    The defaults reproduce the original architecture (``bert-base-uncased``,
    5 extra features, 3 labels, dropout 0.1) and the sub-module names are
    unchanged, so existing state dicts still load.

    Args:
        num_extra_features: size of the feature vector appended to the
            encoder representation.
        num_labels: number of output classes.
        dropout: dropout probability applied to the encoder representation.
        pretrained_name: checkpoint name passed to ``AutoModel.from_pretrained``.
    """

    def __init__(self, num_extra_features=5, num_labels=3, dropout=0.1, pretrained_name='bert-base-uncased'):
        super().__init__()
        self.classifier = AutoModel.from_pretrained(pretrained_name)
        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(self.classifier.config.hidden_size + num_extra_features, num_labels)
        # Initialize the weights of the linear layer.
        nn.init.xavier_normal_(self.linear1.weight)

    def forward(self, input_ids, token_type_ids, attention_mask, features):
        """Return unnormalized class scores of shape [batch size, num labels].

        ``features`` is expected to be [batch size, num extra features]; it is
        cast to the encoder's dtype before concatenation.
        """
        encoded = self.classifier(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)
        # Hidden state at position 0 (the [CLS] token for BERT): [batch size, hidden size]
        pooled = encoded.last_hidden_state[:, 0, :]
        pooled = self.dropout(pooled)
        # Without this cast, float64 features would promote the concatenated
        # tensor to float64 and the float32 linear layer would reject it.
        features = features.to(dtype=pooled.dtype)
        combined = torch.cat((pooled, features), dim=-1)  # [batch size, hidden size + num extra features]
        return self.linear1(combined)  # [batch size, num labels]

model = DepressionClassifier().to(device)
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved from a GPU session also loads on a CPU-only machine.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
model.load_state_dict(torch.load(classification_model_path, map_location=device))

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

<All keys matched successfully>

### Zero-shot classification model

In [9]:
# Zero-shot (NLI-based) classification pipeline.
# NOTE(review): when this checkpoint was loaded, transformers logged that it
# could not determine the 'entailment' label id from the model config and set
# it to -1 -- confirm the checkpoint's label2id mapping gives the intended scores.
classifier = pipeline(
    task="zero-shot-classification",
    model="kwang123/roberta-base-nli",
    device=device,
    batch_size=64,
)

config.json:   0%|          | 0.00/889 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


## Evaluation (Classification + MaskedLM)

Weight for MaskedLM model: $0.3786 \div (0.7278 + 0.3786)$

Weight for Classification model: $0.7278 \div (0.7278 + 0.3786)$

In [10]:
def get_logits(preds, labels=('moderate', 'healthy', 'severe'), scale=1000):
    """Turn fill-mask pipeline output into per-class probabilities.

    Despite the name, the result is a softmax distribution, not raw logits.

    Args:
        preds: fill-mask output. Either a list with one entry per input, each
            entry being a list of candidate dicts with ``'token_str'`` and
            ``'score'`` keys, or the flat list of candidate dicts the pipeline
            returns for a single input.
        labels: target words, in output column order.
        scale: factor applied to the scores before the softmax; a large value
            sharpens the distribution towards the highest-scoring label.

    Returns:
        Float tensor of shape ``(num inputs, len(labels))`` whose rows sum to 1
        (shape ``(0, len(labels))`` for empty input).
    """
    # A single input yields a flat list of candidates; wrap it as one row.
    if preds and isinstance(preds[0], dict):
        preds = [preds]

    rows = []
    for candidates in preds:
        # A label the pipeline did not return scores 0.0. The previous
        # fallbacks (0, 1, 2) were class indices and, once scaled, made a
        # missing label win the softmax.
        scores = dict.fromkeys(labels, 0.0)
        for candidate in candidates:
            token = candidate['token_str'].strip()
            if token in scores:
                scores[token] = candidate['score']
        rows.append([scores[label] for label in labels])

    if not rows:
        return torch.empty((0, len(labels)))
    return F.softmax(torch.tensor(rows) * scale, dim=1)

In [11]:
# Weighted soft vote: fine-tuned classifier + masked-LM prompt model.
model.eval()

# Constants from the markdown cell above, normalized so both weights sum to 1.
masked_weight = 0.3786 / (0.7278 + 0.3786)
classification_weight = 0.7278 / (0.7278 + 0.3786)

preds = []
y_true = []
for inputs in tqdm(test_loader, leave=False, desc="Evaluating"):
    # Fine-tuned classifier: class probabilities, brought back to the CPU.
    with torch.no_grad():
        classification_logits = model(
            inputs['input_ids'].to(device),
            inputs['token_type_ids'].to(device),
            inputs['attention_mask'].to(device),
            inputs['features'].to(device),
        ).cpu()
        classification_logits = F.softmax(classification_logits, dim=1)

    # Masked LM: score the three target words at the <mask> position.
    predictions = mask_filler(
        inputs['masked_text'],
        top_k=3,
        targets=['moderate', 'healthy', 'severe'],
        tokenizer_kwargs={'padding': 'max_length', 'truncation': True, 'max_length': 512}
    )
    masked_logits = get_logits(predictions)

    # Combine both distributions and take the most probable class.
    logits = classification_logits * classification_weight + masked_logits * masked_weight
    preds.extend(logits.argmax(dim=1).tolist())
    y_true.extend(inputs['label'].tolist())

y_true_arr = np.array(y_true)
preds_arr = np.array(preds)
weighted = f1_score(y_true_arr, preds_arr, average='weighted')
macro = f1_score(y_true_arr, preds_arr, average='macro')
accuracy = accuracy_score(y_true_arr, preds_arr)
print("Weighted F1 score: %.4f, Macro F1 score: %.4f, Accuracy: %.4f" % (weighted, macro, accuracy))

Evaluating:   0%|          | 0/649 [00:00<?, ?it/s]



Weighted F1 score: 0.8488, Macro F1 score: 0.8044, Accuracy: 0.8453


## Evaluation (Classification + Zero-shot classification)

Weight for zero-shot classification model: $0.7410 \div (0.7278 + 0.7410)$

Weight for classification model: $0.7278 \div (0.7278 + 0.7410)$

In [12]:
def get_logits_zero_shot(predictions, labels=('moderate', 'not depression', 'severe')):
    """Arrange zero-shot pipeline scores into a fixed class order.

    The pipeline lists labels in its own order per input; this re-indexes each
    result so that column ``k`` always holds the score of ``labels[k]``.

    Args:
        predictions: list of result dicts with parallel ``'labels'`` and
            ``'scores'`` lists. A single bare dict is also accepted, since the
            pipeline may return one for a single input.
        labels: class names, in output column order.

    Returns:
        Tensor of shape ``(num inputs, len(labels))``.

    Raises:
        ValueError: if a result does not contain one of ``labels``.
    """
    if isinstance(predictions, dict):
        predictions = [predictions]

    rows = []
    for prediction in predictions:
        names, scores = prediction['labels'], prediction['scores']
        rows.append([scores[names.index(label)] for label in labels])

    # reshape keeps the column dimension even when there are no rows.
    return torch.tensor(rows).reshape(-1, len(labels))

In [13]:
# Weighted soft vote: fine-tuned classifier + zero-shot classifier.
model.eval()

# Constants from the markdown cell above, normalized so both weights sum to 1.
zero_shot_weight = 0.7410 / (0.7278 + 0.7410)
classification_weight = 0.7278 / (0.7278 + 0.7410)

preds = []
y_true = []
for inputs in tqdm(test_loader, leave=False, desc="Evaluating"):
    # Fine-tuned classifier: class probabilities, brought back to the CPU.
    with torch.no_grad():
        classification_logits = model(
            inputs['input_ids'].to(device),
            inputs['token_type_ids'].to(device),
            inputs['attention_mask'].to(device),
            inputs['features'].to(device),
        ).cpu()
        classification_logits = F.softmax(classification_logits, dim=1)

    # Zero-shot model: score each candidate label against the raw text.
    predictions = classifier(
        inputs['text'],
        candidate_labels=['moderate', 'not depression', 'severe'],
        hypothesis_template='"The level of depression in this tweet is {}."',
        tokenizer_kwargs={'padding': 'max_length', 'truncation': 'only_first', 'max_length': 512}
    )
    zero_shot_logits = get_logits_zero_shot(predictions)

    # Combine both distributions and take the most probable class.
    logits = classification_logits * classification_weight + zero_shot_logits * zero_shot_weight
    preds.extend(logits.argmax(dim=1).tolist())
    y_true.extend(inputs['label'].tolist())

y_true_arr = np.array(y_true)
preds_arr = np.array(preds)
weighted = f1_score(y_true_arr, preds_arr, average='weighted')
macro = f1_score(y_true_arr, preds_arr, average='macro')
accuracy = accuracy_score(y_true_arr, preds_arr)
print("Weighted F1 score: %.4f, Macro F1 score: %.4f, Accuracy: %.4f" % (weighted, macro, accuracy))

Evaluating:   0%|          | 0/649 [00:00<?, ?it/s]



Weighted F1 score: 0.8358, Macro F1 score: 0.7909, Accuracy: 0.8351


## Evaluation (Classification + MaskedLM + Zero-shot classification)

Weight for zero-shot classification: $0.7410 \div (0.7278 + 0.7410 + 0.3786)$

Weight for classification: $0.7278 \div (0.7278 + 0.7410 + 0.3786)$

Weight for MaskedLM: $0.3786 \div (0.7278 + 0.7410 + 0.3786)$

In [14]:
# Weighted soft vote: fine-tuned classifier + zero-shot classifier + masked LM.
# NOTE(review): the metrics recorded below this cell are identical to those of
# the classifier + zero-shot run -- confirm that the masked-LM term actually
# takes part in the vote and that this output is not stale.
model.eval()

# Constants from the markdown cell above, normalized so the weights sum to 1.
zero_shot_weight = 0.7410 / (0.7278 + 0.7410 + 0.3786)
classification_weight = 0.7278 / (0.7278 + 0.7410 + 0.3786)
masked_weight = 0.3786 / (0.7278 + 0.3786 + 0.7410)

preds = []
y_true = []
for inputs in tqdm(test_loader, leave=False, desc="Evaluating"):
    # Fine-tuned classifier: class probabilities, brought back to the CPU.
    with torch.no_grad():
        classification_logits = model(
            inputs['input_ids'].to(device),
            inputs['token_type_ids'].to(device),
            inputs['attention_mask'].to(device),
            inputs['features'].to(device),
        ).cpu()
        classification_logits = F.softmax(classification_logits, dim=1)

    # Zero-shot model: score each candidate label against the raw text.
    predictions = classifier(
        inputs['text'],
        candidate_labels=['moderate', 'not depression', 'severe'],
        hypothesis_template='"The level of depression in this tweet is {}."',
        tokenizer_kwargs={'padding': 'max_length', 'truncation': 'only_first', 'max_length': 512}
    )
    zero_shot_logits = get_logits_zero_shot(predictions)

    # Masked LM: score the three target words at the <mask> position.
    predictions = mask_filler(
        inputs['masked_text'],
        top_k=3,
        targets=['moderate', 'healthy', 'severe'],
        tokenizer_kwargs={'padding': 'max_length', 'truncation': True, 'max_length': 512}
    )
    masked_logits = get_logits(predictions)

    # Combine the three distributions and take the most probable class.
    logits = (
        classification_logits * classification_weight
        + zero_shot_logits * zero_shot_weight
        + masked_logits * masked_weight
    )
    preds.extend(logits.argmax(dim=1).tolist())
    y_true.extend(inputs['label'].tolist())

y_true_arr = np.array(y_true)
preds_arr = np.array(preds)
weighted = f1_score(y_true_arr, preds_arr, average='weighted')
macro = f1_score(y_true_arr, preds_arr, average='macro')
accuracy = accuracy_score(y_true_arr, preds_arr)
print("Weighted F1 score: %.4f, Macro F1 score: %.4f, Accuracy: %.4f" % (weighted, macro, accuracy))

Evaluating:   0%|          | 0/649 [00:00<?, ?it/s]



Weighted F1 score: 0.8358, Macro F1 score: 0.7909, Accuracy: 0.8351
