In [1]:
import subprocess
import os

result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

In [2]:
!pip install -U transformers evaluate accelerate

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple


In [3]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForMultipleChoice
from sklearn.metrics import f1_score
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm.auto import tqdm

try:
    from google.colab import drive
    drive.mount('/content/gdrive')

    train_path = '/content/gdrive/MyDrive/advanced-ml-project/data/train.tsv'
    test_path = '/content/gdrive/MyDrive/advanced-ml-project/data/test.tsv'
    dev_path = '/content/gdrive/MyDrive/advanced-ml-project/data/dev.tsv'

    eval_model_path = '/content/gdrive/MyDrive/advanced-ml-project/roberta-base-multiple-choice'

except:
    train_path = 'data/train.tsv'
    test_path = 'data/test.tsv'
    dev_path = 'data/dev.tsv'
    eval_model_path = 'roberta-base-multiple-choice'

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

## Load Data

In [4]:
train = pd.read_csv(train_path, sep='\t', header=0)
train = train.sample(frac=1).reset_index(drop=True)
print('Length of train:', len(train))

test = pd.read_csv(test_path, sep='\t', header=0)
test = test.sample(frac=1).reset_index(drop=True)
print('Length of test:', len(test))

dev = pd.read_csv(dev_path, sep='\t', header=0)
dev = dev.sample(frac=1).reset_index(drop=True)
print('Length of dev:', len(dev))

print(train.label.value_counts())
train.head(10)

Length of train: 8891
Length of test: 3245
Length of dev: 4496
label
moderate          6019
not depression    1971
severe             901
Name: count, dtype: int64


Unnamed: 0,PID,text,label
0,train_pid_2789,Mental breakdown when trying to do something p...,moderate
1,train_pid_2426,Just talking and rambling on idk : Hey everyon...,moderate
2,train_pid_3430,What if : What if you couldnt feel bain jelasy...,moderate
3,train_pid_5046,How do you get over feeling sad about the pass...,moderate
4,train_pid_6829,Lost all friends to drugs : [removed],not depression
5,train_pid_306,PLEASE SEND HELP : PLEASE ANYONE WANTED TO TAL...,moderate
6,train_pid_7840,"If you wanna talk pm me, here for ya 😀 : [remo...",not depression
7,train_pid_5889,How did you come to terms with depression and ...,moderate
8,train_pid_3006,"New decade, hopefully new me : So as of starti...",moderate
9,train_pid_815,Hope 2020 will be better for all of us : I wen...,moderate


In [5]:
# reform data
label2idx = {'moderate': 0, 'not depression': 1, 'severe':2}

train['ending0'] = ['moderate'] * len(train)
train['ending1'] = ['not depression'] * len(train)
train['ending2'] = ['severe'] * len(train)
train['prompt'] = ['The level of depression in the following tweet is'] * len(train)
train['label'] = train['label'].apply(lambda x: label2idx[x])

test['ending0'] = ['moderate'] * len(test)
test['ending1'] = ['not depression'] * len(test)
test['ending2'] = ['severe'] * len(test)
test['prompt'] = ['The level of depression in the following tweet is'] * len(test)
test['label'] = test['label'].apply(lambda x: label2idx[x])

dev['ending0'] = ['moderate'] * len(dev)
dev['ending1'] = ['not depression'] * len(dev)
dev['ending2'] = ['severe'] * len(dev)
dev['prompt'] = ['The level of depression in the following tweet is'] * len(dev)
dev['label'] = dev['label'].apply(lambda x: label2idx[x])
train.head(10)

Unnamed: 0,PID,text,label,ending0,ending1,ending2,prompt
0,train_pid_2789,Mental breakdown when trying to do something p...,0,moderate,not depression,severe,The level of depression in the following tweet is
1,train_pid_2426,Just talking and rambling on idk : Hey everyon...,0,moderate,not depression,severe,The level of depression in the following tweet is
2,train_pid_3430,What if : What if you couldnt feel bain jelasy...,0,moderate,not depression,severe,The level of depression in the following tweet is
3,train_pid_5046,How do you get over feeling sad about the pass...,0,moderate,not depression,severe,The level of depression in the following tweet is
4,train_pid_6829,Lost all friends to drugs : [removed],1,moderate,not depression,severe,The level of depression in the following tweet is
5,train_pid_306,PLEASE SEND HELP : PLEASE ANYONE WANTED TO TAL...,0,moderate,not depression,severe,The level of depression in the following tweet is
6,train_pid_7840,"If you wanna talk pm me, here for ya 😀 : [remo...",1,moderate,not depression,severe,The level of depression in the following tweet is
7,train_pid_5889,How did you come to terms with depression and ...,0,moderate,not depression,severe,The level of depression in the following tweet is
8,train_pid_3006,"New decade, hopefully new me : So as of starti...",0,moderate,not depression,severe,The level of depression in the following tweet is
9,train_pid_815,Hope 2020 will be better for all of us : I wen...,0,moderate,not depression,severe,The level of depression in the following tweet is


## Preprocessing Data

In [6]:
class MultipleChoiceDataset(Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        features = self.df.loc[idx, :]
        context = features['text']
        prompt = features['prompt']
        choices = [f'{prompt} {each}' for each in features[['ending0', 'ending1', 'ending2']]]
        label = features['label']

        encoding = self.tokenizer(
            [context] * 3,
            choices,
            return_tensors="pt",
            padding='max_length',
            max_length=self.tokenizer.model_max_length,
            truncation=True
        )
        encoding['labels'] = torch.tensor(label)
        return encoding

tokenizer = AutoTokenizer.from_pretrained("roberta-base")

train_data = MultipleChoiceDataset(train, tokenizer)
train_loader = DataLoader(train_data, batch_size=4, shuffle=True)

test_data = MultipleChoiceDataset(test, tokenizer)
test_loader = DataLoader(test_data, batch_size=4, shuffle=True)

dev_data = MultipleChoiceDataset(dev, tokenizer)
dev_loader = DataLoader(dev_data, batch_size=4, shuffle=True)

## Training model

In [7]:
def compute_class_weight(train_y):
    """
    Compute class weight given imbalanced training data
    Usually used in the neural network model to augment the loss function (weighted loss function)
    Favouring/giving more weights to the rare classes.
    """
    import sklearn.utils.class_weight as scikit_class_weight

    train_y = [each for each in train_y]
    class_list = list(set(train_y))
    class_weight_value = scikit_class_weight.compute_class_weight(class_weight='balanced', classes=class_list, y=train_y)

    return torch.tensor(class_weight_value).to(torch.float).to(device)

In [8]:
model = AutoModelForMultipleChoice.from_pretrained("roberta-base", num_labels=3).to(device)
criterion = nn.CrossEntropyLoss(weight=compute_class_weight(train.label.to_list()))
optimizer = AdamW(model.parameters(), lr=5e-5)
n_epoch = 3

Some weights of RobertaForMultipleChoice were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'classifier.weight', 'roberta.pooler.dense.bias', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
for epoch in tqdm(range(n_epoch), desc=f"Training progress", colour="#00ff00"):
    training_loss = []
    training_f1 = []
    testing_loss = []
    testing_f1 = []

    model.train()
    for i, inputs in enumerate(tqdm(train_loader, leave=False, desc=f"Epoch {epoch + 1}/{n_epoch}", colour="#00ff00")):
        labels = inputs.pop('labels').to(device)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        optimizer.zero_grad()
        outputs = model(**inputs)

        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()
        predictions = torch.argmax(outputs.logits, dim=1)

        training_loss.append(loss.item())
        training_f1.append(f1_score(labels.cpu(), predictions.cpu(), average='weighted'))

    model.eval()
    for i, inputs in enumerate(tqdm(test_loader, leave=False, desc='Evaluating')):
        labels = inputs.pop('labels').to(device)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        outputs = model(**inputs)

        loss = criterion(outputs.logits, labels)
        predictions = torch.argmax(outputs.logits, dim=1)

        testing_loss.append(loss.item())
        testing_f1.append(f1_score(labels.cpu(), predictions.cpu(), average='weighted'))

    print('Epoch %d, training loss: %.3f, traing f1: %.3f, testing loss: %.3f, testing f1: %.3f' %
         (epoch+1, np.mean(training_loss), np.mean(training_f1), np.mean(testing_loss), np.mean(testing_f1)))

Training progress:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 1/3:   0%|          | 0/2223 [00:00<?, ?it/s]

Epoch 1/3:   0%|          | 0/812 [00:00<?, ?it/s]

Epoch 1, training loss: 1.105, traing f1: 0.364, testing loss: 1.099, testing f1: 0.467


Epoch 2/3:   0%|          | 0/2223 [00:00<?, ?it/s]

Epoch 2/3:   0%|          | 0/812 [00:00<?, ?it/s]

Epoch 2, training loss: 1.105, traing f1: 0.366, testing loss: 1.099, testing f1: 0.202


Epoch 3/3:   0%|          | 0/2223 [00:00<?, ?it/s]