In [1]:
from datasets import load_dataset

dataset = load_dataset("mrjunos/depression-reddit-cleaned")
print(dataset['train'][2])



{'text': 'anyone else instead of sleeping more when depressed stay up all night to avoid the next day from coming sooner may be the social anxiety in me but life is so much more peaceful when everyone else is asleep and not expecting thing of you', 'label': 1}


In [2]:
import sys
if hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix):
    print("Inside a virtual environment")
else:
    print("Not inside a virtual environment")


Inside a virtual environment


In [2]:

import torch
import numpy as np
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

class Dataset(torch.utils.data.Dataset):

    def __init__(self, input_data):
        self.labels = [data for data in input_data['label']]
        self.texts = [tokenizer(data,
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for data in input_data['text']]

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.texts[idx]

    def __getitem__(self, idx):
        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)
        return batch_texts, batch_y

  _torch_pytree._register_pytree_node(


tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [3]:
from torch import nn
from transformers import BertForSequenceClassification

class BertClassifier(nn.Module):

    def __init__(self, model_id='bert-base-cased', num_class=2):
        super(BertClassifier, self).__init__()
        self.bert = BertForSequenceClassification.from_pretrained(model_id, num_labels=num_class)

    def forward(self, input_id, mask):
        output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        return output

  _torch_pytree._register_pytree_node(


In [4]:
torch.cuda.is_available()

True

In [5]:
from torch.optim import Adam
from tqdm import tqdm

def train(model, train_data, val_data, learning_rate, epochs):

    # Fetch training and validation data in batch
    train, val = Dataset(train_data), Dataset(val_data)
    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr= learning_rate)

    if use_cuda:
       model = model.to(device)
       criterion = criterion.to(device)

    for epoch_num in range(epochs):

        total_acc_train = 0
        total_loss_train = 0
        
        # Fine-tune the model
        for train_input, train_label in tqdm(train_dataloader):

            train_label = train_label.to(device)
            mask = train_input['attention_mask'].to(device)
            input_id = train_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)[0]

            batch_loss = criterion(output, train_label.long())
            total_loss_train += batch_loss.item()

            acc = (output.argmax(dim=1) == train_label).sum().item()
            total_acc_train += acc

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()

        total_acc_val = 0
        total_loss_val = 0
        
        # Validate the model
        with torch.no_grad():

            for val_input, val_label in val_dataloader:
                val_label = val_label.to(device)
                mask = val_input['attention_mask'].to(device)
                input_id = val_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)[0]

                batch_loss = criterion(output, val_label.long())
                total_loss_val += batch_loss.item()
                acc = (output.argmax(dim=1) == val_label).sum().item()
                total_acc_val += acc

        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train): .3f} \
            | Train Accuracy: {total_acc_train / len(train): .3f} \
            | Val Loss: {total_loss_val / len(val): .3f} \
            | Val Accuracy: {total_acc_val / len(val): .3f}')

In [6]:
EPOCHS = 10
LR = 1e-7

model = BertClassifier()
data = dataset['train'].shuffle(seed=42)


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
train2 = Dataset(data)

In [7]:
train(model, data[:6500], data[6500:], LR, EPOCHS)

100%|███████████████████████████████████████| 3250/3250 [04:21<00:00, 12.43it/s]


Epochs: 1 | Train Loss:  0.234             | Train Accuracy:  0.816             | Val Loss:  0.167             | Val Accuracy:  0.890


100%|███████████████████████████████████████| 3250/3250 [04:23<00:00, 12.32it/s]


Epochs: 2 | Train Loss:  0.148             | Train Accuracy:  0.903             | Val Loss:  0.129             | Val Accuracy:  0.913


100%|███████████████████████████████████████| 3250/3250 [04:25<00:00, 12.23it/s]


Epochs: 3 | Train Loss:  0.113             | Train Accuracy:  0.931             | Val Loss:  0.099             | Val Accuracy:  0.934


100%|███████████████████████████████████████| 3250/3250 [04:25<00:00, 12.22it/s]


Epochs: 4 | Train Loss:  0.084             | Train Accuracy:  0.950             | Val Loss:  0.074             | Val Accuracy:  0.955


100%|███████████████████████████████████████| 3250/3250 [04:25<00:00, 12.22it/s]


Epochs: 5 | Train Loss:  0.065             | Train Accuracy:  0.962             | Val Loss:  0.060             | Val Accuracy:  0.962


100%|███████████████████████████████████████| 3250/3250 [04:25<00:00, 12.24it/s]


Epochs: 6 | Train Loss:  0.052             | Train Accuracy:  0.970             | Val Loss:  0.052             | Val Accuracy:  0.965


100%|███████████████████████████████████████| 3250/3250 [04:25<00:00, 12.24it/s]


Epochs: 7 | Train Loss:  0.043             | Train Accuracy:  0.975             | Val Loss:  0.048             | Val Accuracy:  0.968


100%|███████████████████████████████████████| 3250/3250 [04:25<00:00, 12.23it/s]


Epochs: 8 | Train Loss:  0.036             | Train Accuracy:  0.979             | Val Loss:  0.046             | Val Accuracy:  0.968


100%|███████████████████████████████████████| 3250/3250 [04:25<00:00, 12.24it/s]


Epochs: 9 | Train Loss:  0.031             | Train Accuracy:  0.983             | Val Loss:  0.044             | Val Accuracy:  0.969


100%|███████████████████████████████████████| 3250/3250 [04:25<00:00, 12.24it/s]


Epochs: 10 | Train Loss:  0.026             | Train Accuracy:  0.986             | Val Loss:  0.043             | Val Accuracy:  0.969


In [8]:
def print_trainable_parameters(model):

    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

print_trainable_parameters(model)
'''
trainable params: 108311810 || all params: 108311810 || trainable%: 100.0
'''

trainable params: 108311810 || all params: 108311810 || trainable%: 100.0


'\ntrainable params: 108311810 || all params: 108311810 || trainable%: 100.0\n'

In [11]:
from adapters import AdapterConfig
from adapters import BertAdapterModel

class BertClassifierWithAdapter(nn.Module):

    def __init__(self, model_id='bert-base-cased', adapter_id='pfeiffer', 
                task_id = 'depression_reddit_dataset', num_class=2):

        super(BertClassifierWithAdapter, self).__init__()

        self.adapter_config = AdapterConfig.load(adapter_id)

        self.bert = BertAdapterModel.from_pretrained(model_id)
        # Insert adapter according to configuration
        self.bert.add_adapter(task_id, config=self.adapter_config)
        # Freeze all BERT-base weights 
        self.bert.train_adapter(task_id)
        # Add prediction layer on top of BERT-base
        self.bert.add_classification_head(task_id, num_labels=num_class)
        # Make sure that adapters and prediction layer are used during forward pass
        self.bert.set_active_adapters(task_id)

    def forward(self, input_id, mask):

        output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)

        return output

  _torch_pytree._register_pytree_node(


In [12]:
# Initialize model 

# task_id is the name of our adapter. You can name it whatever you want but
# common practice is to name it according to task/dataset we will train it on.
task_name = 'depression_reddit_dataset'
model_adapter = BertClassifierWithAdapter(task_id=task_name)
# Check parameters
print_trainable_parameters(model_adapter)

'''
trainable params: 1486658 || all params: 109796930 || trainable%: 1.3540068925424418
'''

Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 2078786 || all params: 110418054 || trainable%: 1.8826504585925776


'\ntrainable params: 1486658 || all params: 109796930 || trainable%: 1.3540068925424418\n'

In [13]:
LR = 5e-6
EPOCHS = 10
train(model_adapter, dataset['train'][:6500], dataset['train'][6500:], LR, EPOCHS)

100%|███████████████████████████████████████| 3250/3250 [03:04<00:00, 17.58it/s]


Epochs: 1 | Train Loss:  0.179             | Train Accuracy:  0.832             | Val Loss:  0.105             | Val Accuracy:  0.972


100%|███████████████████████████████████████| 3250/3250 [03:08<00:00, 17.22it/s]


Epochs: 2 | Train Loss:  0.087             | Train Accuracy:  0.931             | Val Loss:  0.068             | Val Accuracy:  0.959


100%|███████████████████████████████████████| 3250/3250 [03:08<00:00, 17.21it/s]


Epochs: 3 | Train Loss:  0.053             | Train Accuracy:  0.961             | Val Loss:  0.046             | Val Accuracy:  0.977


100%|███████████████████████████████████████| 3250/3250 [03:08<00:00, 17.20it/s]


Epochs: 4 | Train Loss:  0.043             | Train Accuracy:  0.967             | Val Loss:  0.047             | Val Accuracy:  0.974


100%|███████████████████████████████████████| 3250/3250 [03:08<00:00, 17.21it/s]


Epochs: 5 | Train Loss:  0.039             | Train Accuracy:  0.972             | Val Loss:  0.039             | Val Accuracy:  0.981


100%|███████████████████████████████████████| 3250/3250 [03:08<00:00, 17.28it/s]


Epochs: 6 | Train Loss:  0.035             | Train Accuracy:  0.976             | Val Loss:  0.027             | Val Accuracy:  0.984


100%|███████████████████████████████████████| 3250/3250 [03:07<00:00, 17.30it/s]


Epochs: 7 | Train Loss:  0.031             | Train Accuracy:  0.977             | Val Loss:  0.037             | Val Accuracy:  0.981


100%|███████████████████████████████████████| 3250/3250 [03:08<00:00, 17.28it/s]


Epochs: 8 | Train Loss:  0.030             | Train Accuracy:  0.978             | Val Loss:  0.023             | Val Accuracy:  0.985


100%|███████████████████████████████████████| 3250/3250 [03:08<00:00, 17.28it/s]


Epochs: 9 | Train Loss:  0.026             | Train Accuracy:  0.981             | Val Loss:  0.050             | Val Accuracy:  0.962


100%|███████████████████████████████████████| 3250/3250 [03:08<00:00, 17.28it/s]


Epochs: 10 | Train Loss:  0.025             | Train Accuracy:  0.982             | Val Loss:  0.027             | Val Accuracy:  0.983
