In [None]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:

# Tokenizer that ships with the 'bert-base-uncased' checkpoint; the same
# checkpoint name is used for the classifier later in the notebook.
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path='bert-base-uncased',
)

# Read the labelled examples from `train.csv` (path is relative to the working
# directory). Later cells read the `text` and `target` columns of this frame.
df = pd.read_csv(filepath_or_buffer='train.csv')


In [None]:
# Cut the frame into two contiguous, non-overlapping pieces without shuffling:
# the first 10% of rows is the (small) prompt tuning set, and the remaining
# 90% is the evaluation benchmark.
train_size = int(len(df) * 0.1)
df_train, df_val = df[:train_size], df[train_size:]

In [None]:
# Prompt templates to compare. Each one is a callable that takes the raw text
# and returns the text with the word "disaster" attached in a different place.

def _label_colon_prefix(text):
    """Put the label word and a colon in front of the text."""
    return f"disaster: {text}"


def _label_after_sep(text):
    """Append the label word after the text, separated by the literal `[SEP]`."""
    return f"{text} [SEP] disaster"


def _label_before_sep(text):
    """Put the label word before the text, separated by the literal `[SEP]`."""
    return f"disaster [SEP] {text}"


prompt_techniques = [
    _label_colon_prefix,
    _label_after_sep,
    _label_before_sep,
]


In [None]:
# Fine-tuning configuration shared by every prompt variant.
epochs, batch_size = 3, 16  # passes over the training split / examples per batch
learning_rate = 2e-5        # learning rate handed to the optimizer

In [None]:

# Fine-tune and evaluate one fresh BERT classifier per prompt template.
#
# For every template in `prompt_techniques` this cell:
#   1. rewrites each text of the training and validation frames with the template,
#   2. tokenizes the rewritten texts and wraps them in DataLoaders,
#   3. fine-tunes a newly loaded BertForSequenceClassification for `epochs` epochs,
#   4. after each epoch prints the average training loss and the accuracy,
#      precision, recall and F1 measured on the validation frame.

# Fixed seed (arbitrary value), re-applied for every template so that each variant
# starts from the same random state and the comparison is not dominated by
# run-to-run noise.
seed = 42

# Raw texts and labels do not depend on the template, so build them once.
train_raw_texts = df_train['text'].tolist()
val_raw_texts = df_val['text'].tolist()
train_labels = torch.tensor(df_train['target'].tolist())
val_labels = torch.tensor(df_val['target'].tolist())

for i, prompt in enumerate(prompt_techniques):
    torch.manual_seed(seed)

    # apply the template, then tokenize (the tokenizer adds the special tokens);
    # padding=True pads every sequence to the longest one in its split
    train_texts = [prompt(text) for text in train_raw_texts]
    val_texts = [prompt(text) for text in val_raw_texts]
    train_encodings = tokenizer(train_texts, truncation=True, padding=True)
    val_encodings = tokenizer(val_texts, truncation=True, padding=True)

    # convert data to tensors
    train_inputs = torch.tensor(train_encodings['input_ids'])
    train_masks = torch.tensor(train_encodings['attention_mask'])

    val_inputs = torch.tensor(val_encodings['input_ids'])
    val_masks = torch.tensor(val_encodings['attention_mask'])

    # set up the data loaders: shuffled batches for training, fixed order for evaluation
    train_data = TensorDataset(train_inputs, train_masks, train_labels)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

    val_data = TensorDataset(val_inputs, val_masks, val_labels)
    val_sampler = SequentialSampler(val_data)
    val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

    # set up the model
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
    # The batches below are moved to `device`, so the model has to live there too;
    # without this the forward pass fails with a device mismatch whenever CUDA is used.
    model.to(device)
    # NOTE(review): this is the `AdamW` imported from `transformers`; newer
    # transformers releases deprecate it in favour of `torch.optim.AdamW`. If you
    # switch, set `weight_decay` explicitly - the defaults differ (TODO confirm
    # against the installed versions).
    optimizer = AdamW(model.parameters(), lr=learning_rate)

    # train the model
    print(f"\nTraining model with prompt engineering technique {i+1}")
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        model.train()
        total_loss = 0
        for step, batch in enumerate(train_dataloader):
            batch_inputs, batch_masks, batch_labels = tuple(t.to(device) for t in batch)
            optimizer.zero_grad()
            outputs = model(batch_inputs, token_type_ids=None, attention_mask=batch_masks, labels=batch_labels)
            loss = outputs[0]  # with `labels` given, the first output is the loss
            total_loss += loss.item()
            loss.backward()
            # cap the global gradient norm at 1.0 before the optimizer step
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
        avg_train_loss = total_loss / len(train_dataloader)
        print(f"Average training loss: {avg_train_loss:.4f}")

        # evaluate the model on the validation set
        model.eval()
        preds = []
        true_labels = []
        with torch.no_grad():  # no gradients are needed for inference
            for batch in val_dataloader:
                batch_inputs, batch_masks, batch_labels = tuple(t.to(device) for t in batch)
                outputs = model(batch_inputs, token_type_ids=None, attention_mask=batch_masks)

                logits = outputs[0]  # without `labels`, the first output is the logits
                _, batch_pred = torch.max(logits, dim=1)  # index of the largest logit
                preds.extend(batch_pred.tolist())
                true_labels.extend(batch_labels.tolist())

        accuracy = accuracy_score(true_labels, preds)
        # average='binary' scores the positive class only; assumes `target` is 0/1 - TODO confirm
        precision, recall, f1, _ = precision_recall_fscore_support(true_labels, preds, average='binary')

        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1-score: {f1:.4f}")


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training model with prompt engineering technique 1

Epoch 1/3
Average training loss: 0.5199
Accuracy: 0.7736
Precision: 0.8388
Recall: 0.5955
F1-score: 0.6965

Epoch 2/3
Average training loss: 0.3722
Accuracy: 0.7611
Precision: 0.9344
Recall: 0.4865
F1-score: 0.6398

Epoch 3/3
Average training loss: 0.2849
Accuracy: 0.7995
Precision: 0.7961
Recall: 0.7263
F1-score: 0.7596


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training model with prompt engineering technique 2

Epoch 1/3
Average training loss: 0.5387
Accuracy: 0.7512
Precision: 0.9105
Recall: 0.4764
F1-score: 0.6255

Epoch 2/3
Average training loss: 0.3710
Accuracy: 0.7882
Precision: 0.8239
Recall: 0.6544
F1-score: 0.7294

Epoch 3/3
Average training loss: 0.2919
Accuracy: 0.7808
Precision: 0.7326
Recall: 0.7835
F1-score: 0.7572


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training model with prompt engineering technique 3

Epoch 1/3
Average training loss: 0.5673
Accuracy: 0.7843
Precision: 0.8662
Recall: 0.5979
F1-score: 0.7074

Epoch 2/3
Average training loss: 0.3985
Accuracy: 0.7837
Precision: 0.8167
Recall: 0.6501
F1-score: 0.7239

Epoch 3/3
Average training loss: 0.2713
Accuracy: 0.7820
Precision: 0.7524
Recall: 0.7454
F1-score: 0.7489
