In [1]:
import transformers as T
from datasets import load_dataset
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from torchmetrics import SpearmanCorrCoef, Accuracy, F1Score

In [2]:
token_replacement = [
    ["：" , ":"],
    ["，" , ","],
    ["“" , "\""],
    ["”" , "\""],
    ["？" , "?"],
    ["……" , "..."],
    ["！" , "!"]
]

In [3]:
class SemevalDataset(Dataset):
    def __init__(self, split="train") -> None:
        super().__init__()
        assert split in ["train", "validation", "test"]
        self.data = load_dataset(
            "sem_eval_2014_task_1", split=split, cache_dir="./cache/", trust_remote_code=True
        ).to_list()

    def __getitem__(self, index):
        d = self.data[index]
        # Token replacement
        for k in ["premise", "hypothesis"]:
            for tok in token_replacement:
                d[k] = d[k].replace(tok[0], tok[1])
        return d

    def __len__(self):
        return len(self.data)

data_sample = SemevalDataset(split="train").data[:3]
print(f"Dataset example: \n{data_sample[0]} \n{data_sample[1]} \n{data_sample[2]}")

Downloading builder script:   0%|          | 0.00/5.20k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/3.56k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/87.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/93.4k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/4927 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/500 [00:00<?, ? examples/s]

Dataset example: 
{'sentence_pair_id': 1, 'premise': 'A group of kids is playing in a yard and an old man is standing in the background', 'hypothesis': 'A group of boys in a yard is playing and a man is standing in the background', 'relatedness_score': 4.5, 'entailment_judgment': 0} 
{'sentence_pair_id': 2, 'premise': 'A group of children is playing in the house and there is no man standing in the background', 'hypothesis': 'A group of kids is playing in a yard and an old man is standing in the background', 'relatedness_score': 3.200000047683716, 'entailment_judgment': 0} 
{'sentence_pair_id': 3, 'premise': 'The young boys are playing outdoors and the man is smiling nearby', 'hypothesis': 'The kids are playing outdoors near a man with a smile', 'relatedness_score': 4.699999809265137, 'entailment_judgment': 1}


In [4]:
# Define the hyperparameters
lr = 3e-5
epochs = 12
train_batch_size = 64
validation_batch_size = 64
test_batch_size = 64

In [6]:
tokenizer = T.BertTokenizer.from_pretrained("google-bert/bert-base-uncased", cache_dir="./cache/")
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# device = "mps" if torch.backends.mps.is_available() else "cpu"

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [7]:
# TODO1: Create batched data for DataLoader
# `collate_fn` is a function that defines how the data batch should be packed.
# This function will be called in the DataLoader to pack the data batch.

def collate_fn(batch):
    # TODO1-1: Implement the collate_fn function
    # Write your code here
    # The input parameter is a data batch (tuple), and this function packs it into tensors.
    # Use tokenizer to pack tokens and pack the data and its corresponding labels.
    # Return the data batch and labels for each sub-task.
    complete_text = [
        f"{example['premise']}[SEP]{example['hypothesis']}"
        for example in batch
    ]
    complete_text = tokenizer.batch_encode_plus(
        complete_text,
        padding=True,
        truncation=True,
        return_tensors="pt",
        add_special_tokens=False,
    )
    complete_text['labels_reg'] = torch.tensor([b['relatedness_score'] for b in batch])
    complete_text['labels_cls'] = torch.tensor([b['entailment_judgment'] for b in batch])
    
    # Move the data to the device
    complete_text = {k: complete_text[k].to(device) for k in complete_text}
    
    return complete_text

# TODO1-2: Define your DataLoader
ds_train = SemevalDataset("train")
ds_validation = SemevalDataset("validation")

dl_train = DataLoader(ds_train, batch_size=train_batch_size, shuffle=True, collate_fn=collate_fn)
dl_validation = DataLoader(ds_validation, batch_size=validation_batch_size, shuffle=False, collate_fn=collate_fn)

ds_test = SemevalDataset("test")
dl_test = DataLoader(ds_test, batch_size=test_batch_size, shuffle=False, collate_fn=collate_fn)

In [8]:
# TODO2: Construct your model
class MultiLabelModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Write your code here
        # Define what modules you will use in the model
        self.bert = T.BertModel.from_pretrained("google-bert/bert-base-uncased", cache_dir="./cache/")
        self.regression_head = torch.nn.Linear(768, 1)
        self.classification_head = torch.nn.Linear(768, 3)
        
    def forward(self, **inputs):
        # Write your code here
        # Forward pass
        # BERT
        bert_output = self.bert(inputs['input_ids'], inputs['attention_mask'], inputs['token_type_ids'])
        # [CLS]'s final hidden state through BertPooler
        pooler_output = bert_output.pooler_output 
        # Task-specific heads
        reg_output = self.regression_head(pooler_output)
        cls_output = self.classification_head(pooler_output)

        return reg_output.squeeze(-1), cls_output

In [9]:
# Use both GPUs
model = MultiLabelModel()
# This is the sample code from Pytorch
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
    
model = model.to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Let's use 2 GPUs!


In [10]:
# TODO3: Define your optimizer and loss function

# TODO3-1: Define your Optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)# Write your code here

# TODO3-2: Define your loss functions (you should have two)
# Write your code here
criterion_reg = torch.nn.MSELoss()  # Regression loss
criterion_cls = torch.nn.CrossEntropyLoss()  # Classification loss

# scoring functions
spc = SpearmanCorrCoef().to(device)
acc = Accuracy(task="multiclass", num_classes=3).to(device)
f1 = F1Score(task="multiclass", num_classes=3, average='macro').to(device)



In [11]:
%mkdir ./saved_models

In [12]:
for ep in range(epochs):
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model.train()
    # TODO4: Write the training loop
    # Write your code here
    for inputs in pbar:
        # train your model
        # clear gradient
        optimizer.zero_grad()
        # forward pass
        outputs_reg, outputs_cls = model(**inputs)
        # compute loss
        loss_reg = criterion_reg(outputs_reg, inputs['labels_reg'])
        loss_cls = criterion_cls(outputs_cls, inputs['labels_cls'])
        # back-propagation
        loss = loss_reg + loss_cls
        loss.backward()
        # model optimization
        optimizer.step()
        # update progress bar
        pbar.set_postfix(loss=loss.item())

    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model.eval()
    # TODO5: Write the evaluation loop
    # Write your code here
    for inputs in pbar:
        # Evaluate your model
        outputs_reg, outputs_cls = None, None
        with torch.no_grad():
            outputs_reg, outputs_cls = model(**inputs)
        # Output all the evaluation scores (SpearmanCorrCoef, Accuracy, F1Score)
        pred_reg = outputs_reg
        pred_cls = outputs_cls.argmax(dim=1)
        spc.update(pred_reg, inputs['labels_reg'])
        acc.update(pred_cls, inputs['labels_cls'])
        f1.update(pred_cls, inputs['labels_cls'])

    # Print the evaluation scores
    print(f'Spearman CorrCoef: {spc.compute()}')
    print(f'Accuracy: {acc.compute()}')
    print(f'F1 Score: {f1.compute()}')
    # Reset the evaluation metrics
    spc.reset()
    acc.reset()
    f1.reset()
    # Save the model
    torch.save(model, f'./saved_models/ep{ep}.ckpt')

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Training epoch [1/12]: 100%|██████████| 71/71 [00:25<00:00,  2.78it/s, loss=1.28]
Validation epoch [1/12]: 100%|██████████| 8/8 [00:01<00:00,  7.28it/s]


Spearman CorrCoef: 0.7297316193580627
Accuracy: 0.6700000166893005
F1 Score: 0.4631543755531311


Training epoch [2/12]: 100%|██████████| 71/71 [00:24<00:00,  2.88it/s, loss=1.1]  
Validation epoch [2/12]: 100%|██████████| 8/8 [00:01<00:00,  7.72it/s]


Spearman CorrCoef: 0.7322144508361816
Accuracy: 0.7979999780654907
F1 Score: 0.8063919544219971


Training epoch [3/12]: 100%|██████████| 71/71 [00:25<00:00,  2.82it/s, loss=0.898]
Validation epoch [3/12]: 100%|██████████| 8/8 [00:01<00:00,  7.58it/s]


Spearman CorrCoef: 0.8176438212394714
Accuracy: 0.843999981880188
F1 Score: 0.8382222652435303


Training epoch [4/12]: 100%|██████████| 71/71 [00:25<00:00,  2.74it/s, loss=0.32] 
Validation epoch [4/12]: 100%|██████████| 8/8 [00:01<00:00,  7.39it/s]


Spearman CorrCoef: 0.7992652058601379
Accuracy: 0.8640000224113464
F1 Score: 0.8618513941764832


Training epoch [5/12]: 100%|██████████| 71/71 [00:26<00:00,  2.72it/s, loss=0.369]
Validation epoch [5/12]: 100%|██████████| 8/8 [00:01<00:00,  7.57it/s]


Spearman CorrCoef: 0.8177983164787292
Accuracy: 0.8420000076293945
F1 Score: 0.8437898755073547


Training epoch [6/12]: 100%|██████████| 71/71 [00:25<00:00,  2.76it/s, loss=0.439]
Validation epoch [6/12]: 100%|██████████| 8/8 [00:01<00:00,  7.63it/s]


Spearman CorrCoef: 0.8159134984016418
Accuracy: 0.8519999980926514
F1 Score: 0.8550666570663452


Training epoch [7/12]: 100%|██████████| 71/71 [00:25<00:00,  2.78it/s, loss=0.486]
Validation epoch [7/12]: 100%|██████████| 8/8 [00:01<00:00,  7.50it/s]


Spearman CorrCoef: 0.8028708100318909
Accuracy: 0.843999981880188
F1 Score: 0.8456007242202759


Training epoch [8/12]: 100%|██████████| 71/71 [00:25<00:00,  2.75it/s, loss=0.251]
Validation epoch [8/12]: 100%|██████████| 8/8 [00:01<00:00,  7.50it/s]


Spearman CorrCoef: 0.820298969745636
Accuracy: 0.8659999966621399
F1 Score: 0.8668664693832397


Training epoch [9/12]: 100%|██████████| 71/71 [00:25<00:00,  2.79it/s, loss=0.31] 
Validation epoch [9/12]: 100%|██████████| 8/8 [00:01<00:00,  7.51it/s]


Spearman CorrCoef: 0.7940179705619812
Accuracy: 0.8399999737739563
F1 Score: 0.8378422856330872


Training epoch [10/12]: 100%|██████████| 71/71 [00:25<00:00,  2.74it/s, loss=0.312]
Validation epoch [10/12]: 100%|██████████| 8/8 [00:01<00:00,  7.62it/s]


Spearman CorrCoef: 0.8203834891319275
Accuracy: 0.8600000143051147
F1 Score: 0.858651876449585


Training epoch [11/12]: 100%|██████████| 71/71 [00:25<00:00,  2.77it/s, loss=0.249] 
Validation epoch [11/12]: 100%|██████████| 8/8 [00:01<00:00,  7.54it/s]


Spearman CorrCoef: 0.8298466205596924
Accuracy: 0.8420000076293945
F1 Score: 0.8460279703140259


Training epoch [12/12]: 100%|██████████| 71/71 [00:25<00:00,  2.76it/s, loss=0.084] 
Validation epoch [12/12]: 100%|██████████| 8/8 [00:01<00:00,  7.60it/s]


Spearman CorrCoef: 0.8207735419273376
Accuracy: 0.8700000047683716
F1 Score: 0.8670346736907959


In [13]:
# Test the model on the test set
pbar = tqdm(dl_test)
for inputs in pbar:
    # Evaluate your model
    outputs_reg, outputs_cls = None, None
    with torch.no_grad():
        outputs_reg, outputs_cls = model(**inputs)
    # Output all the evaluation scores (SpearmanCorrCoef, Accuracy, F1Score)
    pred_reg = outputs_reg
    pred_cls = outputs_cls.argmax(dim=1)
    spc.update(pred_reg, inputs['labels_reg'])
    acc.update(pred_cls, inputs['labels_cls'])
    f1.update(pred_cls, inputs['labels_cls'])

# Print the evaluation scores
print(f'Spearman CorrCoef: {spc.compute()}')
print(f'Accuracy: {acc.compute()}')
print(f'F1 Score: {f1.compute()}')

# Reset the evaluation metrics
spc.reset()
acc.reset()
f1.reset()

100%|██████████| 77/77 [00:09<00:00,  8.25it/s]


Spearman CorrCoef: 0.813919723033905
Accuracy: 0.8573168516159058
F1 Score: 0.8460031747817993
