In [None]:
import transformers as T
from datasets import load_dataset
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from torchmetrics import SpearmanCorrCoef, Accuracy, F1Score

In [None]:
token_replacement = [
    ["：" , ":"],
    ["，" , ","],
    ["“" , "\""],
    ["”" , "\""],
    ["？" , "?"],
    ["……" , "..."],
    ["！" , "!"]
]

In [None]:
class SemevalDataset(Dataset):
    def __init__(self, split="train") -> None:
        super().__init__()
        assert split in ["train", "validation", "test"]
        self.data = load_dataset(
            "sem_eval_2014_task_1", split=split, cache_dir="./cache/", trust_remote_code=True
        ).to_list()

    def __getitem__(self, index):
        d = self.data[index]
        # Token replacement
        for k in ["premise", "hypothesis"]:
            for tok in token_replacement:
                d[k] = d[k].replace(tok[0], tok[1])
        return d

    def __len__(self):
        return len(self.data)

data_sample = SemevalDataset(split="train").data[:3]
print(f"Dataset example: \n{data_sample[0]} \n{data_sample[1]} \n{data_sample[2]}")

In [None]:
# Define the hyperparameters
lr = 3e-5
epochs = 10
train_batch_size = 64
validation_batch_size = 64
test_batch_size = 64

In [None]:
tokenizer = T.BertTokenizer.from_pretrained("google-bert/bert-base-uncased", cache_dir="./cache/")
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# device = "mps" if torch.backends.mps.is_available() else "cpu"

In [None]:
# TODO1: Create batched data for DataLoader
# `collate_fn` is a function that defines how the data batch should be packed.
# This function will be called in the DataLoader to pack the data batch.

def collate_fn(batch):
    # TODO1-1: Implement the collate_fn function
    # Write your code here
    # The input parameter is a data batch (tuple), and this function packs it into tensors.
    # Use tokenizer to pack tokens and pack the data and its corresponding labels.
    # Return the data batch and labels for each sub-task.
    complete_text = [
        f"{example['premise']}[SEP]{example['hypothesis']}"
        for example in batch
    ]
    complete_text = tokenizer.batch_encode_plus(
        complete_text,
        padding=True,
        truncation=True,
        return_tensors="pt",
        add_special_tokens=False,
    )
    complete_text['labels_reg'] = torch.tensor([b['relatedness_score'] for b in batch])
    complete_text['labels_cls'] = torch.tensor([b['entailment_judgment'] for b in batch])
    
    # Move the data to the device
    complete_text = {k: complete_text[k].to(device) for k in complete_text}
    
    return complete_text

# TODO1-2: Define your DataLoader
ds_train = SemevalDataset("train")
ds_validation = SemevalDataset("validation")

dl_train = DataLoader(ds_train, batch_size=train_batch_size, shuffle=True, collate_fn=collate_fn)
dl_validation = DataLoader(ds_validation, batch_size=validation_batch_size, shuffle=False, collate_fn=collate_fn)

ds_test = SemevalDataset("test")
dl_test = DataLoader(ds_test, batch_size=test_batch_size, shuffle=False, collate_fn=collate_fn)

# RegModel

In [9]:
# TODO2: Construct your model
class RegModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Write your code here
        # Define what modules you will use in the model
        self.bert = T.BertModel.from_pretrained("google-bert/bert-base-uncased", cache_dir="./cache/")
        self.regression_head = torch.nn.Sequential(
            torch.nn.Linear(768, 384),
            torch.nn.ReLU(),
            torch.nn.Linear(384, 192),
            torch.nn.ReLU(),
            torch.nn.Linear(192, 1)
        )
        
    def forward(self, **inputs):
        # Write your code here
        # Forward pass
        # BERT
        bert_output = self.bert(inputs['input_ids'], inputs['attention_mask'], inputs['token_type_ids'])
        # [CLS] token hidden state
        cls_token_output = bert_output.last_hidden_state[:, 0, :]
        # Task-specific heads
        reg_output = self.regression_head(cls_token_output)

        return reg_output.squeeze(-1)

In [11]:
# Use both GPUs
model = RegModel()

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [12]:
# TODO3: Define your optimizer and loss function

# TODO3-1: Define your Optimizer
# optimizer = torch.optim.AdamW(model.parameters(), lr=lr)# Write your code here
optimizer = torch.optim.AdamW([
    {'params': model.bert.parameters(), 'lr': 3e-5},
    {'params': model.regression_head.parameters(), 'lr': 3e-3},
])

# TODO3-2: Define your loss functions (you should have two)
# Write your code here
criterion_reg = torch.nn.MSELoss()  # Regression loss

# scoring functions
spc = SpearmanCorrCoef().to(device)



In [13]:
# This is the sample code from Pytorch
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
    
model = model.to(device)

Let's use 2 GPUs!


In [14]:
%mkdir ./saved_models

In [15]:
for ep in range(epochs):
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model.train()
    # TODO4: Write the training loop
    # Write your code here
    for inputs in pbar:
        # train your model
        # clear gradient
        optimizer.zero_grad()
        # forward pass
        outputs_reg = model(**inputs)
        # compute loss
        loss_reg = criterion_reg(outputs_reg, inputs['labels_reg'])
        # back-propagation
        loss = loss_reg
        loss.backward()
        # model optimization
        optimizer.step()
        # update progress bar
        pbar.set_postfix(loss=loss.item())

    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model.eval()
    # TODO5: Write the evaluation loop
    # Write your code here
    for inputs in pbar:
        # Evaluate your model
        outputs_reg = None
        with torch.no_grad():
            outputs_reg = model(**inputs)
        # Output all the evaluation scores (SpearmanCorrCoef, Accuracy, F1Score)
        pred_reg = outputs_reg
        spc.update(pred_reg, inputs['labels_reg'])

    # Print the evaluation scores
    print(f'Spearman CorrCoef: {spc.compute()}')
    # Reset the evaluation metrics
    spc.reset()
    # Save the model
    torch.save(model, f'./saved_models/ep{ep}.ckpt')

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Training epoch [1/10]: 100%|██████████| 71/71 [00:26<00:00,  2.71it/s, loss=0.335]
Validation epoch [1/10]: 100%|██████████| 8/8 [00:00<00:00, 10.11it/s]


Spearman CorrCoef: 0.7854442596435547


Training epoch [2/10]: 100%|██████████| 71/71 [00:24<00:00,  2.85it/s, loss=0.304]
Validation epoch [2/10]: 100%|██████████| 8/8 [00:00<00:00, 10.06it/s]


Spearman CorrCoef: 0.8087268471717834


Training epoch [3/10]: 100%|██████████| 71/71 [00:25<00:00,  2.74it/s, loss=0.332]
Validation epoch [3/10]: 100%|██████████| 8/8 [00:00<00:00,  9.46it/s]


Spearman CorrCoef: 0.8221060037612915


Training epoch [4/10]: 100%|██████████| 71/71 [00:26<00:00,  2.70it/s, loss=0.236]
Validation epoch [4/10]: 100%|██████████| 8/8 [00:01<00:00,  7.84it/s]


Spearman CorrCoef: 0.8258858323097229


Training epoch [5/10]: 100%|██████████| 71/71 [00:27<00:00,  2.59it/s, loss=0.127] 
Validation epoch [5/10]: 100%|██████████| 8/8 [00:00<00:00,  8.95it/s]


Spearman CorrCoef: 0.8315981030464172


Training epoch [6/10]: 100%|██████████| 71/71 [00:27<00:00,  2.57it/s, loss=0.137] 
Validation epoch [6/10]: 100%|██████████| 8/8 [00:00<00:00,  9.28it/s]


Spearman CorrCoef: 0.8240798115730286


Training epoch [7/10]: 100%|██████████| 71/71 [00:27<00:00,  2.63it/s, loss=0.12]  
Validation epoch [7/10]: 100%|██████████| 8/8 [00:00<00:00,  9.20it/s]


Spearman CorrCoef: 0.8321607708930969


Training epoch [8/10]: 100%|██████████| 71/71 [00:27<00:00,  2.58it/s, loss=0.174] 
Validation epoch [8/10]: 100%|██████████| 8/8 [00:00<00:00,  9.09it/s]


Spearman CorrCoef: 0.8209677338600159


Training epoch [9/10]: 100%|██████████| 71/71 [00:27<00:00,  2.60it/s, loss=0.081] 
Validation epoch [9/10]: 100%|██████████| 8/8 [00:00<00:00,  9.37it/s]


Spearman CorrCoef: 0.8371643424034119


Training epoch [10/10]: 100%|██████████| 71/71 [00:27<00:00,  2.59it/s, loss=0.0814]
Validation epoch [10/10]: 100%|██████████| 8/8 [00:00<00:00,  9.17it/s]


Spearman CorrCoef: 0.8313769698143005


In [16]:
# Test the model on the test set
pbar = tqdm(dl_test)
for inputs in pbar:
    # Evaluate your model
    outputs_reg = None
    with torch.no_grad():
        outputs_reg = model(**inputs)
    # Output all the evaluation scores (SpearmanCorrCoef, Accuracy, F1Score)
    pred_reg = outputs_reg
    spc.update(pred_reg, inputs['labels_reg'])

# Print the evaluation scores
print(f'Spearman CorrCoef: {spc.compute()}')
# Reset the evaluation metrics
spc.reset()

100%|██████████| 77/77 [00:07<00:00, 10.07it/s]

Spearman CorrCoef: 0.8125749230384827





## ClsModel

In [17]:
# TODO2: Construct your model
class MultiLabelModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Write your code here
        # Define what modules you will use in the model
        self.bert = T.BertModel.from_pretrained("google-bert/bert-base-uncased", cache_dir="./cache/")
        self.classification_head = torch.nn.Sequential(
            torch.nn.Linear(768, 384),
            torch.nn.ReLU(),
            torch.nn.Linear(384, 192),
            torch.nn.ReLU(),
            torch.nn.Linear(192, 3)
        )
        
    def forward(self, **inputs):
        # Write your code here
        # Forward pass
        # BERT
        bert_output = self.bert(inputs['input_ids'], inputs['attention_mask'], inputs['token_type_ids'])
        # [CLS] token hidden state
        cls_token_output = bert_output.last_hidden_state[:, 0, :]
        # Task-specific heads
        cls_output = self.classification_head(cls_token_output)

        return cls_output

In [19]:
model = ClsModel()

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

In [20]:
# TODO3: Define your optimizer and loss function

# TODO3-1: Define your Optimizer
# optimizer = torch.optim.AdamW(model.parameters(), lr=lr)# Write your code here
optimizer = torch.optim.AdamW([
    {'params': model.bert.parameters(), 'lr': 3e-5},
    {'params': model.classification_head.parameters(), 'lr': 3e-3}
])

# TODO3-2: Define your loss functions (you should have two)
# Write your code here
criterion_cls = torch.nn.CrossEntropyLoss()  # Classification loss

# scoring functions
acc = Accuracy(task="multiclass", num_classes=3).to(device)
f1 = F1Score(task="multiclass", num_classes=3, average='macro').to(device)

In [21]:
# This is the sample code from Pytorch
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
    
model = model.to(device)

Let's use 2 GPUs!


In [22]:
for ep in range(epochs):
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model.train()
    # TODO4: Write the training loop
    # Write your code here
    for inputs in pbar:
        # train your model
        # clear gradient
        optimizer.zero_grad()
        # forward pass
        outputs_cls = model(**inputs)
        # compute loss
        loss_cls = criterion_cls(outputs_cls, inputs['labels_cls'])
        # back-propagation
        loss = loss_cls
        loss.backward()
        # model optimization
        optimizer.step()
        # update progress bar
        pbar.set_postfix(loss=loss.item())

    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model.eval()
    # TODO5: Write the evaluation loop
    # Write your code here
    for inputs in pbar:
        # Evaluate your model
        outputs_cls = None
        with torch.no_grad():
            outputs_cls = model(**inputs)
        # Output all the evaluation scores (SpearmanCorrCoef, Accuracy, F1Score)
        pred_cls = outputs_cls.argmax(dim=1)
        acc.update(pred_cls, inputs['labels_cls'])
        f1.update(pred_cls, inputs['labels_cls'])

    # Print the evaluation scores
    print(f'Accuracy: {acc.compute()}')
    print(f'F1 Score: {f1.compute()}')
    # Reset the evaluation metrics
    acc.reset()
    f1.reset()
    # Save the model
    torch.save(model, f'./saved_models/ep{ep}.ckpt')

Training epoch [1/10]: 100%|██████████| 71/71 [01:24<00:00,  1.19s/it, loss=0.683]
Validation epoch [1/10]: 100%|██████████| 8/8 [00:03<00:00,  2.16it/s]


Accuracy: 0.7820000052452087
F1 Score: 0.7963052988052368


Training epoch [2/10]: 100%|██████████| 71/71 [01:21<00:00,  1.15s/it, loss=0.39] 
Validation epoch [2/10]: 100%|██████████| 8/8 [00:03<00:00,  2.30it/s]


Accuracy: 0.8479999899864197
F1 Score: 0.8446929454803467


Training epoch [3/10]: 100%|██████████| 71/71 [01:20<00:00,  1.14s/it, loss=0.205]
Validation epoch [3/10]: 100%|██████████| 8/8 [00:03<00:00,  2.32it/s]


Accuracy: 0.8659999966621399
F1 Score: 0.8610131740570068


Training epoch [4/10]: 100%|██████████| 71/71 [01:20<00:00,  1.13s/it, loss=0.192] 
Validation epoch [4/10]: 100%|██████████| 8/8 [00:03<00:00,  2.27it/s]


Accuracy: 0.8320000171661377
F1 Score: 0.821036696434021


Training epoch [5/10]: 100%|██████████| 71/71 [01:21<00:00,  1.14s/it, loss=0.159]
Validation epoch [5/10]: 100%|██████████| 8/8 [00:03<00:00,  2.32it/s]


Accuracy: 0.878000020980835
F1 Score: 0.8655569553375244


Training epoch [6/10]: 100%|██████████| 71/71 [01:20<00:00,  1.14s/it, loss=0.0378]
Validation epoch [6/10]: 100%|██████████| 8/8 [00:03<00:00,  2.31it/s]


Accuracy: 0.8640000224113464
F1 Score: 0.8606765270233154


Training epoch [7/10]: 100%|██████████| 71/71 [01:20<00:00,  1.14s/it, loss=0.154]  
Validation epoch [7/10]: 100%|██████████| 8/8 [00:03<00:00,  2.26it/s]


Accuracy: 0.8600000143051147
F1 Score: 0.8503251075744629


Training epoch [8/10]: 100%|██████████| 71/71 [01:20<00:00,  1.13s/it, loss=0.0722] 
Validation epoch [8/10]: 100%|██████████| 8/8 [00:03<00:00,  2.31it/s]


Accuracy: 0.8560000061988831
F1 Score: 0.846443772315979


Training epoch [9/10]: 100%|██████████| 71/71 [01:21<00:00,  1.14s/it, loss=0.0824] 
Validation epoch [9/10]: 100%|██████████| 8/8 [00:03<00:00,  2.30it/s]


Accuracy: 0.8619999885559082
F1 Score: 0.8548198938369751


Training epoch [10/10]: 100%|██████████| 71/71 [01:20<00:00,  1.14s/it, loss=0.00441]
Validation epoch [10/10]: 100%|██████████| 8/8 [00:03<00:00,  2.26it/s]


Accuracy: 0.8539999723434448
F1 Score: 0.846053957939148


In [23]:
# Test the model on the test set
pbar = tqdm(dl_test)
for inputs in pbar:
    # Evaluate your model
    outputs_cls = None
    with torch.no_grad():
        outputs_cls = model(**inputs)
    # Output all the evaluation scores (SpearmanCorrCoef, Accuracy, F1Score)
    pred_cls = outputs_cls.argmax(dim=1)
    acc.update(pred_cls, inputs['labels_cls'])
    f1.update(pred_cls, inputs['labels_cls'])

# Print the evaluation scores
print(f'Accuracy: {acc.compute()}')
print(f'F1 Score: {f1.compute()}')

# Reset the evaluation metrics
acc.reset()
f1.reset()

100%|██████████| 77/77 [00:30<00:00,  2.53it/s]

Accuracy: 0.8591434955596924
F1 Score: 0.8502813577651978



