In [47]:
import os

import torch
import transformers as T
from datasets import load_dataset
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torchmetrics import SpearmanCorrCoef, Accuracy, F1Score
from tqdm import tqdm
# Pick the compute device in order of preference: CUDA GPU, Apple MPS, CPU.
if torch.cuda.is_available():
    device = "cuda:0"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [48]:
# Some Chinese punctuation marks are encoded as [UNK] by the tokenizer,
# so each one is mapped to its English counterpart. Every entry of
# `token_replacement` is a two-element list: [original, replacement].
_punctuation_map = {
    "：": ":",
    "，": ",",
    "“": "\"",
    "”": "\"",
    "？": "?",
    "……": "...",
    "！": "!",
}
token_replacement = [[source, target] for source, target in _punctuation_map.items()]

In [49]:
class SemevalDataset(Dataset):
    """In-memory view of one split of the `sem_eval_2014_task_1` dataset.

    The split is downloaded (or read from ``./cache/``) with
    ``datasets.load_dataset`` and kept as a list of dicts in ``self.data``.
    Each record carries ``premise``, ``hypothesis``, ``relatedness_score``
    and ``entailment_judgment`` (plus ``sentence_pair_id``).

    Args:
        split: Which split to load: "train", "validation" or "test".
    """

    def __init__(self, split="train") -> None:
        super().__init__()
        # "test" is accepted as well so the same class can feed test-set
        # prediction; "train"/"validation" behave exactly as before.
        assert split in ["train", "validation", "test"]
        self.data = load_dataset(
            "sem_eval_2014_task_1", split=split, cache_dir="./cache/"
        ).to_list()

    def __getitem__(self, index):
        """Return record `index` with Chinese punctuation replaced.

        A shallow copy is returned, so the record cached in ``self.data``
        is never modified by a lookup.
        """
        d = dict(self.data[index])
        # Replace Chinese punctuation in both sentences.
        for k in ["premise", "hypothesis"]:
            for source, target in token_replacement:
                d[k] = d[k].replace(source, target)
        return d

    def __len__(self):
        """Number of sentence pairs in the loaded split."""
        return len(self.data)

# Show the first three raw training records as a quick sanity check.
data_sample = SemevalDataset(split="train").data[:3]
print("Dataset example: \n{} \n{} \n{}".format(*data_sample))

Dataset example: 
{'sentence_pair_id': 1, 'premise': 'A group of kids is playing in a yard and an old man is standing in the background', 'hypothesis': 'A group of boys in a yard is playing and a man is standing in the background', 'relatedness_score': 4.5, 'entailment_judgment': 0} 
{'sentence_pair_id': 2, 'premise': 'A group of children is playing in the house and there is no man standing in the background', 'hypothesis': 'A group of kids is playing in a yard and an old man is standing in the background', 'relatedness_score': 3.200000047683716, 'entailment_judgment': 0} 
{'sentence_pair_id': 3, 'premise': 'The young boys are playing outdoors and the man is smiling nearby', 'hypothesis': 'The kids are playing outdoors near a man with a smile', 'relatedness_score': 4.699999809265137, 'entailment_judgment': 1}


In [50]:
# Hyperparameters: learning rate and number of passes over the training set.
lr, epochs = 3e-5, 3
# Mini-batch sizes used by the training and validation DataLoaders.
train_batch_size, validation_batch_size = 8, 8

In [51]:
# Load the tokenizer of the uncased BERT-base checkpoint; downloaded files
# are stored under ./cache/.
tokenizer = T.BertTokenizer.from_pretrained(
    "google-bert/bert-base-uncased",
    cache_dir="./cache/",
)

In [52]:
# TODO1: Create batched data for DataLoader
# `collate_fn` is a function that defines how the data batch should be packed.
# This function will be called in the DataLoader to pack the data batch.

def collate_fn(batch):
    """Pack a list of dataset records into padded tensors for the model.

    The premise and hypothesis are handed to the tokenizer as a sentence
    *pair* (text, text_pair) instead of being glued together with a literal
    " [SEP] " string. The tokenizer then inserts the special tokens itself
    and also returns `token_type_ids`, which mark which tokens belong to
    the premise and which to the hypothesis.

    Args:
        batch: Sequence of dicts, each with the keys 'premise',
            'hypothesis', 'relatedness_score' and 'entailment_judgment'.

    Returns:
        dict with:
            'input_ids', 'token_type_ids', 'attention_mask': tokenizer
                outputs, padded to the longest sequence in the batch.
            'relatedness_score': float tensor of regression targets.
            'entailment_judgment': long tensor of class labels.
    """
    premises = [item['premise'] for item in batch]
    hypotheses = [item['hypothesis'] for item in batch]

    encoded_inputs = tokenizer(
        premises,
        hypotheses,
        return_tensors='pt',
        padding=True,       # pad to the longest sequence in this batch
        truncation=True,    # cut pairs that exceed max_length
        max_length=512,
    )

    relatedness_scores_tensor = torch.tensor(
        [item['relatedness_score'] for item in batch], dtype=torch.float
    )
    entailment_judgments_tensor = torch.tensor(
        [item['entailment_judgment'] for item in batch], dtype=torch.long
    )

    return {
        'input_ids': encoded_inputs['input_ids'],
        'token_type_ids': encoded_inputs['token_type_ids'],
        'attention_mask': encoded_inputs['attention_mask'],
        'relatedness_score': relatedness_scores_tensor,
        'entailment_judgment': entailment_judgments_tensor
    }

    
# TODO1-2: Define your DataLoader
# Hand the Dataset objects themselves (not their raw `.data` lists) to the
# DataLoader so that SemevalDataset.__getitem__ -- and with it the
# punctuation replacement -- actually runs for every sample.
ds_train = SemevalDataset(split="train")
ds_validation = SemevalDataset(split="validation")
# Reshuffle the training set every epoch; keep validation order fixed.
dl_train = DataLoader(
    ds_train,
    batch_size=train_batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
dl_validation = DataLoader(
    ds_validation,
    batch_size=validation_batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

In [53]:
# TODO2: Construct your model
class MultiLabelModel(torch.nn.Module):
    """BERT encoder with two heads sharing the pooled sentence-pair vector.

    * `linear_relatedness`: one output, the predicted relatedness score.
    * `linear_entailment`: three outputs, logits over the entailment classes.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.bert = T.BertModel.from_pretrained("google-bert/bert-base-uncased", cache_dir="./cache/")
        self.dropout = torch.nn.Dropout(0.3)
        self.linear_relatedness = torch.nn.Linear(self.bert.config.hidden_size, 1)  # regression head
        self.linear_entailment = torch.nn.Linear(self.bert.config.hidden_size, 3)   # 3-way classification head

    def forward(self, **kwargs):
        """Run the encoder and both heads.

        Keyword Args:
            input_ids: Token ids (required).
            attention_mask: Padding mask (required).
            token_type_ids: Optional segment ids separating premise from
                hypothesis. When omitted, None is passed to BERT, which is
                what happened before this argument was supported.

        Returns:
            dict with
                'relatedness_score': tensor with the trailing size-1
                    dimension squeezed away (one value per example).
                'entailment_judgment': unnormalised logits, 3 per example.
        """
        outputs = self.bert(
            input_ids=kwargs['input_ids'],
            attention_mask=kwargs['attention_mask'],
            token_type_ids=kwargs.get('token_type_ids'),
        )
        # Pooled output of BERT, regularised with dropout.
        pooled_output = self.dropout(outputs.pooler_output)

        relatedness_score = self.linear_relatedness(pooled_output).squeeze(-1)
        entailment_judgment = self.linear_entailment(pooled_output)

        return {
            'relatedness_score': relatedness_score,
            'entailment_judgment': entailment_judgment
        }

In [54]:
# Build the network, then move its parameters onto the selected device.
model = MultiLabelModel()
model = model.to(device)

In [55]:
# TODO3: Define your optimizer and loss function

# TODO3-1: Define your Optimizer
# AdamW (already imported at the top of the notebook) applies decoupled
# weight decay, the usual optimizer choice when fine-tuning BERT.
optimizer = AdamW(model.parameters(), lr=lr)

# TODO3-2: Define your loss functions (you should have two)
MSE = torch.nn.MSELoss()                    # regression loss for the relatedness score
CrossEntorpy = torch.nn.CrossEntropyLoss()  # classification loss for the entailment label
# NOTE: the name `CrossEntorpy` is misspelled, but it is kept as-is because
# the training loop refers to it by this name.

# scoring functions
spc = SpearmanCorrCoef()
acc = Accuracy(task="multiclass", num_classes=3)
f1 = F1Score(task="multiclass", num_classes=3, average='macro')



In [None]:
def _model_inputs(batch):
    """Move the encoder inputs that are present in `batch` onto `device`.

    `token_type_ids` is forwarded only when the collate function provides
    it, so this works with batches that carry just ids and attention mask.
    """
    return {
        key: batch[key].to(device)
        for key in ("input_ids", "attention_mask", "token_type_ids")
        if key in batch
    }


# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first epoch finishes.
os.makedirs("./saved_models", exist_ok=True)

for ep in range(epochs):
    # ---- training -------------------------------------------------------
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model.train()
    for batch in pbar:
        relatedness_scores = batch['relatedness_score'].to(device)
        entailment_judgments = batch['entailment_judgment'].to(device)

        optimizer.zero_grad()
        pred = model(**_model_inputs(batch))
        loss_regression = MSE(pred['relatedness_score'], relatedness_scores)
        loss_classification = CrossEntorpy(pred['entailment_judgment'], entailment_judgments)
        # Joint objective: unweighted sum of the two task losses.
        loss = loss_regression + loss_classification
        loss.backward()
        optimizer.step()
        # Report the running loss on the progress bar instead of printing
        # the full prediction tensors at every step.
        pbar.set_postfix(loss=loss.item())

    # ---- validation -----------------------------------------------------
    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model.eval()
    # Start each validation pass from clean metric state (also protects
    # against leftovers from an interrupted earlier run of this cell).
    spc.reset()
    acc.reset()
    f1.reset()
    with torch.no_grad():
        total_val_loss = 0
        for batch in pbar:
            relatedness_scores = batch['relatedness_score'].to(device)
            entailment_judgments = batch['entailment_judgment'].to(device)

            pred = model(**_model_inputs(batch))

            loss_relatedness = MSE(pred['relatedness_score'], relatedness_scores)
            loss_entailment = CrossEntorpy(pred['entailment_judgment'], entailment_judgments)
            total_val_loss += (loss_relatedness + loss_entailment).item()

            # The metric objects were created on the CPU, so move
            # predictions and targets there before accumulating.
            spc.update(pred['relatedness_score'].float().cpu(), relatedness_scores.float().cpu())
            predicted_labels = pred['entailment_judgment'].argmax(dim=-1).cpu()
            acc.update(predicted_labels, entailment_judgments.cpu())
            f1.update(predicted_labels, entailment_judgments.cpu())

        avg_val_loss = total_val_loss / len(dl_validation)
        print(f"Validation Loss: {avg_val_loss}")
        # Scores over the whole validation set for this epoch.
        print(
            f"Spearman Corr: {spc.compute().item():.4f} | "
            f"Accuracy: {acc.compute().item():.4f} | "
            f"F1 (macro): {f1.compute().item():.4f}"
        )
    torch.save(model, f'./saved_models/ep{ep}.ckpt')

For test-set predictions, you can perform an evaluation similar to the one in TODO5.