In [1]:
import transformers
import torch
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from torchmetrics import SpearmanCorrCoef, Accuracy, F1Score

In [2]:
# 有些中文的標點符號在tokenizer編碼以後會變成[UNK]，所以將其換成英文標點

token_replacement = [
    ["：" , ":"],
    ["，" , ","],
    ["“" , "\""],
    ["”" , "\""],
    ["？" , "?"],
    ["……" , "..."],
    ["！" , "!"]
]

In [3]:
class SemevalDataset(Dataset):
    def __init__(self, split="train") -> None:
        super().__init__()
        assert split in ["train", "validation", "test"]
        self.data = load_dataset(
            "sem_eval_2014_task_1", split=split, cache_dir="./cache/", trust_remote_code=True
        ).to_list()

    def __getitem__(self, index):
        d = self.data[index]
        for k in ["premise", "hypothesis"]:
            for tok in token_replacement:
                d[k] = d[k].replace(tok[0], tok[1])
        return d

    def __len__(self):
        return len(self.data)

data_sample = SemevalDataset(split="train").data[:3]
print(f"Dataset example: \n{data_sample[0]} \n{data_sample[1]} \n{data_sample[2]}")

train_dataset = SemevalDataset(split="train").data
validation_dataset = SemevalDataset(split="validation").data
test_dataset = SemevalDataset(split="test").data

print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(validation_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

Dataset example: 
{'sentence_pair_id': 1, 'premise': 'A group of kids is playing in a yard and an old man is standing in the background', 'hypothesis': 'A group of boys in a yard is playing and a man is standing in the background', 'relatedness_score': 4.5, 'entailment_judgment': 0} 
{'sentence_pair_id': 2, 'premise': 'A group of children is playing in the house and there is no man standing in the background', 'hypothesis': 'A group of kids is playing in a yard and an old man is standing in the background', 'relatedness_score': 3.200000047683716, 'entailment_judgment': 0} 
{'sentence_pair_id': 3, 'premise': 'The young boys are playing outdoors and the man is smiling nearby', 'hypothesis': 'The kids are playing outdoors near a man with a smile', 'relatedness_score': 4.699999809265137, 'entailment_judgment': 1}
Train dataset size: 4500
Validation dataset size: 500
Test dataset size: 4927


In [4]:
# Define the hyperparameters
lr = 3e-5
epochs = 3
train_batch_size = 16
validation_batch_size = 16
pretrain_model = 'microsoft/deberta-v3-base'
alpha = 0.75
beta = 0.25

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

Device: cuda


In [5]:
tokenizer = transformers.DebertaV2Tokenizer.from_pretrained(pretrain_model, cache_dir='./cache/')

In [6]:
# TODO1: Create batched data for DataLoader
# `collate_fn` is a function that defines how the data batch should be packed.
# This function will be called in the DataLoader to pack the data batch.

def collate_fn(batch):
    premises = [item['premise'] for item in batch]
    hypotheses = [item['hypothesis'] for item in batch]
    relatedness_scores = [item['relatedness_score'] for item in batch]
    entailment_judgments = [item['entailment_judgment'] for item in batch]

    # Tokenize premises and hypotheses, padding to max length within batch
    tokenized_data = tokenizer(
        premises, 
        hypotheses, 
        padding=True, 
        truncation=True, 
        return_tensors="pt"
    )

    # Convert relatedness_scores and entailment_judgments to tensors
    labels_relatedness = torch.tensor(relatedness_scores, dtype=torch.float32)
    labels_entailment = torch.tensor(entailment_judgments, dtype=torch.long)

    # Return tokenized data and labels
    return {
        "input_ids": tokenized_data["input_ids"],
        "attention_mask": tokenized_data["attention_mask"],
        "relatedness_score": labels_relatedness,
        "entailment_judgment": labels_entailment
    }


# TODO1-2: Define your DataLoader
dl_train = DataLoader(
    SemevalDataset(split="train"),
    batch_size=train_batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
dl_validation = DataLoader(
    SemevalDataset(split="validation"),
    batch_size=validation_batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)
dl_test = DataLoader(
    SemevalDataset(split="test"),
    batch_size=validation_batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

In [7]:
# TODO2: Construct your model
class Model1(torch.nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Write your code here
        # Define what modules you will use in the model
        self.encoder = transformers.AutoModel.from_pretrained(pretrain_model, cache_dir="./cache/")
        self.regression_head = torch.nn.Linear(self.encoder.config.hidden_size, 1)
    def forward(self, **kwargs):
        # Write your code here
        # Forward pass

        output = self.encoder(**kwargs)
        
        last_hidden_state = output.last_hidden_state  # 可以使用 last_hidden_state 或 pooler_output
        pooled_output = last_hidden_state[:, 0]  # 使用 [CLS] token 作為 pooled_output
        
        regression_logits = self.regression_head(pooled_output)

        return regression_logits
    
class Model2(torch.nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Write your code here
        # Define what modules you will use in the model
        self.encoder = transformers.AutoModel.from_pretrained(pretrain_model, cache_dir="./cache/")
        self.classification_head = torch.nn.Linear(self.encoder.config.hidden_size, 3)
    def forward(self, **kwargs):
        # Write your code here
        # Forward pass

        output = self.encoder(**kwargs)
        
        last_hidden_state = output.last_hidden_state  # 可以使用 last_hidden_state 或 pooler_output
        pooled_output = last_hidden_state[:, 0]  # 使用 [CLS] token 作為 pooled_output
        
        classification_logits = self.classification_head(pooled_output)

        return classification_logits

In [8]:
model1 = Model1().to(device)
model2 = Model2().to(device)

In [9]:
# TODO3: Define your optimizer and loss function

# TODO3-1: Define your Optimizer
optimizer1 = AdamW(model1.parameters(), lr=lr, weight_decay=0.01)
optimizer2 = AdamW(model2.parameters(), lr=lr, weight_decay=0.01)


# TODO3-2: Define your loss functions (you should have two)
# Write your code here
mse_loss = torch.nn.MSELoss()
cross_entropy_loss = torch.nn.CrossEntropyLoss()

# scoring functions
spc = SpearmanCorrCoef().to(device)
acc = Accuracy(task="multiclass", num_classes=3).to(device)
f1 = F1Score(task="multiclass", num_classes=3, average='macro').to(device)



In [10]:
for ep in range(epochs):
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model1.train()

    total_loss = 0
    for batch in pbar:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # token_type_ids = batch['token_type_ids'].to(device)
        relatedness_score = batch['relatedness_score'].to(device)

        # Forward pass
        pred_relatedness_score= model1(
            input_ids=input_ids, 
            attention_mask=attention_mask,
        )

        # Compute loss
        loss_relatedness = mse_loss(pred_relatedness_score.squeeze(), relatedness_score)
        loss = loss_relatedness

        optimizer1.zero_grad()
        loss.backward()
        optimizer1.step()

        total_loss += loss.item()

    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model1.eval()

    val_loss = 0
    with torch.no_grad():
        for batch in pbar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            relatedness_score = batch['relatedness_score'].to(device)

            pred_relatedness_score = model1(
                input_ids=input_ids, 
                attention_mask=attention_mask,
            )

            # Validation loss calculation
            val_loss_relatedness = mse_loss(pred_relatedness_score.squeeze(), relatedness_score)
            val_loss += val_loss_relatedness
            # Metrics calculation
            spc.update(pred_relatedness_score.squeeze(), relatedness_score)

            

    print(f"Epoch {ep+1}")
    print(f"Validation Loss:     {val_loss / len(dl_validation)}")
    print(f"Spearman:            {spc.compute()}")
    print("")

    spc.reset()

Training epoch [1/3]:   0%|          | 0/282 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Training epoch [1/3]: 100%|██████████| 282/282 [00:28<00:00,  9.81it/s]
Validation epoch [1/3]: 100%|██████████| 32/32 [00:00<00:00, 52.20it/s]


Epoch 1
Validation Loss:     0.3362080752849579
Spearman:            0.8637570738792419



Training epoch [2/3]: 100%|██████████| 282/282 [00:27<00:00, 10.16it/s]
Validation epoch [2/3]: 100%|██████████| 32/32 [00:00<00:00, 50.29it/s]


Epoch 2
Validation Loss:     0.3065238296985626
Spearman:            0.8817991018295288



Training epoch [3/3]: 100%|██████████| 282/282 [00:27<00:00, 10.12it/s]
Validation epoch [3/3]: 100%|██████████| 32/32 [00:00<00:00, 51.74it/s]

Epoch 3
Validation Loss:     0.22525528073310852
Spearman:            0.8875921964645386






In [11]:
for ep in range(epochs):
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model2.train()

    total_loss = 0
    for batch in pbar:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        entailment_judgment = batch['entailment_judgment'].to(device)

        # Forward pass
        pred_entailment_judgment = model2(
            input_ids=input_ids, 
            attention_mask=attention_mask,
        )

        # Compute loss
        loss_entailment = cross_entropy_loss(pred_entailment_judgment, entailment_judgment)
        loss = loss_entailment


        # Backward and optimize
        optimizer2.zero_grad()
        loss.backward()
        optimizer2.step()

        total_loss += loss.item()

    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model2.eval()
    # TODO5: Write the evaluation loop
    # Write your code here
    # Evaluate your model
    # Output all the evaluation scores (SpearmanCorrCoef, Accuracy, F1Score)

    val_loss = 0
    with torch.no_grad():
        for batch in pbar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            entailment_judgment = batch['entailment_judgment'].to(device)

            pred_entailment_judgment = model2(
                input_ids=input_ids, 
                attention_mask=attention_mask,
            )

            # Validation loss calculation
            val_loss_entailment = cross_entropy_loss(pred_entailment_judgment, entailment_judgment)
            val_loss += val_loss_entailment

            # Metrics calculation
            acc.update(pred_entailment_judgment, entailment_judgment)
            f1.update(pred_entailment_judgment, entailment_judgment)

            

    print(f"Epoch {ep+1}")
    print(f"Validation Loss:     {val_loss / len(dl_validation)}")
    print(f"Accuracy:            {acc.compute()}")
    print(f"F1 Score:            {f1.compute()}")
    print("")
    # torch.save(model, f'./models/ep{ep}.ckpt')

    acc.reset()
    f1.reset()  

Training epoch [1/3]: 100%|██████████| 282/282 [00:28<00:00,  9.83it/s]
Validation epoch [1/3]: 100%|██████████| 32/32 [00:00<00:00, 45.78it/s]


Epoch 1
Validation Loss:     0.23854047060012817
Accuracy:            0.9020000100135803
F1 Score:            0.8973813056945801



Training epoch [2/3]: 100%|██████████| 282/282 [00:27<00:00, 10.21it/s]
Validation epoch [2/3]: 100%|██████████| 32/32 [00:00<00:00, 46.86it/s]


Epoch 2
Validation Loss:     0.3705654442310333
Accuracy:            0.8880000114440918
F1 Score:            0.8845459222793579



Training epoch [3/3]: 100%|██████████| 282/282 [00:28<00:00,  9.88it/s]
Validation epoch [3/3]: 100%|██████████| 32/32 [00:00<00:00, 44.42it/s]

Epoch 3
Validation Loss:     0.3379969298839569
Accuracy:            0.8960000276565552
F1 Score:            0.8890994191169739






In [12]:
pbar = tqdm(dl_test)
model1.eval()

with torch.no_grad():
    for batch in pbar:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        relatedness_score = batch['relatedness_score'].to(device)


        pred_relatedness_score = model1(
            input_ids=input_ids, 
            attention_mask=attention_mask,
        )

        # Metrics calculation
        spc.update(pred_relatedness_score.squeeze(), relatedness_score)

print(f"Spearman: {spc.compute()}")
print("")

spc.reset()

100%|██████████| 308/308 [00:06<00:00, 46.59it/s]


Spearman: 0.8823131322860718



In [13]:
pbar = tqdm(dl_test)
model2.eval()

with torch.no_grad():
    for batch in pbar:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        entailment_judgment = batch['entailment_judgment'].to(device)

        pred_entailment_judgment = model2(
            input_ids=input_ids, 
            attention_mask=attention_mask,
        )

        # Metrics calculation
        acc.update(pred_entailment_judgment, entailment_judgment)
        f1.update(pred_entailment_judgment, entailment_judgment)


print(f"Accuracy: {acc.compute()}")
print(f"F1 Score: {f1.compute()}")
print("")

acc.reset()
f1.reset()

100%|██████████| 308/308 [00:06<00:00, 50.32it/s]

Accuracy: 0.9163791537284851
F1 Score: 0.9058225154876709




