# 자연어처리 과제 5 (9주차)
* 과제는 해당 .ipynb 파일에 코드 작성
    * 코드는 google colab의 gpu를 사용하는 런타임 환경에서 모두실행을 통해 한번에 실행 되어야함
    * 생성형 AI (ChatGPT, Copilot, Claude, ...) 등 사용 가능
        * 단, 사용시 사용한 방법, 입력, 출력을 캡처해 보고서에 기입
* Word를 통해 자유형식으로 보고서를 작성
    * 보고서의 양식은 자유
    * 보고서의 제출은 .pdf 형식으로 제출해야하며, 파일명은 "학번_이름_HW_??.pdf"로 제출 할 것
    * 보고서에 코드를 그대로 복붙 하지 말 것 (캡처 도구를 활용, 환경 설치 자료 참고)
* .ipynb와 .pdf 파일을 el을 통해 제출
    * 예시 : "2232036006_임상훈_HW_01.ipynb"와 "2232036006_임상훈_HW_01.pdf"를 제출

## 1. IMDB PLM 학습 (45점)

* HW 2에서 진행한 IMDB 데이터셋을 학습 및 분류하는 PLM 기반 분류기를 작성하시오.
    * 실습에 사용하지 않은 PLM 모델을 huggingface hub에서 불러와 사용하시오.
        * 실습에 사용한 데이터는 한국어 데이터이지만 IMDB의 경우 영어 데이터임

* 최소 3개의 PLM을 동일 hyperparameter를 통해 학습 및 평가 해 보고 성능을 비교 분석 하시오
    * 각자가 구현 및 제출한 HW2의 모델과 비교하여 어떠한지
    * 각 PLM 별로 성능이 어떠하고 왜 그런지

**GRADING**
* PLM 1개당 15점 (총 45점)

In [None]:
# Colab
# !pip install wandb lightning

In [1]:
import os
import torch
import torch.nn as nn
import random
import numpy as np

np.random.seed(0)
random.seed(0)
torch.manual_seed(0)

<torch._C.Generator at 0x757e845e7cd0>

In [3]:
from requests import get

# download files for sentiment classification
def download(url, filename):
    with open(filename, "wb") as file:
        response = get(url)
        file.write(response.content)

download("https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", "aclImdb_v1.tar.gz")

In [31]:
# Colab
# !tar -xzf /content/aclImdb_v1.tar.gz

In [32]:
from pathlib import Path

raw_data_dir = './aclImdb'
data_dir = Path(raw_data_dir)

train_datas = []
test_datas = []

for sentiment in ["pos", "neg"]:
    samples = list(data_dir.glob(f"train/{sentiment}/*.txt"))
    train_datas.extend(samples)

for sentiment in ["pos", "neg"]:
    samples = list(data_dir.glob(f"test/{sentiment}/*.txt"))
    test_datas.extend(samples)

train_file = open("train.txt", "w", encoding="utf-8")
test_file = open("test.txt", "w", encoding="utf-8")

for file, datas in [(train_file, train_datas), (test_file, test_datas)]:
    file.write("id\ttext\tlabel\n")
    for data in datas:
        lines = [line.strip().replace("\t", " ") for line in data.open().readlines()]
        text = " ".join(lines)
        id = data.name[:-4]
        label = 1 if "pos" in data.parts else 0
        file.write(f"{id}\t{text}\t{label}\n")

train_file.close()
test_file.close()

In [2]:
with open("train.txt", "r", encoding="utf-8") as file:
    contents = file.read()
    lines = contents.split("\n")[1:]
    train_data = [line.split("\t") for line in lines if len(line) > 0]

with open("test.txt", "r", encoding="utf-8") as file:
    contents = file.read()
    lines = contents.split("\n")[1:]
    test_data = [line.split("\t") for line in lines if len(line) > 0]

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import tqdm

plm_name = "lvwerra/distilbert-imdb"

tokenizer = AutoTokenizer.from_pretrained(plm_name)
model = AutoModelForSequenceClassification.from_pretrained(plm_name, num_labels=2)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from torch.utils.data import Dataset, DataLoader

# define dataset class
class SentimentDataset(Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        label = int(self.data[index][2])
        text = self.data[index][1]
        inputs = self.tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=128)
        inputs = {key: inputs[key].squeeze() for key in inputs}

        return inputs|{'label':torch.tensor(label)}

In [5]:
train_dataset = SentimentDataset(train_data,tokenizer)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

for batch in train_loader:
    label = batch.pop('label')
    model(**batch)
    break

In [33]:
import lightning as pl

class SentimentClassifierPL(pl.LightningModule):
    def __init__(self, sentiment_classifier):
        super(SentimentClassifierPL, self).__init__()
        self.model = sentiment_classifier

        self.validation_step_outputs = []
        self.test_step_outputs = []
        self.save_hyperparameters()

    def forward(self, inputs):
        return self.model(inputs)

    def training_step(self, batch, batch_idx):
        labels = batch.pop('label')
        outputs = model(**batch, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        labels = batch.pop('label')
        outputs = model(**batch, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        self.log("val_loss", loss)
        self.validation_step_outputs.append((loss, logits, labels))
        return loss, outputs, labels

    def on_validation_epoch_end(self):
        outputs = self.validation_step_outputs
        avg_loss = torch.stack([x[0] for x in outputs]).mean()
        self.log("avg_val_loss", avg_loss)

        all_outputs = torch.cat([x[1] for x in outputs])
        all_labels = torch.cat([x[2] for x in outputs])
        all_preds = all_outputs.argmax(dim=1)
        accuracy = (all_preds == all_labels).float().mean()
        self.log("val_accuracy", accuracy)
        self.validation_step_outputs.clear()

    def test_step(self, batch, batch_idx):
        labels = batch.pop('label')
        outputs = model(**batch, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        self.log("test_loss", loss)
        self.test_step_outputs.append((loss, logits, labels))
        return loss, outputs, labels

    def on_test_epoch_end(self):
        outputs = self.test_step_outputs
        avg_loss = torch.stack([x[0] for x in outputs]).mean()
        self.log("avg_test_loss", avg_loss)

        all_outputs = torch.cat([x[1] for x in outputs])
        all_labels = torch.cat([x[2] for x in outputs])
        all_preds = all_outputs.argmax(dim=1)
        accuracy = (all_preds == all_labels).float().mean()
        self.log("test_accuracy", accuracy)
        self.test_step_outputs.clear()

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=5e-6)
        return optimizer

In [34]:
import wandb
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import ModelSummary

wandb.login()

def check_performance(model,tokenizer, train_data, test_data, wandb_log_name):
    wandb_logger = WandbLogger(project="NLP", name=wandb_log_name, group="HW05")

    pl_model = SentimentClassifierPL(model)

    train_dataset = SentimentDataset(train_data,tokenizer)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_dataset = SentimentDataset(test_data,tokenizer)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_dataset = SentimentDataset(test_data,tokenizer)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    trainer = pl.Trainer(
        max_epochs=1,
        accelerator="gpu",
        logger=wandb_logger,
        callbacks=[ModelSummary(max_depth=2)],
        precision=16
    )

    trainer.fit(
        model=pl_model,
        train_dataloaders=train_loader,
        val_dataloaders=val_loader
    )

    trainer.test(dataloaders=test_loader)

    wandb.finish()

### (1) DistilBert

In [None]:
pl_model = SentimentClassifierPL(model)

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'sentiment_classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['sentiment_classifier'])`.


In [None]:
check_performance(pl_model, tokenizer, train_data, test_data, "DistilBert-IMDB")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                                | Params
--------------------------------------------------------------------
0 | model       | SentimentClassifierPL               | 67.0 M
1 | model.model | DistilBertForSequenceClassification | 67.0 M
--------------------------------------------------------------------
67.0 M    Trainable params
0         Non-trainable params
67.0 M    Total params
267.820   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 391/391 [01:11<00:00,  5.50it/s, v_num=72qh]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 391/391 [01:12<00:00,  5.42it/s, v_num=72qh]


Restoring states from the checkpoint path at ./NLP/vnyu72qh/checkpoints/epoch=0-step=391.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/vnyu72qh/checkpoints/epoch=0-step=391.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 391/391 [00:16<00:00, 23.12it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      avg_test_loss         0.2904364764690399
      test_accuracy         0.8765599727630615
        test_loss           0.2904650568962097
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
avg_test_loss,▁
avg_val_loss,▁
epoch,▁▁▁▁▁▁▁▁█
test_accuracy,▁
test_loss,▁
train_loss,▂▃█▁▄▇▅
trainer/global_step,▁▂▃▄▅▆▇██
val_accuracy,▁
val_loss,▁

0,1
avg_test_loss,0.29044
avg_val_loss,0.29044
epoch,1.0
test_accuracy,0.87656
test_loss,0.29047
train_loss,0.25875
trainer/global_step,391.0
val_accuracy,0.87656
val_loss,0.29047


### (2) XLNet

In [None]:
plm_name = "textattack/xlnet-base-cased-imdb"

tokenizer = AutoTokenizer.from_pretrained(plm_name)
model = AutoModelForSequenceClassification.from_pretrained(plm_name, num_labels=2)

In [None]:
pl_model = SentimentClassifierPL(model)

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'sentiment_classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['sentiment_classifier'])`.


In [None]:
check_performance(pl_model, tokenizer, train_data, test_data, "XLNet-IMDB")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                           | Params
---------------------------------------------------------------
0 | model       | SentimentClassifierPL          | 117 M 
1 | model.model | XLNetForSequenceClassification | 117 M 
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total params
469.242   Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 782/782 [04:28<00:00,  2.92it/s, v_num=33bw]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 782/782 [04:29<00:00,  2.90it/s, v_num=33bw]


Restoring states from the checkpoint path at ./NLP/1ced33bw/checkpoints/epoch=0-step=782.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/1ced33bw/checkpoints/epoch=0-step=782.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 782/782 [01:17<00:00, 10.03it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      avg_test_loss          0.251415491104126
      test_accuracy         0.9017599821090698
        test_loss           0.2514938712120056
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
avg_test_loss,▁
avg_val_loss,▁
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
test_accuracy,▁
test_loss,▁
train_loss,▂▃▃▂▃▂▇▁█▃▅▅▄▆▂
trainer/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇███
val_accuracy,▁
val_loss,▁

0,1
avg_test_loss,0.25142
avg_val_loss,0.25142
epoch,1.0
test_accuracy,0.90176
test_loss,0.25149
train_loss,0.07707
trainer/global_step,782.0
val_accuracy,0.90176
val_loss,0.25149


### (3) OPT

In [None]:
plm_name = "edbeeching/opt-125m-imdb"

tokenizer = AutoTokenizer.from_pretrained(plm_name)
model = AutoModelForSequenceClassification.from_pretrained(plm_name, num_labels=2)

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at edbeeching/opt-125m-imdb and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
pl_model = SentimentClassifierPL(model)

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'sentiment_classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['sentiment_classifier'])`.


In [None]:
check_performance(pl_model, tokenizer, train_data, test_data, "OPT-IMDB")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                         | Params
-------------------------------------------------------------
0 | model       | SentimentClassifierPL        | 125 M 
1 | model.model | OPTForSequenceClassification | 125 M 
-------------------------------------------------------------
125 M     Trainable params
0         Non-trainable params
125 M     Total params
500.963   Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 782/782 [02:55<00:00,  4.45it/s, v_num=349g]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 782/782 [02:57<00:00,  4.40it/s, v_num=349g]


Restoring states from the checkpoint path at ./NLP/4k2c349g/checkpoints/epoch=0-step=782.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/4k2c349g/checkpoints/epoch=0-step=782.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 782/782 [00:43<00:00, 17.87it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      avg_test_loss         0.2678300142288208
      test_accuracy         0.8889999985694885
        test_loss           0.2679620385169983
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
avg_test_loss,▁
avg_val_loss,▁
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
test_accuracy,▁
test_loss,▁
train_loss,█▄▁▆▅▄▂▇▅▄▄▃▂▃▄
trainer/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇███
val_accuracy,▁
val_loss,▁

0,1
avg_test_loss,0.26783
avg_val_loss,0.26783
epoch,1.0
test_accuracy,0.889
test_loss,0.26796
train_loss,0.28882
trainer/global_step,782.0
val_accuracy,0.889
val_loss,0.26796


## 2. Hyperparameter 조정 (15점)

* 1번 과제의 가장 성능이 좋은 모델을 통해 hyperparameter 변화에 따른 성능을 비교하시오.
    * Batch size, learning rate에 대한 성능 비교 필수

    
**GRADING**
* Hyperparameter 당 5점 (총 15점)

In [6]:
plm_name = "textattack/xlnet-base-cased-imdb"

tokenizer = AutoTokenizer.from_pretrained(plm_name)
model = AutoModelForSequenceClassification.from_pretrained(plm_name, num_labels=2)

In [7]:
import lightning as pl

class SentimentClassifierPL(pl.LightningModule):
    def __init__(self, sentiment_classifier, lr):
        super(SentimentClassifierPL, self).__init__()
        self.model = sentiment_classifier
        self.lr = lr

        self.validation_step_outputs = []
        self.test_step_outputs = []
        self.save_hyperparameters()

    def forward(self, inputs):
        return self.model(inputs)

    def training_step(self, batch, batch_idx):
        labels = batch.pop('label')
        outputs = model(**batch, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        labels = batch.pop('label')
        outputs = model(**batch, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        self.log("val_loss", loss)
        self.validation_step_outputs.append((loss, logits, labels))
        return loss, outputs, labels

    def on_validation_epoch_end(self):
        outputs = self.validation_step_outputs
        avg_loss = torch.stack([x[0] for x in outputs]).mean()
        self.log("avg_val_loss", avg_loss)

        all_outputs = torch.cat([x[1] for x in outputs])
        all_labels = torch.cat([x[2] for x in outputs])
        all_preds = all_outputs.argmax(dim=1)
        accuracy = (all_preds == all_labels).float().mean()
        self.log("val_accuracy", accuracy)
        self.validation_step_outputs.clear()

    def test_step(self, batch, batch_idx):
        labels = batch.pop('label')
        outputs = model(**batch, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        self.log("test_loss", loss)
        self.test_step_outputs.append((loss, logits, labels))
        return loss, outputs, labels

    def on_test_epoch_end(self):
        outputs = self.test_step_outputs
        avg_loss = torch.stack([x[0] for x in outputs]).mean()
        self.log("avg_test_loss", avg_loss)

        all_outputs = torch.cat([x[1] for x in outputs])
        all_labels = torch.cat([x[2] for x in outputs])
        all_preds = all_outputs.argmax(dim=1)
        accuracy = (all_preds == all_labels).float().mean()
        self.log("test_accuracy", accuracy)
        self.test_step_outputs.clear()

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)
        return optimizer

In [11]:
import wandb
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import ModelSummary

wandb.login()

def check_performance(model,tokenizer, train_data, test_data, epoch, batch, lr, wandb_log_name):
    wandb_logger = WandbLogger(project="NLP", name=wandb_log_name, group="HW05")

    pl_model = SentimentClassifierPL(model, lr)

    train_dataset = SentimentDataset(train_data,tokenizer)
    train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True)
    val_dataset = SentimentDataset(test_data,tokenizer)
    val_loader = DataLoader(val_dataset, batch_size=batch, shuffle=False)
    test_dataset = SentimentDataset(test_data,tokenizer)
    test_loader = DataLoader(test_dataset, batch_size=batch, shuffle=False)

    trainer = pl.Trainer(
        max_epochs=epoch,
        accelerator="gpu",
        logger=wandb_logger,
        callbacks=[ModelSummary(max_depth=2)],
        precision=16
    )

    trainer.fit(
        model=pl_model,
        train_dataloaders=train_loader,
        val_dataloaders=val_loader
    )

    trainer.test(dataloaders=test_loader)

    wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnoeyhesx[0m. Use [1m`wandb login --relogin`[0m to force relogin


### (1) Batch Size 조절

In [38]:
lr = 5e-6
pl_model = SentimentClassifierPL(model, lr)

In [None]:
check_performance(pl_model, tokenizer, train_data, test_data, 1, 16, lr, "XLNet-Batch16")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'sentiment_classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['sentiment_classifier'])`.
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                           | Params
---------------------------------------------------------------
0 | model       | SentimentClassifierPL          | 117 M 
1 | model.model | XLNetForSequenceClassification | 117 M 
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total params
469.242   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 54.66it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 1563/1563 [04:15<00:00,  6.13it/s, v_num=j1t1]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 1563/1563 [04:17<00:00,  6.08it/s, v_num=j1t1]


Restoring states from the checkpoint path at ./NLP/qqznj1t1/checkpoints/epoch=0-step=1563.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/qqznj1t1/checkpoints/epoch=0-step=1563.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 1563/1563 [01:01<00:00, 25.62it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      avg_test_loss         0.3387964963912964
      test_accuracy         0.8987199664115906
        test_loss           0.3388260304927826
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
avg_test_loss,▁
avg_val_loss,▁
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
test_accuracy,▁
test_loss,▁
train_loss,▁▃▃▁▁▂▁▁▁▂▁▁▁▃▂▂▁▁▁▁▂▁▁▁▆▁▂█▂▄▅
trainer/global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇████
val_accuracy,▁
val_loss,▁

0,1
avg_test_loss,0.3388
avg_val_loss,0.3388
epoch,1.0
test_accuracy,0.89872
test_loss,0.33883
train_loss,0.19262
trainer/global_step,1563.0
val_accuracy,0.89872
val_loss,0.33883


In [None]:
lr = 5e-6
pl_model = SentimentClassifierPL(model, lr)

In [39]:
check_performance(pl_model, tokenizer, train_data, test_data, 1, 128, lr, "XLNet-Batch128")

/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO:lightning.pytorch.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO: Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
INFO:lightning.pytorch.utilities.rank_zero:Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name        | Type                           | Params
---------------------------------------------------------------
0 | model       | SentimentClassifierPL          | 117 M 
1 | model.model | XLNetForSequenceClassification | 117 M 
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total params
469.242   Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name        | Type                           | Params
---------------------------------------------------------------
0 | model       | SentimentClassifierPL          | 117 M 
1 | model.model | XLNetForSequenceClassification | 117 M 
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total params

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=1` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
INFO: Restoring states from the checkpoint path at ./NLP/y3u7q244/checkpoints/epoch=0-step=196.ckpt
INFO:lightning.pytorch.utilities.rank_zero:Restoring states from the checkpoint path at ./NLP/y3u7q244/checkpoints/epoch=0-step=196.ckpt
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: Loaded model weights from the checkpoint at ./NLP/y3u7q244/checkpoints/epoch=0-step=196.ckpt
INFO:lightning.pytorch.utilities.rank_zero:Loaded model weights from the checkpoint at ./NLP/y3u7q244/checkpoints/epoch=0-step=196.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.014 MB uploaded\r'), FloatProgress(value=0.08646901011212768, max=1.…

0,1
avg_test_loss,▁
avg_val_loss,▁
epoch,▁▁▁▁█
test_accuracy,▁
test_loss,▁
train_loss,▃█▁
trainer/global_step,▁▃▆██
val_accuracy,▁
val_loss,▁

0,1
avg_test_loss,0.23204
avg_val_loss,0.23204
epoch,1.0
test_accuracy,0.90472
test_loss,0.23207
train_loss,0.07869
trainer/global_step,196.0
val_accuracy,0.90472
val_loss,0.23207


### (2) Learning Rate 조절

In [15]:
plm_name = "textattack/xlnet-base-cased-imdb"

tokenizer = AutoTokenizer.from_pretrained(plm_name)
model = AutoModelForSequenceClassification.from_pretrained(plm_name, num_labels=2)

lr = 1e-6
pl_model = SentimentClassifierPL(model, lr)
check_performance(pl_model, tokenizer, train_data, test_data, 1, 32, lr, "XLNet-LR1e-6")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'sentiment_classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['sentiment_classifier'])`.
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                           | Params
---------------------------------------------------------------
0 | model       | SentimentClassifierPL          | 117 M 
1 | model.model | XLNetForSequenceClassification | 117 M 
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total params
469.242   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 35.42it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 782/782 [04:31<00:00,  2.88it/s, v_num=vzsu]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 782/782 [04:33<00:00,  2.86it/s, v_num=vzsu]


Restoring states from the checkpoint path at ./NLP/b670vzsu/checkpoints/epoch=0-step=782.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/b670vzsu/checkpoints/epoch=0-step=782.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 782/782 [01:21<00:00,  9.55it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      avg_test_loss         0.24275422096252441
      test_accuracy         0.9020799994468689
        test_loss           0.24275566637516022
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
avg_test_loss,▁
avg_val_loss,▁
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
test_accuracy,▁
test_loss,▁
train_loss,▂▁▁▇▄▄▃▄▅▇▃▅▄█▇
trainer/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇███
val_accuracy,▁
val_loss,▁

0,1
avg_test_loss,0.24275
avg_val_loss,0.24275
epoch,1.0
test_accuracy,0.90208
test_loss,0.24276
train_loss,0.23843
trainer/global_step,782.0
val_accuracy,0.90208
val_loss,0.24276


In [16]:
plm_name = "textattack/xlnet-base-cased-imdb"

tokenizer = AutoTokenizer.from_pretrained(plm_name)
model = AutoModelForSequenceClassification.from_pretrained(plm_name, num_labels=2)

lr = 3e-5
pl_model = SentimentClassifierPL(model, lr)
check_performance(pl_model, tokenizer, train_data, test_data, 1, 32, lr, "XLNet-LR3e-5")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'sentiment_classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['sentiment_classifier'])`.
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                           | Params
---------------------------------------------------------------
0 | model       | SentimentClassifierPL          | 117 M 
1 | model.model | XLNetForSequenceClassification | 117 M 
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total params
469.242   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 27.85it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 782/782 [04:27<00:00,  2.92it/s, v_num=0yzg]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 782/782 [04:29<00:00,  2.91it/s, v_num=0yzg]


Restoring states from the checkpoint path at ./NLP/q1lx0yzg/checkpoints/epoch=0-step=782.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/q1lx0yzg/checkpoints/epoch=0-step=782.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 782/782 [01:22<00:00,  9.44it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      avg_test_loss         0.2701209485530853
      test_accuracy         0.8954399824142456
        test_loss           0.2697997987270355
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
avg_test_loss,▁
avg_val_loss,▁
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
test_accuracy,▁
test_loss,▁
train_loss,▃▁▂▂▄▂▂▃▁█▅▂▂▁▂
trainer/global_step,▁▁▂▂▃▃▄▄▅▅▆▆▇▇███
val_accuracy,▁
val_loss,▁

0,1
avg_test_loss,0.27012
avg_val_loss,0.27012
epoch,1.0
test_accuracy,0.89544
test_loss,0.2698
train_loss,0.14027
trainer/global_step,782.0
val_accuracy,0.89544
val_loss,0.2698


### (3) Epoch 조절

In [17]:
plm_name = "textattack/xlnet-base-cased-imdb"

tokenizer = AutoTokenizer.from_pretrained(plm_name)
model = AutoModelForSequenceClassification.from_pretrained(plm_name, num_labels=2)

lr = 5e-6
pl_model = SentimentClassifierPL(model, lr)
check_performance(pl_model, tokenizer, train_data, test_data, 5, 32, lr, "XLNet-Epoch5")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'sentiment_classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['sentiment_classifier'])`.
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                           | Params
---------------------------------------------------------------
0 | model       | SentimentClassifierPL          | 117 M 
1 | model.model | XLNetForSequenceClassification | 117 M 
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total params
469.242   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 65.59it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 4: 100%|██████████| 782/782 [04:11<00:00,  3.11it/s, v_num=jrdy]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 782/782 [04:13<00:00,  3.09it/s, v_num=jrdy]


Restoring states from the checkpoint path at ./NLP/025vjrdy/checkpoints/epoch=4-step=3910.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/025vjrdy/checkpoints/epoch=4-step=3910.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 782/782 [01:15<00:00, 10.38it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      avg_test_loss         0.5729934573173523
      test_accuracy         0.8979599475860596
        test_loss           0.5726692080497742
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
avg_test_loss,▁
avg_val_loss,▁▃▆▇█
epoch,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▇▇▇▇▇▇▇█
test_accuracy,▁
test_loss,▁
train_loss,▂▃▄█▄▄▃▃▄▅▂▂▁▂▄▃▁▁▁▂▂▁▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_accuracy,██▁▇▃
val_loss,▁▃▆▇█

0,1
avg_test_loss,0.57299
avg_val_loss,0.57299
epoch,5.0
test_accuracy,0.89796
test_loss,0.57267
train_loss,0.00089
trainer/global_step,3910.0
val_accuracy,0.89796
val_loss,0.57267


In [18]:
plm_name = "textattack/xlnet-base-cased-imdb"

tokenizer = AutoTokenizer.from_pretrained(plm_name)
model = AutoModelForSequenceClassification.from_pretrained(plm_name, num_labels=2)

lr = 5e-6
pl_model = SentimentClassifierPL(model, lr)
check_performance(pl_model, tokenizer, train_data, test_data, 10, 32, lr, "XLNet-Epoch10")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'sentiment_classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['sentiment_classifier'])`.
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                           | Params
---------------------------------------------------------------
0 | model       | SentimentClassifierPL          | 117 M 
1 | model.model | XLNetForSequenceClassification | 117 M 
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total params
469.242   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 58.96it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 782/782 [04:08<00:00,  3.15it/s, v_num=xywl]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 782/782 [04:10<00:00,  3.13it/s, v_num=xywl]


Restoring states from the checkpoint path at ./NLP/l6d2xywl/checkpoints/epoch=9-step=7820.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/l6d2xywl/checkpoints/epoch=9-step=7820.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 782/782 [01:15<00:00, 10.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      avg_test_loss          0.793919026851654
      test_accuracy         0.8904799818992615
        test_loss           0.7931963205337524
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
avg_test_loss,▁
avg_val_loss,▁▂▄▄▆▆▅▅▇█
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
test_accuracy,▁
test_loss,▁
train_loss,▅█▆▄▃█▁▂▃▁▂▁▁▁▁▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_accuracy,█▆▃▅▄▄▄▅▅▁
val_loss,▁▂▄▄▆▆▅▅▆█

0,1
avg_test_loss,0.79392
avg_val_loss,0.79392
epoch,10.0
test_accuracy,0.89048
test_loss,0.7932
train_loss,0.00028
trainer/global_step,7820.0
val_accuracy,0.89048
val_loss,0.7932


## 3. T5를 통한 Machine Translation 모델 학습 (40점)

* Huggingface hub의 T5 모듈 및 weight을 통해 Seq2Seq 실습에 사용한 English-French MT모델을 구현하시오.
    * Seq2Seq 실습에 사용한 데이터셋 사용 (http://www.manythings.org/anki/fra-eng.zip)
    * T5 모델은 "google-t5/t5-small"를 사용
    * 참고: https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5ForConditionalGeneration
    * 실습에 사용한 모델과의 성능 비교

**GRADING**
* 학습 및 평가 (40점)

In [1]:
import os
import re
import zipfile
import numpy as np
import unicodedata
import random

In [2]:
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

def download_zip(url, output_path):
    response = requests.get(url, headers=headers, stream=True)
    if response.status_code == 200:
        with open(output_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f"ZIP file downloaded to {output_path}")
    else:
        print(f"Failed to download. HTTP Response Code: {response.status_code}")

url = "http://www.manythings.org/anki/fra-eng.zip"
output_path = "fra-eng.zip"

if not os.path.exists(output_path):
    download_zip(url, output_path)

    path = os.getcwd()
    zipfilename = os.path.join(path, output_path)

    with zipfile.ZipFile(zipfilename, 'r') as zip_ref:
        zip_ref.extractall(path)

In [3]:
def load_data(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    pairs = [line.strip().split('\t')[:2] for line in lines]
    return pairs

data_path = "./fra.txt"
data = load_data(data_path)

In [4]:
def split_data(data, train_ratio=0.7, shuffle=True):
    data= list(data)
    if shuffle:
        random.shuffle(data)
    n_train = int(len(data) * train_ratio)
    train_data = data[:n_train]
    test_data = data[n_train:]
    return train_data, test_data

train_test_ratio = 0.9
train, test = split_data(data, train_test_ratio)
train, vali = split_data(train, train_test_ratio)

In [5]:
len(train), len(vali), len(test)

(188515, 20947, 23274)

In [6]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import tqdm

plm_name = "google-t5/t5-small"

tokenizer = T5Tokenizer.from_pretrained(plm_name)
model = T5ForConditionalGeneration.from_pretrained(plm_name)

  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [8]:
data_file = os.path.join("fra.txt")

with open(data_file, 'r', encoding='utf-8') as f:
    lines = f.readlines()

pairs = [line.strip().split('\t') for line in lines]
eng_sentences = [pair[0] for pair in pairs]
fra_sentences = [pair[1] for pair in pairs]

inputs = tokenizer(eng_sentences, max_length=30, truncation=True, padding='max_length', return_tensors='pt')
targets = tokenizer(fra_sentences, max_length=30, truncation=True, padding='max_length', return_tensors='pt')

print(inputs['input_ids'])
print(targets['input_ids'])

tensor([[1263,    5,    1,  ...,    0,    0,    0],
        [1263,    5,    1,  ...,    0,    0,    0],
        [1263,    5,    1,  ...,    0,    0,    0],
        ...,
        [ 156,  841,  113,  ..., 4461,   24,    1],
        [  94,  164,   36,  ...,    3,   99,    1],
        [  96,  196,  877,  ...,    7,   48,    1]])
tensor([[ 2964,     3,    55,  ...,     0,     0,     0],
        [ 1332,    15,     5,  ...,     0,     0,     0],
        [  695,  2981,     3,  ...,     0,     0,     0],
        ...,
        [  925, 22002,    31,  ...,     3, 18530,     1],
        [  802,   259,  1351,  ...,    50,  1405,     1],
        [  673,  1022,  3448,  ...,  4223, 11891,     1]])


In [9]:
class EnToFrDataset(Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        src, trg = self.data[idx]
        src_sample = self.tokenizer(src, max_length=30, padding="max_length", truncation=True, return_tensors="pt")
        trg_sample = self.tokenizer(trg, max_length=30, padding="max_length", truncation=True, return_tensors="pt")

        input_ids = src_sample.input_ids.squeeze()
        attention_mask = src_sample.attention_mask.squeeze()
        label = trg_sample.input_ids.squeeze()

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": label
        }

In [10]:
train_dataset = EnToFrDataset(train, tokenizer)
vali_dataset = EnToFrDataset(vali, tokenizer)
test_dataset = EnToFrDataset(test, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=32,drop_last=True, shuffle=True, num_workers=8)
vali_loader = DataLoader(vali_dataset, batch_size=32,drop_last=True, shuffle=False, num_workers=8)
test_loader = DataLoader(test_dataset, batch_size=32,drop_last=True, shuffle=False, num_workers=8)

In [11]:
import math
import torch
import torch.nn as nn
import lightning as pl

class TranslationModelPL(pl.LightningModule):
    def __init__(self, model):
        super(TranslationModelPL, self).__init__()
        self.model = model
        self.criterion = nn.CrossEntropyLoss(ignore_index=0)
        self.save_hyperparameters()

    def forward(self, input_ids, attention_mask, labels=None):
        return self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

    def training_step(self, batch, batch_idx):
        outputs = self(**batch)
        loss = outputs.loss
        logits = outputs.logits
        ppl = torch.exp(loss)

        self.log("train_loss", loss, prog_bar=True)
        self.log("train_PPL", ppl, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        outputs = self(**batch)
        loss = outputs.loss
        logits = outputs.logits
        ppl = torch.exp(loss)

        self.log("val_loss", loss, prog_bar=True)
        self.log("val_PPL", ppl, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        outputs = self(**batch)
        loss = outputs.loss
        logits = outputs.logits
        ppl = torch.exp(loss)

        self.log("test_loss", loss, prog_bar=True)
        self.log("test_PPL", ppl, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=5e-6)



In [12]:
import wandb
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import ModelSummary

wandb.login()

def check_performance(model, tokenizer, train_data, vali_data, test_data, wandb_log_name):
    wandb_logger = WandbLogger(project="NLP", name=wandb_log_name, group="HW05")

    pl_model = TranslationModelPL(model)

    train_dataset = EnToFrDataset(train_data,tokenizer)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_dataset = EnToFrDataset(vali_data,tokenizer)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_dataset = EnToFrDataset(test_data,tokenizer)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    trainer = pl.Trainer(
        max_epochs=1,
        accelerator="gpu",
        logger=wandb_logger,
        callbacks=[ModelSummary(max_depth=2)],
        precision=16
    )

    trainer.fit(
        model=pl_model,
        train_dataloaders=train_loader,
        val_dataloaders=val_loader
    )

    trainer.test(dataloaders=test_loader)

    wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnoeyhesx[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [13]:
check_performance(model, tokenizer, train, vali, test, "T5-small")

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type                       | Params
-------------------------------------------------------------
0 | model         | T5ForConditionalGeneration | 60.5 M
1 | model.shared  | Embedding                  | 16.4 M
2 | model.encoder | T5Stack                    | 35.3 M
3 | model.decoder | T5Stack                    | 41.6 M
4 | model.lm_head | Linear                     | 16.4 M
5 | criterion     | CrossEntropyLoss           | 0     
-------------------------------------------------------------
60.5 M    Trainable params
0         Non-trainable params
60.5 M    Total params
242.026   Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 5892/5892 [04:50<00:00, 20.25it/s, v_num=vd39, train_loss=0.360, train_PPL=1.430, val_loss=0.335, val_PPL=1.400]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 5892/5892 [04:51<00:00, 20.20it/s, v_num=vd39, train_loss=0.360, train_PPL=1.430, val_loss=0.335, val_PPL=1.400]


Restoring states from the checkpoint path at ./NLP/j9npvd39/checkpoints/epoch=0-step=5892.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at ./NLP/j9npvd39/checkpoints/epoch=0-step=5892.ckpt
/home/dev/anaconda3/envs/nlp/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 728/728 [00:08<00:00, 89.35it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_PPL             1.398276925086975
        test_loss           0.33381375670433044
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
test_PPL,▁
test_loss,▁
train_PPL,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_PPL,▁
val_loss,▁

0,1
epoch,1.0
test_PPL,1.39828
test_loss,0.33381
train_PPL,1.35628
train_loss,0.30475
trainer/global_step,5892.0
val_PPL,1.39941
val_loss,0.3347


In [14]:
test_data = test_dataset.__getitem__(1011)

src_text = tokenizer.decode(test_data["input_ids"], skip_special_tokens=True)
target_text = tokenizer.decode(test_data["labels"], skip_special_tokens=True)

In [15]:
input_ids = tokenizer.encode(src_text, return_tensors="pt")
outputs = model.generate(input_ids, max_length=30, num_beams=4, early_stopping=True)
model_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [16]:
print("Input:", src_text)
print("Target:", target_text)
print("Model Output:", model_output)

Input: I wrote a letter to my teacher.
Target: J'ai écrit une lettre à mon institutrice.
Model Output: J'ai écrit une lettre à mon professeur.
