In [1]:
!pip install transformers torch datasets



In [1]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

from transformers import set_seed

# Seed the Python, NumPy and PyTorch RNGs before the model is built: the
# classification head is not part of the roberta-base checkpoint and is
# randomly initialised, so without a seed every run starts from different weights.
SEED = 42
set_seed(SEED)

# Load tokenizer and model
model_name = "roberta-base"  # or "roberta-large"
tokenizer = RobertaTokenizer.from_pretrained(model_name)
# num_labels=3 sizes the classification head for three classes.
# output_hidden_states=True makes each forward pass also return the hidden states.
# NOTE(review): the hidden states are not read by the cells visible here — confirm they are needed.
model = RobertaForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    output_hidden_states=True,
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

In [2]:
from datasets import load_dataset

# The "train" split is read from ./augment/TYPE_aug_train.csv.
# The alternative training file previously used here is ./train/TYPE_train.csv.
csv_splits = {
    "train": "./augment/TYPE_aug_train.csv",
    "test": "./test/TYPE_test.csv",
}
dataset = load_dataset("csv", data_files=csv_splits)

print(dataset)
                                          
def preprocess_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Passes ``examples["text"]`` to the module-level ``tokenizer`` with
    ``padding="max_length"`` and ``truncation=True`` and returns the
    tokenizer's result unchanged.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded

# Tokenize every split; batched=True passes preprocess_function a batch of rows per call.
tokenized_datasets = dataset.map(function=preprocess_function, batched=True)

Downloading and preparing dataset csv/default to /home/junho00211/.cache/huggingface/datasets/csv/default-3808da4cc73924d7/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /home/junho00211/.cache/huggingface/datasets/csv/default-3808da4cc73924d7/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 100
    })
})


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [3]:
from torch.utils.data import DataLoader

# Columns that the datasets return as torch tensors.
torch_columns = ["input_ids", "attention_mask", "label"]
batch_size = 8

train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]
for split_dataset in (train_dataset, test_dataset):
    split_dataset.set_format(type="torch", columns=torch_columns)

# Only the training batches are shuffled; the test loader keeps the dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [4]:
import torch
from torch.optim import AdamW
from tqdm import tqdm
import torch.nn as nn

# Fine-tune the classifier: AdamW + cross-entropy over `num_epochs` passes of train_dataloader.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Wrap only once: re-running this cell must not nest DataParallel inside DataParallel.
# Note that re-running still continues training from the model's current weights.
if not isinstance(model, nn.DataParallel):
    model = nn.DataParallel(model)
model.to(device)
optimizer = AdamW(model.parameters(), lr=1e-5, weight_decay=0.01)
criterion = nn.CrossEntropyLoss()

num_epochs = 7  # number of training epochs

for epoch in tqdm(range(num_epochs)):
    model.train()  # switch to training mode
    total_loss = 0.0

    for batch in train_dataloader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)  # ground-truth labels

        optimizer.zero_grad()  # clear gradients from the previous step
        outputs = model(input_ids, attention_mask=attention_mask)
        loss = criterion(outputs.logits, labels)  # compute the loss
        loss.backward()  # backpropagate
        optimizer.step()  # update the model parameters

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

 14%|████████████████████████████████▏                                                                                                                                                                                                | 1/7 [04:01<24:10, 241.78s/it]

Epoch 1, Loss: 0.7886


 29%|████████████████████████████████████████████████████████████████▎                                                                                                                                                                | 2/7 [07:53<19:38, 235.72s/it]

Epoch 2, Loss: 0.5400


 43%|████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                                                                | 3/7 [11:44<15:34, 233.60s/it]

Epoch 3, Loss: 0.2184


 57%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                | 4/7 [15:35<11:37, 232.45s/it]

Epoch 4, Loss: 0.0882


 71%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                | 5/7 [19:25<07:43, 231.89s/it]

Epoch 5, Loss: 0.0372


 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                | 6/7 [23:16<03:51, 231.45s/it]

Epoch 6, Loss: 0.0353


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [27:06<00:00, 232.41s/it]

Epoch 7, Loss: 0.0247





In [5]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Evaluate on the test loader: collect argmax predictions and labels batch by batch.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for batch in test_dataloader:
        input_ids, attention_mask, labels = (
            batch[key].to(device) for key in ("input_ids", "attention_mask", "label")
        )

        outputs = model(input_ids, attention_mask=attention_mask)
        preds = outputs.logits.argmax(dim=-1).cpu().numpy()

        all_preds.extend(preds)
        all_labels.extend(labels.cpu().numpy())

# Compute F1, precision and recall (weighted average) plus accuracy
weighted = {"average": "weighted"}
f1 = f1_score(all_labels, all_preds, **weighted)
precision = precision_score(all_labels, all_preds, **weighted)
recall = recall_score(all_labels, all_preds, **weighted)
accuracy = accuracy_score(all_labels, all_preds)

print(f"Test F1 Score: {f1:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

Test F1 Score: 0.5888
Test Precision: 0.6182
Test Recall: 0.5800
Test Accuracy: 0.5800


In [6]:
# Hard-coded label predictions (presumably from a GPT zero-shot run — confirm provenance);
# they are compared position-by-position with all_labels.
GPT_zeroshot = [2, 1, 1, 1, 0, 2, 0, 0, 0, 2, 2, 2, 1, 0, 1, 2, 0, 1, 2, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 2, 1, 2, 2, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 2, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 1, 1, 0, 0]
# Weighted-average F1, precision and recall, evaluated in that order.
f1, precision, recall = (
    scorer(all_labels, GPT_zeroshot, average="weighted")
    for scorer in (f1_score, precision_score, recall_score)
)

print(f"Test F1 Score: {f1:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")

Test F1 Score: 0.5207
Test Precision: 0.5916
Test Recall: 0.4800


In [7]:
# Hard-coded label predictions (presumably from a GPT few-shot run — confirm provenance);
# they are compared position-by-position with all_labels.
GPT_fewshot = [1, 0, 1, 2, 0, 2, 1, 0, 0, 1, 2, 2, 1, 0, 0, 2, 0, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0, 2, 1, 2, 1, 2, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 1, 1, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 1, 2, 0, 0]
# Weighted-average F1, precision and recall, evaluated in that order.
f1, precision, recall = (
    scorer(all_labels, GPT_fewshot, average="weighted")
    for scorer in (f1_score, precision_score, recall_score)
)

print(f"Test F1 Score: {f1:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")

Test F1 Score: 0.5554
Test Precision: 0.5863
Test Recall: 0.5400
