# ABSA Fine-Tuning

In [None]:
import inspect

import torch
from torch.utils.data import Dataset
from transformers import ElectraTokenizer, ElectraForSequenceClassification, Trainer, TrainingArguments

# Example dataset class
class SentimentDataset(Dataset):
    """Dataset pairing batch-tokenized texts with their labels.

    All texts are tokenized once in the constructor; ``__getitem__`` converts
    the encoded fields and the label of a single sample to tensors.

    Args:
        texts: Sequence of input strings, passed to ``tokenizer`` as one batch.
        labels: Sequence of labels, one per text, in the same order.
        tokenizer: Callable invoked as ``tokenizer(texts, truncation=True,
            padding=True, max_length=max_length)``. Its result must expose
            ``.items()`` yielding ``(field_name, per_sample_sequence)`` pairs.
        max_length: Forwarded to the tokenizer as ``max_length``.

    Raises:
        ValueError: If ``texts`` and ``labels`` do not have the same length.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        # Fail fast: __len__ is driven by the labels, so a mismatch would
        # otherwise either silently drop texts or raise IndexError mid-epoch.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} texts and {len(labels)} labels"
            )
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_length)
        self.labels = labels

    def __getitem__(self, idx):
        """Return one sample as a dict of tensors, with the label under ``"labels"``."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

# Load the model and tokenizer (ELECTRA-small is used here)
model_name = "monologg/koelectra-small-v3-discriminator"
tokenizer = ElectraTokenizer.from_pretrained(model_name)
model = ElectraForSequenceClassification.from_pretrained(model_name, num_labels=3)  # e.g. positive, neutral, negative

# Prepare the datasets (replace these placeholder texts and labels with your own data).
# Every text needs a real integer label: a literal `...` (Ellipsis) inside the
# label list cannot be converted to a tensor and would crash the first epoch.
train_texts = ["리뷰 문장 예시1", "리뷰 문장 예시2", "..."]
train_labels = [0, 1, 2]  # e.g. 0: negative, 1: neutral, 2: positive
eval_texts = ["검증 리뷰 문장 예시1", "검증 리뷰 문장 예시2", "..."]
eval_labels = [0, 1, 2]

train_dataset = SentimentDataset(train_texts, train_labels, tokenizer)
eval_dataset = SentimentDataset(eval_texts, eval_labels, tokenizer)

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`;
# pick whichever keyword the installed version accepts so the script runs on both.
_training_args_params = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_args_params else "evaluation_strategy"
)

# Configure TrainingArguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    eval_steps=500,
    save_steps=500,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    logging_dir='./logs',
    **{_eval_strategy_key: "steps"},
)

# Likewise, Trainer's `tokenizer` argument was renamed to `processing_class`.
_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_key = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

# Create the Trainer and run training
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    **{_tokenizer_key: tokenizer},
)

trainer.train()