In [1]:
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from sklearn.metrics import accuracy_score, f1_score


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# gpu 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
#1 tokenizer 모델 준비
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
#2 LoRA 설정 (논문 방식대로 Q,V 가중치 적용 )
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.2,
    target_modules=["query", "value"],
    bias="none",
    task_type="SEQ_CLS" #시퀀스 분류 작업 
)

In [5]:
#3 모델에 로라 적용
lora_model = get_peft_model(model, lora_config)

In [6]:
#4 데이터셋 로드
dataset = load_dataset("glue", "sst2") #감정 분류 데이터셋
def preprocess_funtion(examples):
    return tokenizer(examples["sentence"], truncation=True, padding=True)

In [7]:
encoded_dataset = dataset.map(preprocess_funtion, batched=True)

In [8]:
# Accuracy 및 f1계산 함수 정의 
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    accuracy = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="weighted")
    return {"accuracy" : accuracy, "f1" : f1}

In [9]:
#5 Trainer를 활용한 모델 학습
training_args = TrainingArguments(
    output_dir = "./results_lora",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer_lora = Trainer(
    model=lora_model,
    args = training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)



In [10]:
# 6 로라 모델 학습.train()
trainer_lora.train()

  8%|▊         | 500/6315 [00:24<04:45, 20.36it/s]

{'loss': 0.5553, 'grad_norm': 3.3077778816223145, 'learning_rate': 1.8416468725257326e-05, 'epoch': 0.24}


 16%|█▌        | 1000/6315 [00:49<04:21, 20.31it/s]

{'loss': 0.2968, 'grad_norm': 6.173166751861572, 'learning_rate': 1.6832937450514647e-05, 'epoch': 0.48}


 24%|██▍       | 1500/6315 [01:14<03:57, 20.24it/s]

{'loss': 0.2828, 'grad_norm': 9.152865409851074, 'learning_rate': 1.5249406175771972e-05, 'epoch': 0.71}


 32%|███▏      | 2000/6315 [01:40<03:35, 20.01it/s]

{'loss': 0.2685, 'grad_norm': 5.13259220123291, 'learning_rate': 1.3665874901029297e-05, 'epoch': 0.95}


                                                   
 33%|███▎      | 2109/6315 [01:46<06:25, 10.92it/s]

{'eval_loss': 0.21919484436511993, 'eval_accuracy': 0.9151376146788991, 'eval_f1': 0.9151000768149341, 'eval_runtime': 0.6074, 'eval_samples_per_second': 1435.644, 'eval_steps_per_second': 46.099, 'epoch': 1.0}


 40%|███▉      | 2500/6315 [02:06<03:10, 20.02it/s]

{'loss': 0.2627, 'grad_norm': 5.5596022605896, 'learning_rate': 1.208234362628662e-05, 'epoch': 1.19}


 48%|████▊     | 3000/6315 [02:31<02:45, 20.01it/s]

{'loss': 0.2551, 'grad_norm': 16.22600746154785, 'learning_rate': 1.0498812351543943e-05, 'epoch': 1.43}


 55%|█████▌    | 3500/6315 [02:56<02:18, 20.33it/s]

{'loss': 0.2587, 'grad_norm': 3.2146382331848145, 'learning_rate': 8.915281076801267e-06, 'epoch': 1.66}


 63%|██████▎   | 4000/6315 [03:22<01:55, 19.99it/s]

{'loss': 0.2519, 'grad_norm': 4.5157470703125, 'learning_rate': 7.331749802058591e-06, 'epoch': 1.9}


                                                   
 67%|██████▋   | 4213/6315 [03:33<03:49,  9.16it/s]

{'eval_loss': 0.2154306173324585, 'eval_accuracy': 0.9220183486238532, 'eval_f1': 0.921944382510582, 'eval_runtime': 0.5976, 'eval_samples_per_second': 1459.218, 'eval_steps_per_second': 46.856, 'epoch': 2.0}


 71%|███████▏  | 4500/6315 [03:47<01:29, 20.19it/s]

{'loss': 0.2445, 'grad_norm': 6.649644374847412, 'learning_rate': 5.748218527315916e-06, 'epoch': 2.14}


 79%|███████▉  | 5000/6315 [04:13<01:05, 20.17it/s]

{'loss': 0.2418, 'grad_norm': 6.725372791290283, 'learning_rate': 4.164687252573238e-06, 'epoch': 2.38}


 87%|████████▋ | 5500/6315 [04:38<00:40, 20.07it/s]

{'loss': 0.2482, 'grad_norm': 4.537880897521973, 'learning_rate': 2.581155977830562e-06, 'epoch': 2.61}


 95%|█████████▌| 6000/6315 [05:03<00:15, 20.31it/s]

{'loss': 0.2465, 'grad_norm': 5.235178470611572, 'learning_rate': 9.976247030878861e-07, 'epoch': 2.85}


                                                   
100%|██████████| 6315/6315 [05:21<00:00, 19.67it/s]

{'eval_loss': 0.20867016911506653, 'eval_accuracy': 0.9220183486238532, 'eval_f1': 0.9219752242215248, 'eval_runtime': 0.5969, 'eval_samples_per_second': 1460.842, 'eval_steps_per_second': 46.908, 'epoch': 3.0}
{'train_runtime': 321.0203, 'train_samples_per_second': 629.39, 'train_steps_per_second': 19.672, 'train_loss': 0.28208023397486454, 'epoch': 3.0}





TrainOutput(global_step=6315, training_loss=0.28208023397486454, metrics={'train_runtime': 321.0203, 'train_samples_per_second': 629.39, 'train_steps_per_second': 19.672, 'total_flos': 6904336917921720.0, 'train_loss': 0.28208023397486454, 'epoch': 3.0})

In [11]:
#7 기존 모델 (roberta-base) 와 성능비교
roberta_base_model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2).to(device)

training_args_base = TrainingArguments(
    output_dir="./results_roberta",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,    
)

trainer_base = Trainer(
    model=roberta_base_model,
    args=training_args,
    train_dataset = encoded_dataset["train"],
    eval_dataset= encoded_dataset["validation"],
    tokenizer = tokenizer,
    compute_metrics=compute_metrics
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# 8 기존 모델 훈련
trainer_base.train()

  8%|▊         | 500/6315 [00:40<07:57, 12.19it/s]

{'loss': 0.3321, 'grad_norm': 13.665548324584961, 'learning_rate': 1.8416468725257326e-05, 'epoch': 0.24}


 16%|█▌        | 1000/6315 [01:23<07:14, 12.23it/s]

{'loss': 0.2283, 'grad_norm': 11.887150764465332, 'learning_rate': 1.6832937450514647e-05, 'epoch': 0.48}


 24%|██▍       | 1500/6315 [02:05<06:30, 12.33it/s]

{'loss': 0.2002, 'grad_norm': 15.951375007629395, 'learning_rate': 1.5249406175771972e-05, 'epoch': 0.71}


 32%|███▏      | 2000/6315 [02:47<05:52, 12.23it/s]

{'loss': 0.1878, 'grad_norm': 17.91921043395996, 'learning_rate': 1.3665874901029297e-05, 'epoch': 0.95}


                                                   
 33%|███▎      | 2106/6315 [02:57<11:47,  5.95it/s]

{'eval_loss': 0.19213838875293732, 'eval_accuracy': 0.9415137614678899, 'eval_f1': 0.9415056805262325, 'eval_runtime': 0.6079, 'eval_samples_per_second': 1434.543, 'eval_steps_per_second': 46.063, 'epoch': 1.0}


 40%|███▉      | 2500/6315 [03:30<05:15, 12.09it/s]

{'loss': 0.1513, 'grad_norm': 3.83817982673645, 'learning_rate': 1.208234362628662e-05, 'epoch': 1.19}


 48%|████▊     | 3000/6315 [04:12<04:34, 12.07it/s]

{'loss': 0.1405, 'grad_norm': 7.973518371582031, 'learning_rate': 1.0498812351543943e-05, 'epoch': 1.43}


 55%|█████▌    | 3500/6315 [04:54<03:50, 12.24it/s]

{'loss': 0.1354, 'grad_norm': 1.7485662698745728, 'learning_rate': 8.915281076801267e-06, 'epoch': 1.66}


 63%|██████▎   | 4000/6315 [05:36<03:09, 12.20it/s]

{'loss': 0.1324, 'grad_norm': 1.8768327236175537, 'learning_rate': 7.331749802058591e-06, 'epoch': 1.9}


                                                   
 67%|██████▋   | 4212/6315 [05:56<05:57,  5.88it/s]

{'eval_loss': 0.24242940545082092, 'eval_accuracy': 0.9403669724770642, 'eval_f1': 0.9403339949929308, 'eval_runtime': 0.6089, 'eval_samples_per_second': 1432.02, 'eval_steps_per_second': 45.982, 'epoch': 2.0}


 71%|███████▏  | 4500/6315 [06:19<02:28, 12.18it/s]

{'loss': 0.1149, 'grad_norm': 23.693889617919922, 'learning_rate': 5.748218527315916e-06, 'epoch': 2.14}


 79%|███████▉  | 5000/6315 [07:01<01:47, 12.26it/s]

{'loss': 0.1019, 'grad_norm': 10.530866622924805, 'learning_rate': 4.164687252573238e-06, 'epoch': 2.38}


 87%|████████▋ | 5500/6315 [07:44<01:06, 12.20it/s]

{'loss': 0.1028, 'grad_norm': 2.3642587661743164, 'learning_rate': 2.581155977830562e-06, 'epoch': 2.61}


 95%|█████████▌| 6000/6315 [08:26<00:25, 12.23it/s]

{'loss': 0.0952, 'grad_norm': 15.542317390441895, 'learning_rate': 9.976247030878861e-07, 'epoch': 2.85}


                                                   
100%|██████████| 6315/6315 [08:55<00:00, 11.80it/s]

{'eval_loss': 0.23489657044410706, 'eval_accuracy': 0.9438073394495413, 'eval_f1': 0.9437995754075569, 'eval_runtime': 0.594, 'eval_samples_per_second': 1468.007, 'eval_steps_per_second': 47.138, 'epoch': 3.0}
{'train_runtime': 535.3253, 'train_samples_per_second': 377.428, 'train_steps_per_second': 11.797, 'train_loss': 0.15710773422712385, 'epoch': 3.0}





TrainOutput(global_step=6315, training_loss=0.15710773422712385, metrics={'train_runtime': 535.3253, 'train_samples_per_second': 377.428, 'train_steps_per_second': 11.797, 'total_flos': 6845227230107100.0, 'train_loss': 0.15710773422712385, 'epoch': 3.0})

In [13]:
# 8. 성능 평가
print("LoRA 모델 평가 결과:")
trainer_lora.evaluate()

print("기존 RoBERTa 모델 평가 결과:")
trainer_base.evaluate()

LoRA 모델 평가 결과:


100%|██████████| 28/28 [00:00<00:00, 48.39it/s]


기존 RoBERTa 모델 평가 결과:


100%|██████████| 28/28 [00:00<00:00, 48.90it/s]


{'eval_loss': 0.23489657044410706,
 'eval_accuracy': 0.9438073394495413,
 'eval_f1': 0.9437995754075569,
 'eval_runtime': 0.5968,
 'eval_samples_per_second': 1461.227,
 'eval_steps_per_second': 46.92,
 'epoch': 3.0}

In [14]:
# 9. 모델 파라미터 수 비교
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

lora_params = count_parameters(lora_model)
roberta_params = count_parameters(roberta_base_model)

print(f"LoRA 모델 파라미터 수: {lora_params}")
print(f"기존 RoBERTa 모델 파라미터 수: {roberta_params}")

LoRA 모델 파라미터 수: 739586
기존 RoBERTa 모델 파라미터 수: 124647170


In [15]:

# 10 성능 평가 및 비교 (정확도와 F1)
print("LoRA 모델 평가 결과:")
lora_eval = trainer_lora.evaluate()

print("기존 RoBERTa 모델 평가 결과:")
base_eval = trainer_base.evaluate()

# 성능 출력
print(f"LoRA 모델 성능: Accuracy={lora_eval['eval_accuracy']}, F1={lora_eval['eval_f1']}")
print(f"기존 RoBERTa 모델 성능: Accuracy={base_eval['eval_accuracy']}, F1={base_eval['eval_f1']}")

LoRA 모델 평가 결과:


100%|██████████| 28/28 [00:00<00:00, 48.31it/s]


기존 RoBERTa 모델 평가 결과:


100%|██████████| 28/28 [00:00<00:00, 49.33it/s]

LoRA 모델 성능: Accuracy=0.9220183486238532, F1=0.9219752242215248
기존 RoBERTa 모델 성능: Accuracy=0.9438073394495413, F1=0.9437995754075569



