In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, TrainerCallback
from datasets import load_dataset, load_metric
import numpy as np
import pandas as pd
from peft import get_peft_model, LoraConfig
from copy import deepcopy
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score

class CustomCallback(TrainerCallback):
    """Callback that additionally evaluates on the training set each epoch.

    The callback keeps a reference to the trainer it is attached to so it can
    call ``evaluate`` on that trainer's own ``train_dataset``.
    """

    def __init__(self, trainer) -> None:
        """Remember the trainer whose training set will be evaluated."""
        super().__init__()
        self._trainer = trainer

    def on_epoch_end(self, args, state, control, **kwargs):
        """Evaluate on the training set when an evaluation is scheduled.

        Nothing happens (and ``None`` is returned) unless
        ``control.should_evaluate`` is truthy. Otherwise the trainer's
        training set is evaluated with the ``"train"`` metric prefix and a
        deep copy of ``control`` taken *before* that evaluation is returned.
        """
        if not control.should_evaluate:
            return None

        # Snapshot first: presumably the nested evaluate() call can alter the
        # control flags, and the pre-evaluation state is what gets returned.
        control_snapshot = deepcopy(control)
        self._trainer.evaluate(
            eval_dataset=self._trainer.train_dataset,
            metric_key_prefix="train",
        )
        return control_snapshot

# Load the train and test splits from local CSV files.
dataset = load_dataset(
    'csv',
    data_files={
        'train': 'tfs_train1.csv',
        'test': 'tfs_test1.csv',
    },
)

# Load the tokenizer that matches the pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained('klue/bert-base')

# Tokenize the text data
def tokenize_function(examples):
    """Tokenize the ``'text'`` field of a batch of examples.

    The module-level ``tokenizer`` is called with truncation enabled and
    ``padding='max_length'``; its output is returned unchanged.
    """
    texts = examples['text']
    encoded = tokenizer(texts, truncation=True, padding='max_length')
    return encoded

# Tokenize every split in batches, expose the target column under the name
# "labels", and drop the raw text column that is no longer needed.
tokenized_datasets = (
    dataset.map(tokenize_function, batched=True)
    .rename_column("label", "labels")
    .remove_columns(["text"])
)

# Return the remaining columns as torch tensors.
tokenized_datasets.set_format("torch")

# Load the pretrained checkpoint with a 43-way sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained('klue/bert-base', num_labels=43)

# LoRA configuration
lora_config = LoraConfig(
    # Declare the task so PEFT wraps the model as a sequence classifier.
    # Without a task type the generic wrapper is used, which freezes every
    # non-LoRA parameter -- including the newly initialised classification
    # head -- and leaves that head out of the saved adapter.
    task_type='SEQ_CLS',
    r=8,  # rank
    lora_alpha=32,  # scaling factor
    target_modules=['query', 'value'],  # modules that receive LoRA adapters
    lora_dropout=0.1,  # dropout rate
    bias='none',
)

# Wrap the base model with the LoRA adapters.
model = get_peft_model(model, lora_config)

# Training configuration
training_args = TrainingArguments(
    # Where outputs are written.
    output_dir='result/result2_model_4',
    # Optimisation schedule.
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    # Evaluation and logging cadence.
    eval_strategy='epoch',
    logging_strategy='epoch',
    logging_dir='./logs',  # log directory
    # NOTE(review): a step interval alongside an epoch-based logging strategy
    # is presumably unused -- confirm against the TrainingArguments docs.
    logging_steps=10,  # logging frequency
)

# Evaluation metric
def compute_metrics(eval_pred):
    """Return the classification accuracy for one evaluation pass.

    Accuracy is computed locally with NumPy instead of through
    ``datasets.load_metric``, which is deprecated/removed in recent
    ``datasets`` releases and needs to download and run a remote script.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` holds
            per-class scores with the classes on axis 1; ``labels`` holds the
            reference class ids, one per row.

    Returns:
        A dict ``{'accuracy': value}`` where ``value`` is the fraction of rows
        whose highest-scoring class equals the reference label.
    """
    predictions, labels = eval_pred
    predicted_ids = np.argmax(predictions, axis=1)
    matches = predicted_ids == np.asarray(labels)
    # float() keeps the value a plain Python number for logging/serialisation.
    return {'accuracy': float(np.mean(matches))}

#def compute_metrics(pred):
#    labels = pred.label_ids
#    preds = pred.predictions.argmax(-1)

#    m1 = load_metric('accuracy')
#    m2 = load_metric('f1')

#    acc = m1.compute(predictions=preds, references=labels)['accuracy']
#    f1 = m2.compute(predictions=preds, references=labels)['f1']

#    return {'accuracy':acc, 'f1':f1}

# Trainer setup: train on the 'train' split, evaluate on the 'test' split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    compute_metrics=compute_metrics # metric computation function
)

# Train the model.
# The callback needs the trainer instance itself, so it is attached after the
# trainer has been constructed rather than passed to the constructor.
trainer.add_callback(CustomCallback(trainer))  # add CustomCallback
trainer.train()

# Save the model
trainer.save_model('result/result2_model_4')

# Evaluate the model (no arguments: uses the trainer's configured eval_dataset)
eval_results = trainer.evaluate()

# Print accuracy and the other evaluation metrics
print(eval_results)

2024-07-19 18:53:29.398393: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-19 18:53:29.577574: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-19 18:53:29.577604: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-19 18:53:29.606092: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-19 18:53:29.672578: I tensorflow/core/platform/cpu_feature_guar

FileNotFoundError: Unable to find '/home/p/tfs_train1.csv'