# In[10]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset, DatasetDict

def _supported_kwarg(target, preferred, fallback):
    """Return ``preferred`` if ``target`` accepts it as a parameter, else ``fallback``."""
    import inspect

    try:
        accepted = inspect.signature(target).parameters
    except (TypeError, ValueError):
        # Signature cannot be introspected: keep the keyword the original code used.
        return fallback
    return preferred if preferred in accepted else fallback


def model(
    train_df,
    test_df,
    *,
    model_name="klue/roberta-base",
    num_labels=4,
    text_column="conversation",
    max_length=128,
    output_dir="./results",
):
    """Build a ``Trainer`` for sequence classification from two pandas DataFrames.

    The frames are converted to Hugging Face datasets, the text column is
    tokenized (truncated and padded to ``max_length``), and a ``Trainer`` is
    assembled around a freshly loaded classification model. Training is NOT
    started here; the caller decides when to call ``trainer.train()``.

    Args:
        train_df: DataFrame used as the training split.
        test_df: DataFrame used as the evaluation split.
        model_name: Checkpoint passed to ``from_pretrained`` for both the
            tokenizer and the model.
        num_labels: Number of output classes for the classification head.
        text_column: Name of the column holding the input text.
        max_length: Length every tokenized example is truncated/padded to.
        output_dir: Directory handed to ``TrainingArguments``.

    Returns:
        The configured (untrained) ``Trainer``.

    Note:
        Both frames presumably also need a label column for the Trainer to
        compute a loss; this function does not check for one -- TODO confirm
        against the caller's data.
    """
    # Convert the DataFrames into Hugging Face Dataset objects.
    splits = DatasetDict(
        {
            "train": Dataset.from_pandas(train_df),
            "test": Dataset.from_pandas(test_df),
        }
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Named `classifier` rather than `model` so the function's own name is
    # not shadowed inside its body.
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=num_labels
    )

    # Tokenization function applied to each batch of examples.
    def preprocess_function(examples):
        return tokenizer(
            examples[text_column],
            truncation=True,
            padding="max_length",
            max_length=max_length,
        )

    # Tokenize both splits.
    tokenized_datasets = splits.map(preprocess_function, batched=True)

    # Trainer configuration. Newer transformers releases spell the evaluation
    # schedule keyword `eval_strategy`; older ones use `evaluation_strategy`.
    # Pick whichever the installed version accepts.
    eval_kwarg = _supported_kwarg(
        TrainingArguments, "eval_strategy", "evaluation_strategy"
    )
    training_args = TrainingArguments(
        output_dir=output_dir,
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
        **{eval_kwarg: "epoch"},
    )

    # Same idea for the tokenizer argument (`processing_class` in newer
    # releases, `tokenizer` in older ones).
    tokenizer_kwarg = _supported_kwarg(Trainer, "processing_class", "tokenizer")
    return Trainer(
        model=classifier,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        **{tokenizer_kwarg: tokenizer},
    )