In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('/home/user/wylee/ kogpt_new/datasets/OCEAN_dataset.csv')

In [3]:
import torch
import torch.nn as nn
from transformers import GPT2PreTrainedModel, GPT2Model

class KoGPT2ForRegression(GPT2PreTrainedModel):
    """
    KoGPT2를 Regression Task(OCEAN 예측) 가능하도록 수정한 모델.
    """
    def __init__(self, config):
        super().__init__(config)
        self.transformer = GPT2Model(config)  # 기존 KoGPT2 구조 유지
        self.regression_head = nn.Linear(config.hidden_size, 5)  # 🔹 768 → 5로 변경

        self.init_weights()

    def forward(self, input_ids, attention_mask=None, labels=None):
        """
        Forward pass
        """
        outputs = self.transformer(input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state[:, -1, :]  # 🔹 마지막 토큰의 hidden state 사용
        logits = self.regression_head(hidden_states)  # 🔹 Linear Layer 통과
        
        loss = None
        if labels is not None:
            loss_fn = nn.SmoothL1Loss()  # 🔹 Regression을 위한 MSE Loss 사용
            loss = loss_fn(logits, labels)  # MSE 계산
        
        return {"loss": loss, "logits": logits}

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = KoGPT2ForRegression.from_pretrained('skt/kogpt2-base-v2')
tokenizer = AutoTokenizer.from_pretrained(
    'skt/kogpt2-base-v2',
    bos_token='</s>', eos_token='</s>', unk_token='<unk>', pad_token='<pad>',
    padding_side="right",
    model_max_length=512,
)

Some weights of KoGPT2ForRegression were not initialized from the model checkpoint at skt/kogpt2-base-v2 and are newly initialized: ['regression_head.bias', 'regression_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
print(model)

KoGPT2ForRegression(
  (transformer): GPT2Model(
    (wte): Embedding(51200, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (regression_head): Linear(in_features=768, out_features=5, bias=True)
)


In [6]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from transformers import  pipeline
from transformers import Trainer, TrainingArguments
from copy import deepcopy
from dataclasses import dataclass

In [7]:
import torch
from torch.utils.data import Dataset
import pandas as pd
import transformers
import copy
import logging
from typing import Optional, Dict, Sequence

class SFT_dataset(Dataset):
    def __init__(self, df: pd.DataFrame, tokenizer: transformers.PreTrainedTokenizer, verbose=False):
        logging.warning("Loading data...")

        # 프롬프트 템플릿 (Transcription을 입력으로 사용)
        PROMPT_DICT = {
            "prompt_input": (
                "### Instruction(명령어):\n주어진 대화를 보고, 성격 5요인을 예측하세요.\n\n"
                "### Transcription(대화):\n{transcription}\n\n"
                "### Response(응답):"
            )
        }

        # 모델 입력 데이터 생성 (transcription을 입력으로)
        sources = []
        for _, row in df.iterrows():
            prompt = PROMPT_DICT["prompt_input"].format(
                transcription=row["input_text"]  # 대화 입력
            )
            sources.append(prompt)

        # 정답 데이터 (OCEAN 값) 생성 (string → tensor 변환)
        targets = []
        for _, row in df.iterrows():
            ocean_values = eval(row['OCEAN'])  # 🔹 문자열을 리스트로 변환
            targets.append(torch.tensor(ocean_values, dtype=torch.float32))  # 🔹 Float Tensor로 변환

        # 토큰화 (프롬프트만)
        sources_tokenized = self._tokenize_fn(sources, tokenizer)  # instruction + transcription만

        # 토큰화된 input_ids 저장
        self.input_ids = sources_tokenized["input_ids"]
        
        # labels에 직접 OCEAN float tensor 저장 (MSELoss 적용을 위해)
        self.labels = targets  

        logging.warning("Loading data done!!: %d" % len(self.labels))

    def _tokenize_fn(self, strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer) -> Dict:
        tokenized_list = tokenizer(
            strings,
            return_tensors="pt",
            padding="longest",
            max_length=tokenizer.model_max_length,
            truncation=True,
        )
        input_ids = tokenized_list["input_ids"]
        
        return dict(
            input_ids=input_ids
        )

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        return dict(input_ids=self.input_ids[i], labels=self.labels[i])  # 🔹 labels을 float tensor로 반환


In [8]:
from dataclasses import dataclass
from typing import Dict, Sequence
import torch
import transformers

@dataclass
class DataCollatorForSupervisedDataset:
    tokenizer: transformers.PreTrainedTokenizer

    def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
        """
        Regression Task에 맞춰 labels를 float tensor로 변환하고, 
        적절한 padding을 적용하는 collator.
        """
        input_ids, labels = tuple([instance[key] for instance in instances] for key in ("input_ids", "labels"))
        
        # 🔹 input_ids 패딩 (기존 방식 유지)
        input_ids = torch.nn.utils.rnn.pad_sequence(
            input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id
        )
        
        # 🔹 labels 패딩 (Regression을 위해 float tensor로 처리)
        labels = torch.nn.utils.rnn.pad_sequence(
            labels, batch_first=True, padding_value=0.0  # 🔹 -100 대신 0.0 사용
        ).to(dtype=torch.float32)  # 🔹 float32로 변환

        return dict(
            input_ids=input_ids,
            labels=labels,  # 🔹 Regression을 위한 float labels
            attention_mask=input_ids.ne(self.tokenizer.pad_token_id),
        )


In [9]:
from transformers import DataCollatorWithPadding
from sklearn.model_selection import train_test_split

train_df, eval_df = train_test_split(df, test_size=0.2, random_state=42)

print(train_df.shape, eval_df.shape)

train_dataset = SFT_dataset(df=train_df, tokenizer=tokenizer)
eval_dataset = SFT_dataset(df=eval_df, tokenizer=tokenizer)

# 🔹 Data Collator 설정
data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)



(9045, 2) (2262, 2)




In [10]:
import torch

def decode_kogpt2_output(logits, tokenizer):
    """
    KoGPT2의 output(logits)을 디코딩하는 함수.
    logits을 tokenizer를 사용하여 사람이 읽을 수 있는 텍스트로 변환.
    """
    # 🔹 Logits에서 가장 확률이 높은 토큰 선택 (argmax)
    predicted_token_ids = torch.argmax(logits, dim=-1).tolist()  # 🔹 .tolist() 추가

    # 🔹 선택된 토큰을 텍스트로 변환
    decoded_text = tokenizer.decode(predicted_token_ids, skip_special_tokens=True)
    
    return decoded_text

In [11]:
# MSELoss를 사용하도록 Trainer 수정
class RegressionTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kargs):
        labels = inputs.pop("labels")  # 정답 라벨 추출
        outputs = model(**inputs)  # 모델 실행
        logits = outputs["logits"]  # 🔹 (batch_size, 5)

        loss_fn = torch.nn.SmoothL1Loss()
        loss = loss_fn(logits, labels)  # 🔹 MSE Loss 적용

        return (loss, outputs) if return_outputs else loss

In [12]:
training_args = TrainingArguments(
    output_dir="./kogpt_sft/test/dt_col/smooth/",
    overwrite_output_dir=True,
    num_train_epochs=50,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=5,
    prediction_loss_only=False,  # 🔹 softmax가 필요 없으므로 False로 변경
    fp16=True
)

trainer = RegressionTrainer(  # 🔹 RegressionTrainer 사용
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer
)

# 모델 학습 실행
trainer.train()
'''



from transformers import TrainingArguments, TrainerCallback

# 🔹 Eval loss가 낮아질 때만 출력하는 콜백 정의
class BestLossLoggerCallback(TrainerCallback):
    def __init__(self):
        self.best_loss = float("inf")

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        if metrics is not None and "eval_loss" in metrics:
            current_loss = metrics["eval_loss"]
            if current_loss < self.best_loss:
                self.best_loss = current_loss
                print(f"\n🔹 New best eval_loss: {current_loss:.4f} at epoch {int(state.epoch)}")

# 🔹 TrainingArguments 설정
training_args = TrainingArguments(
    output_dir="./kogpt_sft/test/dt_col/smooth/",
    overwrite_output_dir=True,
    num_train_epochs=100,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=5,
    prediction_loss_only=False,
    fp16=True,

    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

# 🔹 Trainer 정의
trainer = RegressionTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    callbacks=[
        BestLossLoggerCallback()  # best loss만 출력
    ]
)

# 🔹 모델 학습
trainer.train()




'''
'''
training_args = TrainingArguments(
    output_dir="./kogpt_sft/test/dt_col/smooth/",
    overwrite_output_dir=True,
    num_train_epochs=50,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=5,
    prediction_loss_only=False,  # 🔹 softmax가 필요 없으므로 False로 변경
    fp16=True
)

trainer = RegressionTrainer(  # 🔹 RegressionTrainer 사용
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer
)

# 모델 학습 실행
trainer.train()

'''
'''




from transformers import EarlyStoppingCallback, TrainingArguments

# EarlyStoppingCallback 설정
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3
)

training_args = TrainingArguments(
    output_dir="./kogpt_sft/test/dt_col/smooth/",
    overwrite_output_dir=True,
    num_train_epochs=50,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=5,
    prediction_loss_only=False,
    fp16=True,
    
    # ✅ EarlyStopping을 위한 필수 설정
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

trainer = RegressionTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    callbacks=[early_stopping]
)

trainer.train()



'''



  trainer = RegressionTrainer(  # 🔹 RegressionTrainer 사용


Step,Training Loss
500,0.0266
1000,0.0165
1500,0.0143
2000,0.0144
2500,0.0136
3000,0.0138
3500,0.0134
4000,0.0131
4500,0.0137
5000,0.0126


'\n\n\n\n\nfrom transformers import EarlyStoppingCallback, TrainingArguments\n\n# EarlyStoppingCallback 설정\nearly_stopping = EarlyStoppingCallback(\n    early_stopping_patience=3\n)\n\ntraining_args = TrainingArguments(\n    output_dir="./kogpt_sft/test/dt_col/smooth/",\n    overwrite_output_dir=True,\n    num_train_epochs=50,\n    per_device_train_batch_size=4,\n    per_device_eval_batch_size=4,\n    warmup_steps=5,\n    prediction_loss_only=False,\n    fp16=True,\n    \n    # ✅ EarlyStopping을 위한 필수 설정\n    evaluation_strategy="epoch",\n    save_strategy="epoch",\n    load_best_model_at_end=True,\n    metric_for_best_model="eval_loss",\n    greater_is_better=False\n)\n\ntrainer = RegressionTrainer(\n    model=model,\n    args=training_args,\n    data_collator=data_collator,\n    train_dataset=train_dataset,\n    eval_dataset=eval_dataset,\n    tokenizer=tokenizer,\n    callbacks=[early_stopping]\n)\n\ntrainer.train()\n\n\n\n'

In [13]:
import torch

def predict_ocean(transcription, model, tokenizer):
    """
    KoGPT2 Regression 모델을 사용하여 OCEAN 점수를 예측하는 함수.
    """
    input_text = (
        "### Instruction(명령어):\n주어진 대화를 보고, 성격 5요인을 예측하세요.\n\n"
        f"### Transcription(대화):\n{transcription}\n\n"
        "### Response(응답):"
    )
    
    # Move model to the correct device (GPU if available, otherwise CPU)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    
    # Tokenize input and move input tensors to the same device
    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    # 모델 실행 (generate() 대신 forward() 호출)
    with torch.no_grad():  # 🔹 Inference 시 Gradient 계산 비활성화
        outputs = model(**inputs)
        logits = outputs["logits"]  # 🔹 OCEAN 값 예측 (batch_size, 5)

    # logits을 0~1 사이 값으로 변환 (Regression Scaling)
    predicted_ocean = torch.sigmoid(logits).squeeze().tolist()  # 🔹 1D 리스트 변환

    return predicted_ocean  # 🔹 5개의 float 값을 리스트로 반환

# ✅ 테스트 실행
sample_transcription = "아."
predicted_ocean = predict_ocean(sample_transcription, model, tokenizer)

print(f"Predicted OCEAN Values: {predicted_ocean}")


Predicted OCEAN Values: [0.6432172656059265, 0.643889307975769, 0.68668532371521, 0.6261242032051086, 0.6144518256187439]


In [14]:
import numpy as np
import itertools
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm
from torch.utils.data import DataLoader

device = "cuda" if torch.cuda.is_available() else "cpu"

model.to(device)

# eval_dataset을 DataLoader로 감싸기
eval_dataloader = DataLoader(eval_dataset, batch_size=1)

# 모델을 평가 모드로 설정
model.eval()


KoGPT2ForRegression(
  (transformer): GPT2Model(
    (wte): Embedding(51200, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (regression_head): Linear(in_features=768, out_features=5, bias=True)
)

In [15]:

# 예측값과 실제값 저장할 리스트 초기화
all_predictions = []
all_labels = []

# 테스트 데이터에 대해 반복문 돌리기
with torch.no_grad():
    for idx, batch in enumerate(tqdm(eval_dataloader)):
        # 데이터 준비
        input_ids = batch['input_ids'].to(device)
        labels_tensor = batch['labels'].to(device)  # 🔹 실제 정답 (OCEAN 값)

        # ✅ 모델 예측 (generate() 대신 forward() 사용)
        outputs = model(input_ids)
        logits = outputs["logits"]  # 🔹 OCEAN 예측값 (batch_size, 5)

         # ✅ 예측값을 0~1 범위로 변환 (sigmoid 사용)
        predicted_ocean = torch.sigmoid(logits).squeeze().tolist()  # 🔹 (batch_size, 5) → 리스트 변환
        # ✅ 실제 정답값 저장
        all_labels.append(labels_tensor.squeeze().tolist())  # 🔹 리스트 변환
        # ✅ 예측값 저장
        all_predictions.append(predicted_ocean)

  0%|          | 0/2262 [00:00<?, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
100%|██████████| 2262/2262 [00:18<00:00, 123.54it/s]


In [16]:
print(all_predictions)


[[0.6743722558021545, 0.6680145263671875, 0.6633418798446655, 0.6313670873641968, 0.6241790056228638], [0.6746938228607178, 0.6695287823677063, 0.6637779474258423, 0.633637011051178, 0.6269239783287048], [0.6789658665657043, 0.6673644781112671, 0.6598436236381531, 0.6318215131759644, 0.6277230381965637], [0.6772605776786804, 0.6633418798446655, 0.6589664220809937, 0.6334102749824524, 0.6268097162246704], [0.6721165180206299, 0.6590760946273804, 0.6603914499282837, 0.6314806342124939, 0.6080123782157898], [0.6771538853645325, 0.6684475541114807, 0.6625781655311584, 0.6311396956443787, 0.6265812516212463], [0.6755505800247192, 0.6674728393554688, 0.6680145263671875, 0.6279512047767639, 0.6284074187278748], [0.675336480140686, 0.6696368455886841, 0.6619228720664978, 0.633976936340332, 0.6290912628173828], [0.6749081611633301, 0.6679062247276306, 0.6627964973449707, 0.6318215131759644, 0.6311396956443787], [0.6752294301986694, 0.6713628172874451, 0.6681228280067444, 0.6254380941390991, 0.6

In [17]:
print(all_labels)

[[0.6875, 0.47920000553131104, 0.4375, 0.770799994468689, 0.4375], [0.5625, 0.833299994468689, 0.8125, 0.3125, 0.27079999446868896], [0.4375, 0.708299994468689, 0.5625, 0.39579999446868896, 0.41670000553131104], [0.729200005531311, 0.47920000553131104, 0.5, 0.45829999446868896, 0.729200005531311], [0.604200005531311, 0.875, 0.833299994468689, 0.5, 0.625], [0.770799994468689, 0.708299994468689, 0.666700005531311, 0.520799994468689, 0.541700005531311], [0.41670000553131104, 0.541700005531311, 0.541700005531311, 0.5, 0.6875], [0.5625, 0.833299994468689, 0.8125, 0.3125, 0.27079999446868896], [0.645799994468689, 0.583299994468689, 0.854200005531311, 0.583299994468689, 0.27079999446868896], [0.979200005531311, 0.979200005531311, 0.833299994468689, 0.645799994468689, 0.20829999446868896], [0.5625, 0.833299994468689, 0.8125, 0.3125, 0.27079999446868896], [0.541700005531311, 0.5625, 0.8125, 0.583299994468689, 0.29170000553131104], [0.833299994468689, 0.604200005531311, 0.5625, 0.666700005531311

In [18]:
import numpy as np

# 🔹 numpy 배열 변환
labels_array = np.array(all_labels)
predictions_array = np.array(all_predictions)

# 🔹 소수점 4자리까지 반올림
labels_rounded = np.round(labels_array, 4)
predictions_rounded = np.round(predictions_array, 4)

# ✅ 출력
print("🔹 Rounded Predictions:")
print(predictions_rounded.tolist())  # 리스트 형태로 변환하여 출력

print("\n🔹 Rounded Labels:")
print(labels_rounded.tolist())  # 리스트 형태로 변환하여 출력


🔹 Rounded Predictions:
[[0.6744, 0.668, 0.6633, 0.6314, 0.6242], [0.6747, 0.6695, 0.6638, 0.6336, 0.6269], [0.679, 0.6674, 0.6598, 0.6318, 0.6277], [0.6773, 0.6633, 0.659, 0.6334, 0.6268], [0.6721, 0.6591, 0.6604, 0.6315, 0.608], [0.6772, 0.6684, 0.6626, 0.6311, 0.6266], [0.6756, 0.6675, 0.668, 0.628, 0.6284], [0.6753, 0.6696, 0.6619, 0.634, 0.6291], [0.6749, 0.6679, 0.6628, 0.6318, 0.6311], [0.6752, 0.6714, 0.6681, 0.6254, 0.6293], [0.6708, 0.6587, 0.6594, 0.6433, 0.6196], [0.6727, 0.6704, 0.6607, 0.6311, 0.6278], [0.6754, 0.6705, 0.6559, 0.6351, 0.6283], [0.6739, 0.6684, 0.666, 0.6335, 0.6224], [0.6754, 0.6691, 0.6623, 0.6324, 0.6273], [0.673, 0.6705, 0.666, 0.6316, 0.626], [0.6743, 0.6618, 0.6549, 0.6391, 0.628], [0.6734, 0.67, 0.6641, 0.6317, 0.6259], [0.6785, 0.6687, 0.6641, 0.6332, 0.6295], [0.6728, 0.6704, 0.6661, 0.6324, 0.6257], [0.675, 0.6646, 0.6581, 0.6339, 0.6302], [0.6732, 0.6694, 0.665, 0.6323, 0.6276], [0.6705, 0.6732, 0.6658, 0.6325, 0.6299], [0.6743, 0.6699, 0.6629, 0

In [19]:
# 🔹 MAE 계산
mae_score = mean_absolute_error(labels_rounded, predictions_rounded)

In [20]:
# 🔹 1-MAE 출력
one_minus_mae = 1 - mae_score
print(f"✅ 1-MAE Score: {one_minus_mae}")

✅ 1-MAE Score: 0.8517242086648983


In [21]:
# 🔹 각 OCEAN 요소별 MAE 계산
mae_per_trait = []
for i in range(5):
    mae = mean_absolute_error(labels_array[:, i], predictions_array[:, i])  # i번째 요소에 대해 MAE 계산
    mae_per_trait.append(mae)

In [22]:
# 🔹 1 - MAE 계산
one_minus_mae_per_trait = [1 - mae for mae in mae_per_trait]

# ✅ 결과 출력
trait_names = ["O", "C", "E", "A", "N"]
for name, score in zip(trait_names, one_minus_mae_per_trait):
    print(f"1-MAE for {name}: {score:.4f}")

# ✅ 평균 1-MAE 출력
mean_one_minus_mae = np.mean(one_minus_mae_per_trait)
print(f"✅ Mean 1-MAE Score: {mean_one_minus_mae:.4f}")

1-MAE for O: 0.8695
1-MAE for C: 0.8781
1-MAE for E: 0.8636
1-MAE for A: 0.8746
1-MAE for N: 0.7728
✅ Mean 1-MAE Score: 0.8517


In [23]:
# ✅ 테스트 실행
sample_transcription = "아 그럼 3시 반."
predicted_ocean = predict_ocean(sample_transcription, model, tokenizer)

print(f"Predicted OCEAN Values: {predicted_ocean}")

Predicted OCEAN Values: [0.646683394908905, 0.6414223909378052, 0.6813028454780579, 0.6238353252410889, 0.6245225667953491]
