In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import openai
from dotenv import load_dotenv
import os

# .env 파일에서 OpenAI API 키 로드
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
# 모델 및 토크나이저 로드
model_name = "intfloat/multilingual-e5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

In [None]:
# 텍스트 임베딩 함수 정의
def embed_texts(texts):
    texts = [str(text) for text in texts]  # Ensure all texts are strings
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings.numpy()  # 텐서를 넘파이 배열로 변환

# 임베딩 평가 함수 정의
def evaluate_embeddings(question_embedding, answer_embedding):
    evaluation_prompt = f"""
    Please evaluate the following aspects of the answer based on its embedding:
    1. Accuracy
    2. Relevance
    3. Completeness
    4. Clarity

    Provide your evaluation in the format:
    Accuracy: [score]
    Relevance: [score]
    Completeness: [score]
    Clarity: [score]
    """
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": evaluation_prompt}
        ],
        max_tokens=150
    )
    return response.choices[0].message['content'].strip()

In [None]:
# 데이터 로드
gemma2 = pd.read_csv('qa_results_gemma2.csv')
gemma2_instruction = pd.read_csv('qa_results_gemma2_instruction.csv')
gpt4o = pd.read_csv('qa_results_gpt4o.csv')

# 데이터 결합
combined = pd.DataFrame({'question': gemma2['question'].astype(str)})
combined['gemma2'] = gemma2['answer'].astype(str)
combined['gemma2_instruction'] = gemma2_instruction['answer'].astype(str)
combined['gpt4o'] = gpt4o['answer'].astype(str)

# 빈 값 제거 (선택사항: 필요 시 사용)
combined = combined.dropna()

# 텍스트 임베딩
combined['question_embedding'] = list(embed_texts(combined['question'].tolist()))
combined['gemma2_embedding'] = list(embed_texts(combined['gemma2'].tolist()))
combined['gemma2_instruction_embedding'] = list(embed_texts(combined['gemma2_instruction'].tolist()))
combined['gpt4o_embedding'] = list(embed_texts(combined['gpt4o'].tolist()))

print("임베딩 완료!")

In [8]:
# 평가 결과 저장을 위한 딕셔너리 초기화
evaluations = {
    'question': [],
    'gemma2_accuracy': [], 'gemma2_relevance': [], 'gemma2_completeness': [], 'gemma2_clarity': [],
    'gemma2_instruction_accuracy': [], 'gemma2_instruction_relevance': [], 'gemma2_instruction_completeness': [], 'gemma2_instruction_clarity': [],
    'gpt4o_accuracy': [], 'gpt4o_relevance': [], 'gpt4o_completeness': [], 'gpt4o_clarity': []
}

for idx, row in combined.iterrows():
    question_embedding = row['question_embedding']
    gemma2_embedding = row['gemma2_embedding']
    gemma2_instruction_embedding = row['gemma2_instruction_embedding']
    gpt4o_embedding = row['gpt4o_embedding']

    try:
        gemma2_eval = evaluate_embeddings(question_embedding, gemma2_embedding)
        gemma2_inst_eval = evaluate_embeddings(question_embedding, gemma2_instruction_embedding)
        gpt4o_eval = evaluate_embeddings(question_embedding, gpt4o_embedding)

        evaluations['question'].append(row['question'])

        gemma2_scores = [int(s.split(': ')[1]) for s in gemma2_eval.split('\n')]
        evaluations['gemma2_accuracy'].append(gemma2_scores[0])
        evaluations['gemma2_relevance'].append(gemma2_scores[1])
        evaluations['gemma2_completeness'].append(gemma2_scores[2])
        evaluations['gemma2_clarity'].append(gemma2_scores[3])

        gemma2_inst_scores = [int(s.split(': ')[1]) for s in gemma2_inst_eval.split('\n')]
        evaluations['gemma2_instruction_accuracy'].append(gemma2_inst_scores[0])
        evaluations['gemma2_instruction_relevance'].append(gemma2_inst_scores[1])
        evaluations['gemma2_instruction_completeness'].append(gemma2_inst_scores[2])
        evaluations['gemma2_instruction_clarity'].append(gemma2_inst_scores[3])

        gpt4o_scores = [int(s.split(': ')[1]) for s in gpt4o_eval.split('\n')]
        evaluations['gpt4o_accuracy'].append(gpt4o_scores[0])
        evaluations['gpt4o_relevance'].append(gpt4o_scores[1])
        evaluations['gpt4o_completeness'].append(gpt4o_scores[2])
        evaluations['gpt4o_clarity'].append(gpt4o_scores[3])
    except Exception as e:
        print(f"Error processing row {idx}: {e}")

# 평가 결과를 DataFrame으로 변환
evaluation_df = pd.DataFrame(evaluations)

# 평가 결과를 CSV 파일로 저장
evaluation_df.to_csv('evaluation_results.csv', index=False)

print("평가 완료 및 결과 저장!")


임베딩 완료!
Error processing row 0: list index out of range
Error processing row 1: list index out of range
Error processing row 2: list index out of range
Error processing row 3: list index out of range
Error processing row 4: list index out of range
Error processing row 5: list index out of range
Error processing row 6: list index out of range
Error processing row 7: list index out of range
Error processing row 8: list index out of range
Error processing row 9: list index out of range
Error processing row 10: list index out of range
Error processing row 11: list index out of range
Error processing row 12: list index out of range


KeyboardInterrupt: 