## measure_metrics 함수 정의

In [83]:
import os
import math
import re

def read_hidden_positions(file_path):
    """
    Read the per-user list of correct-answer positions from a text file.

    Each relevant line is expected to look like ``U<id> : [p1, p2, ...]``.
    Lines that do not start with that pattern (after stripping surrounding
    whitespace) are ignored.

    Args:
        file_path: Path to the UTF-8 encoded hidden-positions file.

    Returns:
        dict mapping user id (int) to the list of positions (ints) in file
        order. An empty bracket pair ``[]`` yields an empty list.

    Raises:
        ValueError: If a non-empty entry inside the brackets is not an integer.
    """
    line_pattern = re.compile(r'U(\d+)\s*:\s*\[(.*?)\]')

    hidden_positions = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            match = line_pattern.match(line.strip())
            if not match:
                continue
            user_id = int(match.group(1))
            # Drop empty tokens so "[]" and a trailing comma ("[4, ]") do not
            # reach int(''), mirroring the filtering done for rankings.
            tokens = (token.strip() for token in match.group(2).split(','))
            hidden_positions[user_id] = [int(token) for token in tokens if token]
    return hidden_positions

def read_predicted_rankings(file_path):
    """
    Load predicted rankings from a result file.

    The file text is divided into blocks that each begin with a
    ``[U<number>]`` header. Inside a block, every line shaped like
    ``Question X: a, b, c`` contributes one ranking; other lines are ignored.

    Returns a dict mapping user id (int) to a dict of question number (int)
    to the list of ranked integers, in the order they appear on the line.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read()

    block_pattern = r'\[U(\d+)\](.*?)(?=\n\[U\d+\]|\Z)'
    question_pattern = r'Question\s+(\d+)\s*:\s*(.*)'

    rankings_by_user = {}
    for uid_text, body in re.findall(block_pattern, text, flags=re.DOTALL):
        per_question = {}
        for raw_line in body.strip().split('\n'):
            found = re.match(question_pattern, raw_line.strip())
            if found is None:
                continue
            number_text, items_text = found.groups()
            # Empty pieces (e.g. from a trailing or doubled comma) are dropped
            # before the integer conversion.
            pieces = (piece.strip() for piece in items_text.strip().split(','))
            per_question[int(number_text)] = [int(piece) for piece in pieces if piece]
        rankings_by_user[int(uid_text)] = per_question

    return rankings_by_user


def ndcg_for_rank(rank, k):
    """
    Return the nDCG contribution of a hit at 1-based position ``rank``
    under cutoff ``k``.

    A position within the cutoff scores ``1 / log2(rank + 1)``; a position
    beyond the cutoff scores 0.
    """
    within_cutoff = rank <= k
    return 1 / math.log2(rank + 1) if within_cutoff else 0


def _mean(values):
    """Return the arithmetic mean of ``values``, or 0 when the list is empty."""
    return sum(values) / len(values) if values else 0


def compute_metrics(rankings, hidden_positions):
    """
    Score predicted rankings against the hidden correct items.

    Args:
        rankings: dict mapping user id -> {question number -> ranked list}.
        hidden_positions: dict mapping user id -> list of correct items, where
            question number ``q`` (1-based) is looked up at index ``q - 1``.

    Users absent from ``hidden_positions`` are skipped entirely. Questions
    whose number is outside ``1..len(correct items)`` are skipped as well, so a
    question number of 0 or below can no longer wrap around to the end of the
    list through negative indexing.

    Per question, with ``rank`` the 1-based position of the correct item:
        * nDCG@5 / nDCG@10 come from ``ndcg_for_rank``.
        * reciprocal rank is ``1 / rank``.
        * AUC is ``(n - rank) / (n - 1)`` for a ranking of ``n > 1`` items,
          and 0 for a single-item ranking.
        * all four are 0 when the correct item is not in the ranking.
        * top-1 accuracy counts rankings whose first element is the correct item.

    Returns:
        A 16-tuple, in this order:
        user_ndcg (per-user dict holding the ``(question, value)`` lists under
        'ndcg5', 'ndcg10', 'auc', 'mrr' and their means under 'avg_*'),
        overall_user_ndcg5, overall_user_ndcg10,
        overall_question_ndcg5, overall_question_ndcg10,
        user_accuracy, overall_user_accuracy, overall_question_accuracy,
        total_correct_top1, total_questions,
        user_correct_counts, user_total_questions,
        overall_user_auc, overall_question_auc,
        overall_user_mrr, overall_question_mrr.
        "overall_user_*" values average the per-user means; "overall_question_*"
        values average over every scored question.
    """
    user_ndcg = {}
    user_accuracy = {}
    user_auc = {}
    user_mrr = {}
    user_correct_counts = {}
    user_total_questions = {}

    question_ndcg_5 = []
    question_ndcg_10 = []
    question_auc = []
    question_mrr = []

    total_questions = 0
    total_correct_top1 = 0

    for user_id, user_rankings in rankings.items():
        if user_id not in hidden_positions:
            continue

        user_positions = hidden_positions[user_id]
        ndcg5_values = []
        ndcg10_values = []
        auc_values = []
        mrr_values = []

        correct_top1_count = 0
        total_user_questions = 0

        for q_num, ranking in user_rankings.items():
            # Question numbers are 1-based; reject anything outside the list
            # (including 0 and negatives, which would otherwise index from the end).
            if not 1 <= q_num <= len(user_positions):
                continue

            correct_item = user_positions[q_num - 1]
            total_questions += 1
            total_user_questions += 1

            try:
                rank = ranking.index(correct_item) + 1
            except ValueError:
                # Correct item was not ranked at all: every metric is 0.
                ndcg5 = 0
                ndcg10 = 0
                rr = 0
                auc = 0
            else:
                size = len(ranking)
                ndcg5 = ndcg_for_rank(rank, 5)
                ndcg10 = ndcg_for_rank(rank, 10)
                rr = 1.0 / rank
                auc = (size - rank) / (size - 1) if size > 1 else 0

            ndcg5_values.append((q_num, ndcg5))
            ndcg10_values.append((q_num, ndcg10))
            auc_values.append((q_num, auc))
            mrr_values.append((q_num, rr))

            question_ndcg_5.append(ndcg5)
            question_ndcg_10.append(ndcg10)
            question_auc.append(auc)
            question_mrr.append(rr)

            if ranking and ranking[0] == correct_item:
                correct_top1_count += 1
                total_correct_top1 += 1

        avg_auc = _mean([value for _, value in auc_values])
        avg_mrr = _mean([value for _, value in mrr_values])

        user_ndcg[user_id] = {
            'ndcg5': ndcg5_values,
            'ndcg10': ndcg10_values,
            'auc': auc_values,
            'mrr': mrr_values,
            'avg_ndcg5': _mean([value for _, value in ndcg5_values]),
            'avg_ndcg10': _mean([value for _, value in ndcg10_values]),
            'avg_auc': avg_auc,
            'avg_mrr': avg_mrr,
        }
        user_accuracy[user_id] = (
            correct_top1_count / total_user_questions if total_user_questions else 0
        )
        user_auc[user_id] = avg_auc
        user_mrr[user_id] = avg_mrr
        user_correct_counts[user_id] = correct_top1_count
        user_total_questions[user_id] = total_user_questions

    overall_user_ndcg5 = _mean([info['avg_ndcg5'] for info in user_ndcg.values()])
    overall_user_ndcg10 = _mean([info['avg_ndcg10'] for info in user_ndcg.values()])
    overall_question_ndcg5 = _mean(question_ndcg_5)
    overall_question_ndcg10 = _mean(question_ndcg_10)

    overall_user_auc = _mean(list(user_auc.values()))
    overall_question_auc = _mean(question_auc)

    overall_user_mrr = _mean(list(user_mrr.values()))
    overall_question_mrr = _mean(question_mrr)

    overall_user_accuracy = _mean(list(user_accuracy.values()))
    overall_question_accuracy = total_correct_top1 / total_questions if total_questions else 0

    return (
        user_ndcg,
        overall_user_ndcg5,
        overall_user_ndcg10,
        overall_question_ndcg5,
        overall_question_ndcg10,
        user_accuracy,
        overall_user_accuracy,
        overall_question_accuracy,
        total_correct_top1,
        total_questions,
        user_correct_counts,
        user_total_questions,
        overall_user_auc,
        overall_question_auc,
        overall_user_mrr,
        overall_question_mrr
    )

def write_results(
    file_path,
    user_ndcg,
    overall_user_ndcg5,
    overall_user_ndcg10,
    overall_question_ndcg5,
    overall_question_ndcg10,
    user_accuracy,
    overall_user_accuracy,
    overall_question_accuracy,
    total_correct_top1,
    total_questions,
    user_correct_counts,
    user_total_questions,
    overall_user_auc,
    overall_question_auc,
    overall_user_mrr,
    overall_question_mrr
):
    """
    Write a plain-text metrics report to ``file_path`` (UTF-8, overwriting).

    The report starts with the overall user-averaged and question-averaged
    figures, followed by a separator line, and then one section per user in
    ascending user-id order: a summary line plus one line per question with
    its nDCG@5 and nDCG@10. All metric values are printed with three decimals.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        emit = f.write

        # Overall figures averaged per user.
        emit(f'전체 USER 평균 nDCG@5  : {overall_user_ndcg5:.3f}\n')
        emit(f'전체 USER 평균 nDCG@10 : {overall_user_ndcg10:.3f}\n')
        emit(f'전체 USER 평균 AUC  : {overall_user_auc:.3f}\n')
        emit(f'전체 USER 평균 MRR : {overall_user_mrr:.3f}\n')
        emit(f'전체 USER 평균 Accuracy : {overall_user_accuracy:.3f}\n\n')

        # Overall figures averaged per question.
        emit(f'전체 Question nDCG@5  : {overall_question_ndcg5:.3f}\n')
        emit(f'전체 Question nDCG@10 : {overall_question_ndcg10:.3f}\n')
        emit(f'전체 Question AUC : {overall_question_auc:.3f}\n')
        emit(f'전체 Question MRR : {overall_question_mrr:.3f}\n\n')
        emit(f'전체 Question Accuracy : {overall_question_accuracy:.3f} ({total_correct_top1} / {total_questions})\n')
        emit('\n-----------------------------------------------------------\n\n')

        for user_id in sorted(user_ndcg):
            stats = user_ndcg[user_id]
            mean_ndcg5 = stats['avg_ndcg5']
            mean_ndcg10 = stats['avg_ndcg10']
            mean_auc = stats['avg_auc']
            mean_mrr = stats['avg_mrr']
            acc = user_accuracy.get(user_id, 0)
            hits = user_correct_counts.get(user_id, 0)
            asked = user_total_questions.get(user_id, 0)

            summary = (
                f'[U{user_id}] : 평균 nDCG@5 : {mean_ndcg5:.3f}  |  '
                f'평균 nDCG@10 : {mean_ndcg10:.3f}  |  '
                f'AUC : {mean_auc:.3f}  |  '
                f'MRR : {mean_mrr:.3f}  |  '
                f'Accuracy : {acc:.3f} ({hits}/{asked})\n'
            )
            emit(summary)

            # The per-question lists hold (question number, value) pairs.
            per_q5 = dict(stats['ndcg5'])
            per_q10 = dict(stats['ndcg10'])

            # A question present in only one of the two lists prints 0.000 for the other.
            for q_num in sorted(per_q5.keys() | per_q10.keys()):
                at5 = per_q5.get(q_num, 0.0)
                at10 = per_q10.get(q_num, 0.0)
                detail = (
                    f'  - Question {q_num} : '
                    f'nDCG@5 = {at5:.3f} | '
                    f'nDCG@10 = {at10:.3f}\n'
                )
                emit(detail)
            emit('\n')

def measure_metrics(target_file, target_folder, purpose):
    """
    Entry point: compute ranking metrics for one result file and write a report.

    Args:
        target_file: Name of the prediction file inside
            ``../../results/gpt_result``.
        target_folder: Folder below ``../../prompts`` that holds the
            hidden-positions metadata.
        purpose: When equal to ``'new_negative'`` the metadata is read from
            ``<prompts folder>/metadata``; for any other value it is read from
            ``<prompts folder>/<purpose>/metadata``.

    The report is written to ``../../results/metrics/<name>_metrics.txt``,
    where ``<name>`` is ``target_file`` without a trailing ``.txt``. The
    metrics directory is created if missing, and a completion message is
    printed at the end.
    """
    target_folder = f'../../prompts/{target_folder}'

    if purpose == 'new_negative':
        hidden_file = f'{target_folder}/metadata/hidden_positions.txt'
    else:
        hidden_file = f'{target_folder}/{purpose}/metadata/hidden_positions.txt'

    # Strip only a trailing ".txt"; str.replace would also delete ".txt"
    # wherever it occurs in the middle of the file name.
    suffix = '.txt'
    base_name = target_file[:-len(suffix)] if target_file.endswith(suffix) else target_file
    output_file = f'{base_name}_metrics.txt'

    predicted_rankings = read_predicted_rankings(
        os.path.join('../../results/gpt_result', target_file)
    )
    hidden_positions = read_hidden_positions(hidden_file)

    # compute_metrics returns its values in the same order that write_results
    # takes them after file_path, so the tuple is forwarded unchanged.
    metrics = compute_metrics(predicted_rankings, hidden_positions)

    metrics_dir = os.path.join('../../results', 'metrics')
    os.makedirs(metrics_dir, exist_ok=True)
    write_results(os.path.join(metrics_dir, output_file), *metrics)
    print(f'{output_file} 생성 완료 (대상 : {target_file})')



## 실행

In [84]:
# Evaluate the Adressa "only_title" result file; purpose='only_positive' makes
# measure_metrics read hidden positions from the only_positive/metadata subfolder.
measure_metrics(target_file='[KCC-ranking]Adressa only_title (final).txt', target_folder = "Adressa/KCC/ranking/only_title", purpose='only_positive')

[KCC-ranking]Adressa only_title (final)_metrics.txt 생성 완료 (대상 : [KCC-ranking]Adressa only_title (final).txt)


In [93]:
# Evaluate the Adressa "subcate" result file; purpose='only_positive' makes
# measure_metrics read hidden positions from the only_positive/metadata subfolder.
measure_metrics(target_file='[KCC-ranking]Adressa subcate (final).txt', target_folder = "Adressa/KCC/ranking/subcate", purpose='only_positive')

[KCC-ranking]Adressa subcate (final)_metrics.txt 생성 완료 (대상 : [KCC-ranking]Adressa subcate (final).txt)


In [94]:
# Evaluate the Adressa "cate" result file; purpose='only_positive' makes
# measure_metrics read hidden positions from the only_positive/metadata subfolder.
measure_metrics(target_file='[KCC-ranking]Adressa cate (final).txt', target_folder = "Adressa/KCC/ranking/cate", purpose='only_positive')

[KCC-ranking]Adressa cate (final)_metrics.txt 생성 완료 (대상 : [KCC-ranking]Adressa cate (final).txt)


In [87]:
# Evaluate the Adressa "both(subcate)" result file; purpose='only_positive' makes
# measure_metrics read hidden positions from the only_positive/metadata subfolder.
measure_metrics(target_file='[KCC-ranking]Adressa both(subcate) (final).txt', target_folder = "Adressa/KCC/ranking/both(subcate)", purpose='only_positive')

[KCC-ranking]Adressa both(subcate) (final)_metrics.txt 생성 완료 (대상 : [KCC-ranking]Adressa both(subcate) (final).txt)


### KCC

In [55]:
# Evaluate the MIND "recurrent summary, negative" result file; purpose='with_negative'
# makes measure_metrics read hidden positions from the with_negative/metadata subfolder.
measure_metrics(target_file='[250430]cate,title recurrent summary, negative.txt', target_folder = "MIND/summary/[250429]cate,title recurrent summary, negative", purpose='with_negative')

[250430]cate,title recurrent summary, negative_metrics.txt 생성 완료 (대상 : [250430]cate,title recurrent summary, negative.txt)


In [53]:
# Evaluate the MIND "one summary, negative, summary-subcate on" result file; purpose='with_negative'
# makes measure_metrics read hidden positions from the with_negative/metadata subfolder.
measure_metrics(target_file='[250430]cate,title one summary, negative, summary-subcate on.txt', target_folder = "MIND/summary/[250429]cate,title one summary, negative, summary-subcate on", purpose='with_negative')

[250430]cate,title one summary, negative, summary-subcate on_metrics.txt 생성 완료 (대상 : [250430]cate,title one summary, negative, summary-subcate on.txt)
