In [1]:
import re

def parse_user_results(lines):
    """Normalize one user's raw answer lines into ``'Question N: X'`` strings.

    Each line is stripped and matched against a fixed list of answer formats,
    tried in order; the first format that matches supplies the answer digits.
    Blank lines and lines matching no format are skipped.

    The question number found in the input (if any) is ignored: answers are
    renumbered 1, 2, 3, ... in the order they are recognized.

    Args:
        lines: Iterable of raw text lines belonging to a single user.

    Returns:
        List of strings of the form ``'Question <n>: <answer>'``.
    """
    # (regex, index of the capture group holding the answer), in priority order.
    answer_formats = (
        # 1) 'Question N. The index number of the [MASK] is X.'
        (r'Question\s*\d+\.\s*The index number of the \[MASK\] is (\d+)\.?', 1),
        # 2) 'Question N: X' or 'Question N. X'
        (r'Question\s*\d+[\.:]\s*(\d+)', 1),
        # 2-1) 'Question N) X'
        (r'Question\s*(\d+)\)\s*(\d+)', 2),
        # 3) 'N. The index number of the [MASK] is X.'
        (r'\d+\.\s*The index number of the \[MASK\] is (\d+)\.?', 1),
        # 4) 'N. X' or 'N: X'
        (r'\d+[\.:]\s*(\d+)', 1),
        # 4-1) 'N) X'
        (r'(\d+)\)\s*(\d+)', 2),
        # 5) 'The index number of the [MASK] is X'
        (r'The index number of the \[MASK\] is (\d+)\.?', 1),
        # 6) a line consisting only of digits
        (r'^(\d+)$', 1),
    )

    normalized = []
    for raw_line in lines:
        text = raw_line.strip()
        if not text:
            continue  # skip blank lines

        for pattern, answer_group in answer_formats:
            found = re.match(pattern, text)
            if found:
                # Number by position in the output, not by the number in the input.
                position = len(normalized) + 1
                normalized.append(f"Question {position}: {found.group(answer_group)}")
                break
        # Lines that match none of the formats fall through and are ignored.

    return normalized


def standardize_results(input_filename, output_filename,
                        base_dir='../../results/gpt_result'):
    """Rewrite a per-user results file into a uniform ``Question N: X`` layout.

    The input is split into sections by header lines that start with
    ``[user]``. The lines of each section are passed to
    ``parse_user_results`` and the returned strings are written back out
    under the same ``[user]`` header, one per line, followed by a blank line.

    Notes:
        * Lines that appear before the first ``[user]`` header are ignored.
        * A user whose section contains no non-blank lines is not written.
        * If the same user header occurs more than once, the later section
          replaces the earlier one.
        * The input is read completely before the output is opened, so
          ``input_filename`` and ``output_filename`` may name the same file
          (the file is then overwritten in place).

    Args:
        input_filename: Name of the file to read, relative to ``base_dir``.
        output_filename: Name of the file to write, relative to ``base_dir``.
        base_dir: Directory that both file names are resolved against
            (joined with ``/``). Defaults to the project's GPT results folder.

    Raises:
        OSError: If the input cannot be opened for reading or the output
            cannot be opened for writing.
    """
    input_path = f'{base_dir}/{input_filename}'
    output_path = f'{base_dir}/{output_filename}'

    with open(input_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    user_data = {}
    current_user = None
    current_lines = []

    for line in lines:
        line = line.strip()
        if not line:
            continue  # skip blank lines

        # A line beginning with '[...]' starts a new user's section.
        user_match = re.match(r'\[([^\]]+)\]', line)
        if user_match:
            # Flush the section collected for the previous user.
            if current_user is not None and current_lines:
                user_data[current_user] = parse_user_results(current_lines)
                current_lines = []
            current_user = user_match.group(1)
        elif current_user is not None:
            current_lines.append(line)
        # Otherwise the line precedes any user header and is dropped.

    # Flush the final user's section.
    if current_user is not None and current_lines:
        user_data[current_user] = parse_user_results(current_lines)

    # Write the standardized results.
    with open(output_path, 'w', encoding='utf-8') as outfile:
        for user, results in user_data.items():
            outfile.write(f"[{user}]\n")
            for result in results:
                outfile.write(f"{result}\n")
            outfile.write("\n")  # blank line between users


In [7]:
# Standardize the summary file in place. The input and output names are
# identical, so the raw file is overwritten with the normalized
# "Question N: X" lines. Keep a copy of the original if it is still needed.
input_filename = '[250422] MIND_recurrent summary.txt'
output_filename = '[250422] MIND_recurrent summary.txt'
standardize_results(input_filename, output_filename)
