In [1]:
def check_negative_results(input_file: str, output_file: str, metadata_folder: str, purpose: str) -> None:
    """Validate per-user answers in a result file and write an error report.

    The result file is read from ``../../results/gpt_result/<input_file>`` and
    the expected question count per user from
    ``../../prompts/<metadata_folder>/<purpose>/metadata/output_metadata.txt``.
    The report is written to ``../../results/error_detect/<output_file>``.
    All paths are relative to the current working directory.

    A user listed in the metadata is reported when:

    * the user has no section in the result file, or fewer parsed answers
      than the metadata expects (reported as '질문 누락', i.e. missing
      questions), or
    * any parsed answer is outside the inclusive range 1..5 (reported as
      '범위 초과', i.e. out of range).

    Users present in the result file but absent from the metadata are not
    checked, and surplus answers are not reported.

    Args:
        input_file: File name of the result file to check.
        output_file: File name of the report to create (overwritten).
        metadata_folder: Folder name under ``../../prompts``.
        purpose: Sub-folder of ``metadata_folder`` that holds ``metadata``.

    Raises:
        OSError: If an input file cannot be opened or the report cannot be
            written (for example, a directory does not exist).
    """
    import re

    input_file = f'../../results/gpt_result/{input_file}'
    output_file = f'../../results/error_detect/{output_file}'
    metadata_folder = f'../../prompts/{metadata_folder}'

    def parse_negative_result_file(filename):
        """Return ``{user_id: {question_label: int_answer}}`` from the result file."""
        users_answers = {}
        current_user = None
        with open(filename, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                if line.startswith('[') and line.endswith(']'):
                    # A "[user]" header opens a new section; a repeated header
                    # discards the answers collected for that user so far.
                    current_user = line[1:-1]
                    users_answers[current_user] = {}
                elif line.startswith('Question ') and current_user is not None:
                    question_part, _, answer_part = line.partition(':')
                    try:
                        answer = int(answer_part.strip())
                    except ValueError:
                        # Unparsable answers are skipped on purpose: the line
                        # then does not count towards the user's answers, so
                        # it can surface as a missing question below.
                        continue
                    users_answers[current_user][question_part.strip()] = answer
        return users_answers

    def parse_output_metadata_file(filename):
        """Return ``{user_id: expected_question_count}`` from the metadata file."""
        pattern = re.compile(r'User ID: (\S+).*Question 수: (\d+)')
        users_question_count = {}
        with open(filename, 'r', encoding='utf-8') as f:
            for raw_line in f:
                match = pattern.match(raw_line.strip())
                if match:
                    users_question_count[match.group(1)] = int(match.group(2))
        return users_question_count

    def question_sort_key(label):
        """Sort key that orders digit runs numerically ('Question 2' < 'Question 10')."""
        # re.split with a capturing group alternates text / digits, so every
        # odd index is a digit run; keys therefore compare type-consistently.
        chunks = re.split(r'(\d+)', label)
        return [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]

    # Parse files
    users_answers = parse_negative_result_file(input_file)
    output_metadata_file = f'{metadata_folder}/{purpose}/metadata/output_metadata.txt'
    users_question_count = parse_output_metadata_file(output_metadata_file)

    # Collect errors (only users listed in the metadata are checked)
    errors = []
    for user_id, expected_questions in users_question_count.items():
        user_answers = users_answers.get(user_id)
        error_types = []
        if user_answers is None:
            error_types.append(f'질문 누락(실제 {expected_questions}개)')
            user_answers = {}
        else:
            if len(user_answers) < expected_questions:
                error_types.append(f'질문 누락(실제 {expected_questions}개)')
            if any(not (1 <= answer <= 5) for answer in user_answers.values()):
                error_types.append('범위 초과')
        if error_types:
            errors.append({
                'user_id': user_id,
                'error_types': error_types,
                'answers': user_answers,
            })

    # Generate output
    output_lines = []
    if errors:
        summaries = [
            f"[{error['user_id']}] : {', '.join(error['error_types'])}"
            for error in errors
        ]
        output_lines.append(f"오류 USER: {', '.join(summaries)}")
        # The first character of each ID is dropped here — presumably a
        # one-letter prefix before the user number; verify against the data.
        user_numbers = [error['user_id'][1:] for error in errors]
        output_lines.append(f'USER list: [{", ".join(user_numbers)}]')
        output_lines.append(f'총 오류 USER: {len(errors)}')
        output_lines.append('')
        for summary, error in zip(summaries, errors):
            output_lines.append(summary)
            answers = error['answers']
            # Numeric-aware ordering so "Question 10" follows "Question 9"
            # instead of landing between "Question 1" and "Question 2".
            for question_num in sorted(answers, key=question_sort_key):
                output_lines.append(f'{question_num} : {answers[question_num]}')
            output_lines.append('')
    else:
        output_lines.append('오류가 발견되지 않았습니다.')

    # Write to output file (every line, including the last, ends with '\n')
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(output_lines) + '\n')


In [3]:
# Run the check for the [241230-5] negative_3 results.
# Positional order: input_file, output_file, metadata_folder, purpose.
check_negative_results(
    '[241230-5] negative_3.txt',
    '[241230-5] negative_3_detect.txt',
    '[241230-5] 1~1000',
    'with_negative',
)

# check_negative_results(
#     input_file='[241226-1] negative.txt', 
#     output_file='[241226-1] negative_detect.txt',
#     metadata_folder='[241226-1] 1~1000',
#     purpose='with_negative'
#     )