In [3]:
import pandas as pd
import pyarrow.parquet as pq

generated_data_path = '/mnt/data1/projects/Conf_Agg/output_s/curated/validation_curated.parquet'
generated_data_path = '/mnt/data1/projects/Conf_Agg/output_s/generated/sample_300/raw_generated_shard_0.parquet'
try:
    # memory_map=False로 시도 (큰 파일의 경우 더 안정적)
    table = pq.read_table(generated_data_path, memory_map=False)
    generated_data = table.to_pandas(types_mapper=pd.ArrowDtype)
except Exception as e:
    logger.warning(f"PyArrow memory_map=False 실패: {e}, memory_map=True로 재시도...")
    try:
        # memory_map=True로 재시도
        table = pq.read_table(generated_data_path, memory_map=True)
        generated_data = table.to_pandas(types_mapper=pd.ArrowDtype)
    except Exception as e2:
        logger.warning(f"PyArrow types_mapper 사용 실패: {e2}, 기본 변환으로 재시도...")
        # types_mapper 없이 시도
        table = pq.read_table(generated_data_path, memory_map=False)
        generated_data = table.to_pandas()

print(f"생성된 데이터 로드: {len(generated_data)}개 응답, {generated_data['problem_id'].nunique()}개 문제")
print("전체 컬럼 목록:", generated_data.columns.tolist())

# 2GB+ 문자열 오류 (offset overflow) 방지를 위해
# string[pyarrow] 타입을 large_string[pyarrow]로 즉시 변환
print("string 타입을 large_string으로 변환 중 (offset overflow 방지)...")
string_cols = generated_data.select_dtypes(include=['string[pyarrow]']).columns

if not string_cols.empty:
    print(f"변환 대상 컬럼: {string_cols.to_list()}")
    for col in string_cols:
        try:
            generated_data[col] = generated_data[col].astype('large_string[pyarrow]')
        except Exception as e:
            print(f"'{col}' 컬럼 large_string 변환 실패: {e}")
else:
    # types_mapper가 실패했거나 object 타입을 로드된 경우
    print("pyarrow string 타입 컬럼이 없거나, object 타입으로 로드됨. object 타입 변환 시도...")
    object_cols = generated_data.select_dtypes(include=['object']).columns
    for col in object_cols:
        try:
            # 'object' 컬럼이 실제 문자열 데이터인지 확인 (선택적)
            if not generated_data[col].empty and isinstance(generated_data[col].dropna().iloc[0], str):
                generated_data[col] = generated_data[col].astype('large_string[pyarrow]')
                print(f"'{col}' (object) 컬럼을 large_string으로 변환.")
        except Exception as e:
            # 문자열이 아닌 object일 수 있으므로 경고만 하고 넘어감
            print(f"'{col}' (object) 컬럼 변환 중 오류 발생 (무시): {e}")

print("large_string 변환 완료.")
# Step 0: 데이터셋 정리
# generated_data = self.clean_dataset(generated_data)


생성된 데이터 로드: 1174개 응답, 300개 문제
전체 컬럼 목록: ['problem_id', 'problem_text', 'ground_truth', 'response_id', 'generated_text', 'output_token_count', 'logprobs', 'worker_gpu', 'worker_replica', 'mean_group_confidence', 'bottom_10_percent_confidence', 'tail_confidence', 'lowest_group_confidence', 'top_10_percent_confidence', 'highest_group_confidence']
string 타입을 large_string으로 변환 중 (offset overflow 방지)...
변환 대상 컬럼: ['problem_id', 'problem_text', 'ground_truth', 'response_id', 'generated_text', 'worker_replica']
large_string 변환 완료.


In [32]:
import re
from math_verify import parse, verify

import re
from typing import Optional
import latex2sympy2
from sympy import simplify, sympify, N
from latex2sympy2 import latex2sympy

def grade_with_latex2sympy(answer: str, ground_truth: str) -> bool:
    """latex2sympy2 라이브러리 사용"""
    try:
        ans_expr = latex2sympy(answer)
        gt_expr = latex2sympy(ground_truth)
        diff = simplify(ans_expr - gt_expr)
        return diff == 0
    except:
        return False
    
def grade_with_latex2sympy_numeric(answer: str, ground_truth: str, tolerance=1e-10) -> bool:
    """latex2sympy2 - 수치 비교"""
    try:
        ans_expr = latex2sympy(answer)
        gt_expr = latex2sympy(ground_truth)
        diff = N(ans_expr - gt_expr)
        return abs(complex(diff)) < tolerance
    except:
        return False

def extract_final_answer_from_content(content: str) -> str:
    """
    content에서 최종 답안 추출 (여러 패턴 지원)
    
    Args:
        content: 답안이 포함된 텍스트
        
    Returns:
        추출된 답안 (없으면 빈 문자열)
    """
    if pd.isna(content) or not content:
        return ""
    
    content_str = str(content).strip()
    
    # 1. 마지막 \boxed{} 찾기 (가장 우선)
    boxed_matches = list(re.finditer(r'\\boxed\{', content_str))
    if boxed_matches:
        # 마지막 \boxed의 위치부터 시작
        last_start = boxed_matches[-1].end()
        
        # 중괄호 균형 맞춰서 추출
        brace_count = 1
        end_pos = last_start
        
        while end_pos < len(content_str) and brace_count > 0:
            if content_str[end_pos] == '{' and (end_pos == 0 or content_str[end_pos-1] != '\\'):
                brace_count += 1
            elif content_str[end_pos] == '}' and (end_pos == 0 or content_str[end_pos-1] != '\\'):
                brace_count -= 1
            end_pos += 1
        
        if brace_count == 0:
            answer = content_str[last_start:end_pos-1].strip()
            # LaTeX 명령어 정리 (선택사항)
            answer = re.sub(r'\\text\{([^}]+)\}', r'\1', answer)
            return answer
    
    # 2. 대체 패턴들
    fallback_patterns = [
        r'(?:최종\s*답|final\s*answer|답)[:\s]*([^\n]+)',
        r'therefore[,:\s]+([^\n]+)',
        r'=\s*([0-9.,]+)\s*$',  # 마지막 등식
    ]
    
    for pattern in fallback_patterns:
        match = re.search(pattern, content_str, re.IGNORECASE)
        if match:
            return match.group(1).strip()
    
    # 3. 마지막 줄에서 숫자 추출
    lines = [l.strip() for l in content_str.split('\n') if l.strip()]
    if lines:
        last_line = lines[-1]
        numbers = re.findall(r'-?[0-9]+\.?[0-9]*', last_line)
        if numbers:
            return numbers[-1]
    
    return ""

for idx, row in generated_data.iterrows():
    generated_data.loc[idx, 'final_answer'] = extract_final_answer_from_content(generated_data.loc[idx, 'generated_text'])
    generated_data.loc[idx, 'is_correct_exact'] = generated_data.loc[idx, 'final_answer'].strip().lower() == generated_data.loc[idx, 'ground_truth'].strip().lower()
    generated_data.loc[idx, 'is_correct_math_verifier'] = verify(generated_data.loc[idx, 'final_answer'], generated_data.loc[idx, 'ground_truth'])
    generated_data.loc[idx, 'is_correct_math_verifier_with_parser'] = verify(parse(generated_data.loc[idx, 'final_answer']), parse(generated_data.loc[idx, 'ground_truth']))
    generated_data.loc[idx, 'is_correct_math_verifier_generated_text'] = verify(parse(generated_data.loc[idx, 'generated_text']), parse(generated_data.loc[idx, 'ground_truth']))
    generated_data.loc[idx, 'is_correct_latex2sympy'] = grade_with_latex2sympy(generated_data.loc[idx, 'final_answer'], generated_data.loc[idx, 'ground_truth'])
    generated_data.loc[idx, 'is_correct_latex2sympy_numeric'] = grade_with_latex2sympy_numeric(generated_data.loc[idx, 'final_answer'], generated_data.loc[idx, 'ground_truth'])


ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generat

In [36]:

import pandas as pd

# 모든 컬럼 전체가 보이도록 설정
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

iloc_num = 0
print(f"problem_text: {generated_data.iloc[iloc_num]['problem_text']}")
print(f"generated_text: {generated_data.iloc[iloc_num]['generated_text']}")
print(f"ground_truth: {generated_data.iloc[iloc_num]['ground_truth']}")
print(f"final_answer: {generated_data.iloc[iloc_num]['final_answer']}")
print(f"is_correct_math_verifier: {generated_data.iloc[iloc_num]['is_correct_math_verifier']}")
print(f"is_correct_math_verifier_with_parser: {generated_data.iloc[iloc_num]['is_correct_math_verifier_with_parser']}")
print(f"is_correct_latex2sympy: {generated_data.iloc[iloc_num]['is_correct_latex2sympy']}")
print(f"is_correct_latex2sympy_numeric: {generated_data.iloc[iloc_num]['is_correct_latex2sympy_numeric']}")




problem_text: If $x=1+2^p$ and $y=1+2^{-p}$, then $y$ in terms of $x$ is
generated_text: We are given:

$$
x = 1 + 2^p
$$
$$
y = 1 + 2^{-p}
$$

We are to express $ y $ in terms of $ x $, i.e., find $ y $ as a function of $ x $.

---

### Step 1: Express $ 2^p $ in terms of $ x $

From the first equation:

$$
x = 1 + 2^p \Rightarrow 2^p = x - 1
$$

---

### Step 2: Express $ 2^{-p} $ in terms of $ x $

We know that:

$$
2^{-p} = \frac{1}{2^p}
$$

So,

$$
2^{-p} = \frac{1}{x - 1}
$$

---

### Step 3: Substitute into the expression for $ y $

$$
y = 1 + 2^{-p} = 1 + \frac{1}{x - 1}
$$

---

### Step 4: Simplify the expression

$$
y = 1 + \frac{1}{x - 1} = \frac{(x - 1) + 1}{x - 1} = \frac{x}{x - 1}
$$

---

### ✅ Final Answer:

$$
\boxed{y = \frac{x}{x - 1}}
$$
ground_truth: \frac{x}{x-1}
final_answer: y = \frac{x}{x - 1}
is_correct_math_verifier: False
is_correct_math_verifier_with_parser: False
is_correct_latex2sympy: False
is_correct_latex2sympy_numeric: False


In [6]:
from math_verify import parse, verify
sample_text = "y = \\frac{x + 1}{x - 1}"
# sample_text = generated_data.iloc[0]['problem_text']
sample_text_parsed = parse(sample_text)
sample_ground_truth = "frac{x}{x - 1}"
sample_ground_truth_parsed = parse(sample_ground_truth)
print(sample_text)
print(sample_text_parsed)
print(sample_ground_truth)
print(sample_ground_truth_parsed)
print(verify(sample_text, sample_ground_truth))
print(verify(sample_text_parsed, sample_ground_truth_parsed))


y = \frac{x + 1}{x - 1}
[1, '1']
frac{x}{x - 1}
[1, '1']
False
True


In [31]:
import pandas as pd
from typing import Dict
import re

# 설치
# !pip install math-verify sympy latex2sympy2 -q

from math_verify import parse, verify
from sympy import simplify, sympify, N
from sympy.parsing.latex import parse_latex
from latex2sympy2 import latex2sympy

import warnings
warnings.filterwarnings('ignore')  # ANTLR 경고 무시

class MultiLibraryMathGrader:
    """여러 수학 채점 라이브러리를 있는 그대로 활용"""
    
    
    def grade_with_math_verify(self, answer: str, ground_truth: str) -> bool:
        """math-verify 라이브러리 사용"""
        try:
            return verify(answer, ground_truth)
        except Exception as e:
            return False

    def grade_with_math_verify_with_parser(self, answer: str, ground_truth: str) -> bool:
        """math-verify 라이브러리 사용"""
        try:
            return verify(parse(answer), parse(ground_truth))
        except Exception as e:
            return False

    def grade_with_sympy_direct(self, answer: str, ground_truth: str) -> bool:
        """SymPy - sympify 직접 사용"""
        try:
            ans_expr = sympify(answer)
            gt_expr = sympify(ground_truth)
            diff = simplify(ans_expr - gt_expr)
            return diff == 0
        except:
            return False
    
    def grade_with_sympy_latex(self, answer: str, ground_truth: str) -> bool:
        """SymPy - LaTeX 파서 사용"""
        try:
            ans_expr = parse_latex(answer)
            gt_expr = parse_latex(ground_truth)
            diff = simplify(ans_expr - gt_expr)
            return diff == 0
        except:
            return False
    
    def grade_with_latex2sympy(self, answer: str, ground_truth: str) -> bool:
        """latex2sympy2 라이브러리 사용"""
        try:
            ans_expr = latex2sympy(answer)
            gt_expr = latex2sympy(ground_truth)
            diff = simplify(ans_expr - gt_expr)
            return diff == 0
        except:
            return False
    
    def grade_with_latex2sympy_numeric(self, answer: str, ground_truth: str, tolerance=1e-10) -> bool:
        """latex2sympy2 - 수치 비교"""
        try:
            ans_expr = latex2sympy(answer)
            gt_expr = latex2sympy(ground_truth)
            diff = N(ans_expr - gt_expr)
            return abs(complex(diff)) < tolerance
        except:
            return False
    def grade_with_exact(self, answer: str, ground_truth: str) -> bool:
        """정확한 비교"""
        return answer.strip().lower() == ground_truth.strip().lower()
    
    def grade_all(self, answer: str, ground_truth: str) -> Dict[str, bool]:
        """모든 라이브러리로 채점"""
        if not answer or pd.isna(answer):
            return {method: False for method in [
                'math_verify', 'math_verify_with_parser',
                'latex2sympy', 'latex2sympy_numeric', 'exact', 'any_correct'
            ]}
        
        results = {}
        
        # 1. math-verify
        results['math_verify'] = self.grade_with_math_verify(answer, ground_truth)
        
        # 2. math-verify (with parser)
        results['math_verify_with_parser'] = self.grade_with_math_verify_with_parser(answer, ground_truth)
        
        # 4. latex2sympy2
        results['latex2sympy'] = self.grade_with_latex2sympy(answer, ground_truth)
        
        
        # 6. latex2sympy2 (수치 비교)
        results['latex2sympy_numeric'] = self.grade_with_latex2sympy_numeric(answer, ground_truth)

        # 7. 정확한 비교    
        results['exact'] = self.grade_with_exact(answer, ground_truth)
        
        # 8. 최종 판정 (하나라도 True면 정답)
        results['any_correct'] = any([
            results['math_verify'],
            results['math_verify_with_parser'],
            results['latex2sympy'],
            results['latex2sympy_numeric'],
            results['exact']
        ])
        
        return results


def extract_final_answer_from_content(content: str) -> str:
    """content에서 최종 답안 추출"""
    if pd.isna(content) or not content:
        return ""
    
    content_str = str(content).strip()
    
    # \boxed{} 찾기
    boxed_matches = list(re.finditer(r'\\boxed\{', content_str))
    if boxed_matches:
        last_start = boxed_matches[-1].end()
        brace_count = 1
        end_pos = last_start
        
        while end_pos < len(content_str) and brace_count > 0:
            if content_str[end_pos] == '{' and (end_pos == 0 or content_str[end_pos-1] != '\\'):
                brace_count += 1
            elif content_str[end_pos] == '}' and (end_pos == 0 or content_str[end_pos-1] != '\\'):
                brace_count -= 1
            end_pos += 1
        
        if brace_count == 0:
            return content_str[last_start:end_pos-1].strip()
    
    return ""


# 메인 채점 코드
grader = MultiLibraryMathGrader()

for idx, row in generated_data.iterrows():
    # 답안 추출
    final_answer = extract_final_answer_from_content(row['generated_text'])
    generated_data.loc[idx, 'final_answer'] = final_answer
    
    # 모든 라이브러리로 채점
    grade_results = grader.grade_all(final_answer, row['ground_truth'])
    
    # 결과 저장
    generated_data.loc[idx, 'is_correct_math_verify'] = grade_results['math_verify']
    generated_data.loc[idx, 'is_correct_math_verify_with_parser'] = grade_results['math_verify_with_parser']
    generated_data.loc[idx, 'is_correct_latex2sympy'] = grade_results['latex2sympy']
    generated_data.loc[idx, 'is_correct_latex2sympy_numeric'] = grade_results['latex2sympy_numeric']
    generated_data.loc[idx, 'is_correct_exact'] = grade_results['exact']
    generated_data.loc[idx, 'is_correct'] = grade_results['any_correct']

# 결과 요약
print("=== 📊 채점 결과 요약 ===")
print(f"math-verify:              {generated_data['is_correct_math_verify'].sum()}/{len(generated_data)}")
print(f"math-verify (with parser): {generated_data['is_correct_math_verify_with_parser'].sum()}/{len(generated_data)}")
print(f"latex2sympy2:             {generated_data['is_correct_latex2sympy'].sum()}/{len(generated_data)}")
print(f"latex2sympy2 (numeric):   {generated_data['is_correct_latex2sympy_numeric'].sum()}/{len(generated_data)}")
print(f"exact:                   {generated_data['is_correct_exact'].sum()}/{len(generated_data)}")
print(f"─" * 50)
print(f"✅ 최종 정답 (ANY):        {generated_data['is_correct'].sum()}/{len(generated_data)}")

# 실패 케이스 분석
failures = generated_data[~generated_data['is_correct']].copy()
if len(failures) > 0:
    print(f"\n❌ 모든 방법이 실패한 케이스: {len(failures)}개")
    print("\n처음 5개 실패 케이스:")
    for idx, row in failures.head(5).iterrows():
        print(f"\n[{idx}]")
        print(f"  Answer:       {row['final_answer']}")
        print(f"  Ground Truth: {row['ground_truth']}")

ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generated code versions disagree: 4.7.2!=4.9.3
ANTLR runtime and generat

KeyError: "None of [Index([-1, -1, -1, -1, -2, -2, -2, -2, -2, -2,\n       ...\n       -2, -2, -2, -2, -1, -1, -1, -1, -1, -1],\n      dtype='object', length=1174)] are in the [columns]"

In [39]:

import pandas as pd

# 모든 컬럼 전체가 보이도록 설정
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

iloc_num = 0
print(f"generated_text: {generated_data.iloc[iloc_num]['generated_text']}")
print(f"ground_truth: {generated_data.iloc[iloc_num]['ground_truth']}")
print(f"final_answer: {generated_data.iloc[iloc_num]['final_answer']}")
print(f"is_correct_math_verify: {generated_data.iloc[iloc_num]['is_correct_math_verify']}")
print(f"is_correct_latex2sympy_numeric: {generated_data.iloc[iloc_num]['is_correct_latex2sympy_numeric']}")



generated_text: We are given:

$$
x = 1 + 2^p
$$
$$
y = 1 + 2^{-p}
$$

We are to express $ y $ in terms of $ x $, i.e., find $ y $ as a function of $ x $.

---

### Step 1: Express $ 2^p $ in terms of $ x $

From the first equation:

$$
x = 1 + 2^p \Rightarrow 2^p = x - 1
$$

---

### Step 2: Express $ 2^{-p} $ in terms of $ x $

We know that:

$$
2^{-p} = \frac{1}{2^p}
$$

So,

$$
2^{-p} = \frac{1}{x - 1}
$$

---

### Step 3: Substitute into the expression for $ y $

$$
y = 1 + 2^{-p} = 1 + \frac{1}{x - 1}
$$

---

### Step 4: Simplify the expression

$$
y = 1 + \frac{1}{x - 1} = \frac{(x - 1) + 1}{x - 1} = \frac{x}{x - 1}
$$

---

### ✅ Final Answer:

$$
\boxed{y = \frac{x}{x - 1}}
$$
ground_truth: \frac{x}{x-1}
final_answer: y = \frac{x}{x - 1}
is_correct_math_verify: False
is_correct_latex2sympy_numeric: True


In [11]:
empty_content_rows = generated_data[generated_data['final_answer'].isnull() | (generated_data['final_answer'].astype(str).str.strip() == '')]
print(f"'final_answer' 컬럼이 비어있는 응답 개수: {len(empty_content_rows)}")

'final_answer' 컬럼이 비어있는 응답 개수: 3


In [2]:
empty_content_rows = generated_data[generated_data['content'].isnull() | (generated_data['content'].astype(str).str.strip() == '')]
print(f"'content' 컬럼이 비어있는 응답 개수: {len(empty_content_rows)}")
# final_answer가 비어있는 응답
empty_final_answer_rows = generated_data[generated_data['final_answer'].isnull() | (generated_data['final_answer'].astype(str).str.strip() == '')]
print(f"'final_answer' 컬럼이 비어있는 응답 개수: {len(empty_final_answer_rows)}")

# 두 조건이 모두 비어있는 응답
both_empty = generated_data[
    (generated_data['final_answer'].isnull() | (generated_data['final_answer'].astype(str).str.strip() == '')) &
    (generated_data['content'].isnull() | (generated_data['content'].astype(str).str.strip() == ''))
]
print(f"'final_answer'와 'content' 모두 비어있는 응답 개수: {len(both_empty)}")

# 교집합이 전체 내에서 어떤 비중인지
if len(empty_final_answer_rows) > 0:
    overlap_ratio = len(both_empty) / len(empty_final_answer_rows) * 100
    print(f"'final_answer'가 비어있는 응답 중 'content'도 비어있는 비율: {overlap_ratio:.2f}%")
if len(empty_content_rows) > 0:
    overlap_ratio2 = len(both_empty) / len(empty_content_rows) * 100
    print(f"'content'가 비어있는 응답 중 'final_answer'도 비어있는 비율: {overlap_ratio2:.2f}%")



KeyError: 'content'

In [4]:
# final_answer가 비어있는 응답 개수 세기 (file_context_0 참고)
num_empty_final_answer = generated_data[
    generated_data['final_answer'].notna() & 
    (generated_data['final_answer'] != '')
].shape[0]
print(f"'final_answer'가 비어있지 않은 응답 개수: {num_empty_final_answer}")


KeyError: 'final_answer'

In [None]:
# content 컬럼에서 토큰(공백 기준) 수가 1000개 이상인 row 개수 세기 (file_context_0 참고)
content_token_counts = generated_data['content'].astype(str).apply(lambda x: int(len(x.split()) * 1.3))
long_token_content_rows = generated_data[content_token_counts >= 1000]
num_long_token_content = len(long_token_content_rows)
print(f"'content' 컬럼에서 토큰(공백 기준 * 1.3)이 1000개 이상인 응답 개수: {num_long_token_content}")


KeyError: 'problem'