In [7]:
import re
import os
from datasets import load_dataset
file_path = r"../data/data.txt"

In [8]:
# Fail fast when the corpus is missing: only printing a warning would leave
# `dataset` undefined and make the next cell fail with an unrelated NameError.
if not os.path.exists(file_path):
    raise FileNotFoundError(f"❌ 파일을 찾을 수 없습니다: {file_path}")

# Load the raw lines with the generic "text" builder. Passing split='train'
# makes load_dataset return a Dataset object directly.
# If a stale cache is ever suspected, a download_mode argument can be added.
dataset = load_dataset("text", data_files={"train": file_path}, split='train')

Generating train split: 12234 examples [00:00, 427755.44 examples/s]


In [9]:
def parse_line(example):
    """Split one raw negotiation line into structured fields.

    Args:
        example: Mapping with a ``'text'`` entry holding one raw line.
            Judging by the sample printed in this notebook, a line looks like
            ``<6 ints> YOU:/THEM: turns ... <selection> item0=.. <eos>
            reward=N agree|disagree <6 ints>``.

    Returns:
        dict with four keys:
            input: the six leading whitespace-separated integers as a single
                string, or "" when the line does not start with them.
            dialogue: text from the first ``YOU:``/``THEM:`` tag up to (not
                including) the first ``<selection>``, stripped. It therefore
                still ends with the speaker tag that precedes ``<selection>``.
                "" when no such span exists.
            reward: integer following ``reward=``; 0 when absent or not numeric.
            is_agree: True only when the outcome section contains the token
                ``agree`` and does not contain the token ``disagree``.
    """
    line = example['text']

    # Leading six integers (item counts and values).
    input_match = re.match(r'^(\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+)', line)
    input_vals = input_match.group(1) if input_match else ""

    # Conversation text, lazily matched up to the first <selection> marker.
    dialogue_match = re.search(r'(?:YOU:|THEM:).*?(?=<selection>)', line)
    dialogue = dialogue_match.group(0).strip() if dialogue_match else ""

    # Numeric reward.
    reward_match = re.search(r'reward=(\d+)', line)
    reward = int(reward_match.group(1)) if reward_match else 0

    # Agreement flag. Only the part after <selection> is inspected, and only
    # whole tokens count: a speaker saying "i disagree" during the dialogue
    # must not override the recorded outcome, and a token that merely contains
    # "agree" must not count as agreement. Lines without a <selection> marker
    # fall back to the whole line.
    _, marker, tail = line.partition("<selection>")
    outcome_tokens = (tail if marker else line).split()
    is_agree = "agree" in outcome_tokens and "disagree" not in outcome_tokens

    return {
        "input": input_vals,
        "dialogue": dialogue,
        "reward": reward,
        "is_agree": is_agree
    }

In [10]:
# Run the parser over every raw line; the parsed fields are added next to the
# original 'text' column.
parsed_ds = dataset.map(parse_line)

# Sanity check: report how many records came through and show the first one.
num_parsed = len(parsed_ds)
print(f"✅ 총 로드된 데이터: {num_parsed}개")
if num_parsed > 0:
    first_record = parsed_ds[0]
    print("첫 번째 데이터 샘플:", first_record)

Map: 100%|██████████| 12234/12234 [00:00<00:00, 52295.11 examples/s]

✅ 총 로드된 데이터: 12234개
첫 번째 데이터 샘플: {'text': '1 0 4 2 1 2 YOU: i would like 4 hats and you can have the rest . <eos> THEM: deal <eos> YOU: <selection> item0=0 item1=4 item2=0 <eos> reward=8 agree 1 4 4 1 1 2', 'input': '1 0 4 2 1 2', 'dialogue': 'YOU: i would like 4 hats and you can have the rest . <eos> THEM: deal <eos> YOU:', 'reward': 8, 'is_agree': True}





In [11]:
def _is_agreed_high_reward(example):
    """Keep a record only if the deal was agreed and its reward lies in [6, 8]."""
    return example['is_agree'] and 6 <= example['reward'] <= 8


# Filter the parsed dataset, then hand the survivors to pandas for analysis.
best_df = parsed_ds.filter(_is_agreed_high_reward).to_pandas()

Filter: 100%|██████████| 12234/12234 [00:00<00:00, 208133.02 examples/s]


In [13]:
# How many retained negotiations fall on each reward value?
reward_counts = best_df['reward'].value_counts()
print(reward_counts)

reward
6    2258
8    2236
7    1508
Name: count, dtype: int64


In [14]:
# 2. 대화의 질을 평가하는 함수 정의
def evaluate_quality(dialogue, min_turns=5, max_turns=10, min_length=200,
                     max_length=600, min_keywords=1):
    """Decide whether a dialogue is rich enough to be a few-shot candidate.

    A dialogue passes when all three heuristics hold:
      * turn count (occurrences of ``YOU:`` plus ``THEM:``) lies in
        ``[min_turns, max_turns]``; very long exchanges may be stuck in a loop;
      * text length lies in ``[min_length, max_length]``, measured after
        removing ``<eos>`` and the speaker tags and stripping the ends
        (spaces between words are still counted);
      * at least ``min_keywords`` distinct persuasion keywords occur as whole
        words (reason-giving: because/need, conditional: if,
        concession: split/both, ...).

    Args:
        dialogue: Dialogue string containing ``YOU:``/``THEM:`` speaker tags.
        min_turns, max_turns: Inclusive bounds on the number of speaker tags.
        min_length, max_length: Inclusive bounds on the cleaned text length.
        min_keywords: Minimum number of distinct keywords required.

    Returns:
        bool: True when every heuristic is satisfied, otherwise False.
    """
    # 1. Turn count: every speaker tag counts as one turn.
    total_turns = dialogue.count("YOU:") + dialogue.count("THEM:")

    # 2. Length of the utterance text without markup.
    clean_dialogue = (
        dialogue.replace("<eos>", "").replace("YOU:", "").replace("THEM:", "").strip()
    )
    length = len(clean_dialogue)

    # 3. Strategy keywords, matched against whole words. A plain substring test
    #    would let 'if' fire on "different" or "gift" and 'take' on "mistake".
    persuasion_keywords = {'because', 'need', 'if', 'split', 'both', 'give', 'take', 'only'}
    words = set(re.findall(r"[a-z']+", dialogue.lower()))
    keyword_hits = len(persuasion_keywords & words)

    # NOTE(review): an earlier comment asked for two or more keywords while the
    # code accepted a single one; the default of 1 keeps what the code did.
    # Pass min_keywords=2 for the stricter rule.
    is_dense = min_turns <= total_turns <= max_turns
    is_informative = min_length <= length <= max_length
    has_strategy = keyword_hits >= min_keywords

    return is_dense and is_informative and has_strategy

In [15]:
# Boolean mask marking the dialogues that pass the quality heuristics.
passes_quality = best_df['dialogue'].apply(evaluate_quality)
filtered_candidates = best_df[passes_quality]

In [16]:
# Report the size of the candidate pool, then preview its first ten dialogues.
candidate_total = len(filtered_candidates)
print(f"후보군 개수: {candidate_total}")

preview = filtered_candidates.head(10)
for i, row in preview.iterrows():
    text = row['dialogue']
    header = f"--- 후보 {i} (점수: {row['reward']}, 길이: {len(text)}) ---"
    print(header)
    print(text)
    print("\n")

후보군 개수: 1608
--- 후보 3 (점수: 7, 길이: 396) ---
YOU: im a reader , so id like the books . . . . you may have the hats and ball <eos> THEM: let me have two books and the hats <eos> YOU: its a trilogy so i really need to hold on to all the books <eos> THEM: cant do it <eos> YOU: ok , well best i can do is 2 books and the ball then . . . anything less and i cant make a deal <eos> THEM: so the hats and a book for me ? <eos> YOU: yes <eos> THEM:


--- 후보 5 (점수: 8, 길이: 304) ---
YOU: i love basketball and reading <eos> THEM: no . i want the hat and the balls <eos> YOU: both balls ? <eos> THEM: yeah or 1 ball and 1 book <eos> YOU: ok i want the hat and you can have the rest <eos> THEM: okay deal ill take the books and the balls you can have only the hat <eos> YOU: ok <eos> THEM:


--- 후보 6 (점수: 7, 길이: 304) ---
THEM: i love basketball and reading <eos> YOU: no . i want the hat and the balls <eos> THEM: both balls ? <eos> YOU: yeah or 1 ball and 1 book <eos> THEM: ok i want the hat and you can have t

In [17]:
def calculate_quality_score(row):
    """Compute a heuristic ranking score for one candidate dialogue.

    The score is the sum of:
      * 10 points per distinct persuasion keyword found as a whole word;
      * the average number of characters per turn (raw dialogue length divided
        by the number of ``YOU:``/``THEM:`` tags);
      * a 5-point bonus when the reward is 7 or higher.

    Args:
        row: Mapping (for example a DataFrame row) with a ``'dialogue'`` string
            and a numeric ``'reward'``.

    Returns:
        float: The score; higher means a stronger candidate.
    """
    dialogue = row['dialogue']
    reward = row['reward']

    # 1. Distinct strategy keywords, matched against whole words so that 'if'
    #    does not fire on "different" and 'take' does not fire on "mistake".
    persuasion_keywords = {
        'because', 'need', 'if', 'split', 'both',
        'give', 'take', 'only', 'deal', 'sorry',
    }
    words = set(re.findall(r"[a-z']+", dialogue.lower()))
    strategy_count = len(persuasion_keywords & words)

    # 2. Characters per turn. A dialogue without any speaker tag contributes 0
    #    here instead of raising ZeroDivisionError.
    turns = dialogue.count("YOU:") + dialogue.count("THEM:")
    chars_per_turn = len(dialogue) / turns if turns else 0.0

    score = (strategy_count * 10) + chars_per_turn

    # 3. Small bonus for the more successful negotiations.
    if reward >= 7:
        score += 5

    return score

# Re-apply the quality filter on an explicit copy, so that adding the score
# column below does not write into a view of best_df.
quality_mask = best_df['dialogue'].apply(evaluate_quality)
filtered_candidates = best_df[quality_mask].copy()
filtered_candidates['quality_score'] = filtered_candidates.apply(calculate_quality_score, axis=1)

# Rank by score (best first), then keep the top three rows of every reward
# value so the final selection covers each reward level.
ranked_candidates = filtered_candidates.sort_values(by='quality_score', ascending=False)
final_samples = ranked_candidates.groupby('reward').head(3)

print(f"최종 선발된 퓨샷 후보 개수: {len(final_samples)}")
separator = "-" * 50
for i, row in final_samples.iterrows():
    header = f"--- [Score: {row['quality_score']:.1f}] 점수: {row['reward']} ---"
    print(header)
    print(row['dialogue'])
    print(separator)

최종 선발된 퓨샷 후보 개수: 9
--- [Score: 149.1] 점수: 8 ---
THEM: hi i would like the balls and 1 hat and you can have the books and 1 hat <eos> YOU: your math doesn't add up , try again . <eos> THEM: it adds up for me . the best i can do is offer you the hat and books . and please , i'd appreciate it if you talked to me with respect . <eos> YOU: you said we both get a hat when there is only 1 diserepectful just direct not supposed to be a long hit and these are negotiations . <eos> THEM: im sorry that was a mistake . i apologize for it . i am willing to give you the books and hat . <eos> YOU: i'll take that have a good day . : ) <eos> THEM:
--------------------------------------------------
--- [Score: 147.4] 점수: 6 ---
YOU: i'd like both balls and at least 1 hat . are the books of value to you ? <eos> THEM: sorry i need both hats . you can have both balls and a book if youd like <eos> YOU: books are worth nothing to me . . . no chance you'll let go of just one of the hats and take the books ? <eo