In [8]:
import json
import itertools
import random

# Define all 18 drinks with their attributes
drink_data = [
    {"name": "허브티", "temperature": ["핫"], "add_ons": []},
    {"name": "에스프레소", "temperature": ["핫"], "add_ons": ["샷 추가"]},
    {"name": "토마토주스", "temperature": ["아이스"], "add_ons": []},
    {"name": "키위주스", "temperature": ["아이스"], "add_ons": []},
    {"name": "망고스무디", "temperature": ["아이스"], "add_ons": []},
    {"name": "딸기스무디", "temperature": ["아이스"], "add_ons": []},
    {"name": "레몬에이드", "temperature": ["아이스"], "add_ons": []},
    {"name": "복숭아아이스티", "temperature": ["아이스"], "add_ons": []},
    {"name": "아포카토", "temperature": ["아이스"], "add_ons": ["샷 추가"]},
    {"name": "쿠키앤크림", "temperature": ["아이스"], "add_ons": ["휘핑크림"]},
    {"name": "카페라떼", "temperature": ["핫", "아이스"], "add_ons": ["샷 추가"]},
    {"name": "바닐라라떼", "temperature": ["핫", "아이스"], "add_ons": ["샷 추가", "바닐라시럽", "휘핑크림"]},
    {"name": "초콜릿라떼", "temperature": ["핫", "아이스"], "add_ons": ["휘핑크림"]},
    {"name": "카푸치노", "temperature": ["핫", "아이스"], "add_ons": ["샷 추가", "휘핑크림"]},
    {"name": "아메리카노", "temperature": ["핫", "아이스"], "add_ons": ["샷 추가"]},
    {"name": "카라멜마끼아또", "temperature": ["핫", "아이스"], "add_ons": ["샷 추가", "카라멜시럽", "휘핑크림"]},
    {"name": "카페모카", "temperature": ["핫", "아이스"], "add_ons": ["휘핑크림"]},
    {"name": "말차라떼", "temperature": ["핫", "아이스"], "add_ons": ["휘핑크림"]},
]

# Input variations
add_phrases = ["주세요", "추가해주세요", "넣어주세요"]
update_phrases = ["바꿔주세요", "변경해주세요"]
replace_phrases = ["대신 주세요"]
remove_phrases = ["빼주세요", "삭제해주세요", "취소해주세요"]
add_addon_phrases = ["추가해서 주세요", "넣어주세요"]
remove_addon_phrases = ["빼주세요", "제거해주세요", "없애주세요"]
cancel_phrases = ["전체 취소해주세요", "주문 취소해주세요", "모든 것을 취소해주세요"] 

# Generate dataset
examples = []
sizes = ["미디움", "라지", "엑스라지"]
quantities = range(1, 11)  # Quantities from 1 to 10

# Generate all combinations
for drink in drink_data:
    name = drink["name"]
    valid_temps = drink["temperature"]
    valid_add_ons = drink["add_ons"]

    for size in sizes:
        for temp in valid_temps:
            add_on_combinations = [["None"]] if not valid_add_ons else [["None"]] + [
                list(combo) for r in range(1, len(valid_add_ons) + 1)
                for combo in itertools.combinations(valid_add_ons, r)
            ]

            for add_ons in add_on_combinations:
                for quantity in quantities:
                    # Current order
                    target_indexes = [f"0-{i}" for i in range(quantity)]
                    current_orders = {
                        "drinks": [
                            {
                                "target_indexes": target_indexes,
                                "name": name,
                                "size": size,
                                "temperature": temp,
                                "quantity": quantity,
                                "add_ons": add_ons if add_ons != ["None"] else "None",
                            }
                        ]
                    }

                    # Add Examples
                    for size_case in [None, "미디움", "라지", "엑스라지"]:
                        for phrase in add_phrases:
                            # Default case: No size specified, default to 미디움
                            if size_case is None:
                                input_text = f"{name} {quantity}잔 {phrase}"
                                response = f"new_order_item name: {name} size: 미디움 temperature: {temp} quantity: {quantity} add_ons: None"
                            else:
                                # Explicit size case
                                input_text = f"{name} {size_case} {quantity}잔 {phrase}"
                                response = f"new_order_item name: {name} size: {size_case} temperature: {temp} quantity: {quantity} add_ons: None"
                    
                            examples.append({
                                "current_orders": {"drinks": []},
                                "input": input_text,
                                "response": response,
                            })

                    # Update All Cups
                    new_name = random.choice([d["name"] for d in drink_data if d["name"] != name])
                    for phrase in update_phrases:
                        input_text = f"{name}를 {new_name}로 {phrase}"
                        response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{new_name: {new_name}}}"
                        examples.append({
                            "current_orders": current_orders,
                            "input": input_text,
                            "response": response,
                        })

                    # Replacement with "대신 주세요"
                    for phrase in replace_phrases:
                        input_text = f"{name} {quantity}잔 대신 {new_name} 주세요" if quantity > 1 else f"{name} 대신 {new_name} 주세요"
                        response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{new_name: {new_name}}}"
                        examples.append({
                            "current_orders": current_orders,
                            "input": input_text,
                            "response": response,
                        })

                    # Remove All Cups
                    for phrase in remove_phrases:
                        input_text = f"{name}를 {phrase}"
                        response = f"delete_item target_indexes: {json.dumps(target_indexes)}"
                        examples.append({
                            "current_orders": current_orders,
                            "input": input_text,
                            "response": response,
                        })

                    # Add Attribute Combinations (Paste Here)
                    for size_case in ["미디움", "라지", "엑스라지"]:  # Sizes
                        for temp_case in valid_temps:  # Temperatures
                            if temp_case not in valid_temps:
                                continue  # Skip invalid temperature combinations for the drink

                            for addon_combo in add_on_combinations:  # Add-ons
                                # 1. Size Change Only
                                input_text = f"{name}를 {size_case}로 바꿔주세요"
                                response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{size: {size_case}}}"
                                examples.append({
                                    "current_orders": current_orders,
                                    "input": input_text,
                                    "response": response,
                                })

                                # 2. Temperature Change Only
                                input_text = f"{name}를 {temp_case}로 바꿔주세요"
                                response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{temperature: {temp_case}}}"
                                examples.append({
                                    "current_orders": current_orders,
                                    "input": input_text,
                                    "response": response,
                                })

                                # 3. Add-ons Added
                                if addon_combo != ["None"]:  # Skip cases with no add-ons
                                    for addon in addon_combo:
                                        input_text = f"{name}에 {addon} 추가해주세요"
                                        response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{add_ons: {addon}}}"
                                        examples.append({
                                            "current_orders": current_orders,
                                            "input": input_text,
                                            "response": response,
                                        })

                                # 4. Add-ons Removed
                                if addon_combo != ["None"]:
                                    for addon in addon_combo:
                                        input_text = f"{name}에서 {addon} 빼주세요"
                                        response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{remove_add_ons: {addon}}}"
                                        examples.append({
                                            "current_orders": current_orders,
                                            "input": input_text,
                                            "response": response,
                                        })

                                # 5. Multiple Attribute Changes (Size + Temperature)
                                input_text = f"{name}를 {size_case} {temp_case}로 바꿔주세요"
                                response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{size: {size_case}, temperature: {temp_case}}}"
                                examples.append({
                                    "current_orders": current_orders,
                                    "input": input_text,
                                    "response": response,
                                })

                                # 6. Multiple Attribute Changes (Size + Add-ons)
                                if addon_combo != ["None"]:
                                    input_text = f"{name}를 {size_case}로 바꾸고 {addon_combo[0]} 추가해주세요"
                                    response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{size: {size_case}, add_ons: {addon_combo[0]}}}"
                                    examples.append({
                                        "current_orders": current_orders,
                                        "input": input_text,
                                        "response": response,
                                    })

                                # 7. All Attribute Changes
                                if addon_combo != ["None"]:
                                    input_text = f"{name}를 {size_case} {temp_case}로 바꾸고 {addon_combo[0]} 추가해주세요"
                                    response = f"update_item target_indexes: {json.dumps(target_indexes)} updates: {{size: {size_case}, temperature: {temp_case}, add_ons: {addon_combo[0]}}}"
                                    examples.append({
                                        "current_orders": current_orders,
                                        "input": input_text,
                                        "response": response,
                                    })

                    # Remove Specific Quantities
                    for num in range(1, quantity):
                        selected_indexes = target_indexes[:num]
                        for phrase in remove_phrases:
                            input_text = f"{name} {quantity}잔 중 {num}잔 {phrase}"
                            response = f"delete_item target_indexes: {json.dumps(selected_indexes)}"
                            examples.append({
                                "current_orders": current_orders,
                                "input": input_text,
                                "response": response,
                            })


                    # Update Specific Quantities
                    for num in range(1, quantity):
                        selected_indexes = target_indexes[:num]
                        for phrase in update_phrases:
                            input_text = f"{name} {quantity}잔 중 {num}잔을 {new_name}로 {phrase}"
                            response = f"update_item target_indexes: {json.dumps(selected_indexes)} updates: {{new_name: {new_name}}}"
                            examples.append({
                                "current_orders": current_orders,
                                "input": input_text,
                                "response": response,
                            })

                    # Cancel All Orders
                    for phrase in cancel_phrases:
                        input_text = f"{phrase}"  # e.g., "전체 취소해주세요"
                        response = "cancel_all_orders"
                        examples.append({
                            "current_orders": current_orders,
                            "input": input_text,
                            "response": response,
                        })

# Save examples
with open("final_coffee_kiosk_dataset.jsonl", "w", encoding="utf-8") as f:
    for example in examples:
        f.write(json.dumps(example, ensure_ascii=False) + "\n")

print(f"Dataset with {len(examples)} examples saved to final_coffee_kiosk_dataset.jsonl!")


Dataset with 533115 examples saved to final_coffee_kiosk_dataset.jsonl!


In [9]:
import json

# Load dataset
input_file = "final_coffee_kiosk_dataset.jsonl"
output_file = "cleaned_coffee_kiosk_dataset.jsonl"

# Read the JSONL file
with open(input_file, "r", encoding="utf-8") as f:
    examples = [json.loads(line.strip()) for line in f]

# Remove duplicates using a set for unique entries
unique_examples = []
seen = set()

for example in examples:
    # Create a tuple that represents the unique combination of fields
    unique_key = (
        json.dumps(example["current_orders"], sort_keys=True, ensure_ascii=False),
        example["input"],
        example["response"]
    )
    if unique_key not in seen:
        seen.add(unique_key)
        unique_examples.append(example)

# Write the cleaned dataset back to a new JSONL file
with open(output_file, "w", encoding="utf-8") as f:
    for example in unique_examples:
        f.write(json.dumps(example, ensure_ascii=False) + "\n")

print(f"Cleaned dataset with {len(unique_examples)} unique examples saved to {output_file}!")


Cleaned dataset with 139635 unique examples saved to cleaned_coffee_kiosk_dataset.jsonl!
