**นายพชรพล เกตุแก้ว รหัสนักศึกษา 6610110190**
# Food Recommendation Dataset
- ชุดข้อมูลแนะนำอาหาร


---

In [None]:
import pandas as pd
import random

# Implicit hints: sentences that describe each food indirectly and must never
# contain the food's own name, so a classifier trained on them has to rely on
# context instead of a keyword. Every hint starts with one of the openers
# "I want ", "I'm craving ", "I'm thinking of " or "I need ".
implicit_hints = {
    'ice_cream': [
        "I want something sweet and cold that melts in my mouth",
        "I need a refreshing treat on a hot summer day",
        "I'm craving a dessert that comes in a cup or cone",
        "I want something creamy and sweet after dinner",
        "I'm thinking of a dessert that drips if you don't eat it fast",
        "I want something topped with chocolate sauce or nuts",
        "I'm craving a frozen dessert with flavors like vanilla, strawberry, or chocolate",
        "I want something so cold it makes my teeth chatter",
        "I'm thinking of a treat kids love during summer",
        "I want a dessert that’s soft, cold, and sweet all at once"
    ],
    'sushi': [
        "I want to eat something with vinegared rice and fresh raw fish on top",
        "I'm craving Japanese food served with soy sauce and wasabi",
        "I want small, delicate bites with bold umami flavor",
        "I'm thinking of something fresh, chilled, and neatly plated",
        "I want food wrapped in seaweed with rice and fish inside",
        "I'm craving something you eat with chopsticks, one piece at a time",
        "I want authentic Japanese cuisine made with the freshest seafood",
        "I'm thinking of a dish that looks simple but takes skill to make",
        "I want something lightly salty from soy sauce and clean-tasting",
        "I'm craving beautifully arranged bite-sized Japanese delicacies"
    ],
    'pizza': [
        "I want a large, hot baked dish with gooey melted cheese on top",
        "I'm craving something with a crispy crust, tomato sauce, and toppings",
        "I want food that’s perfect for sharing with friends",
        "I'm thinking of a triangle-shaped slice with stretchy cheese",
        "I want an Italian classic that smells amazing straight from the oven",
        "I'm craving something delivered in a round box with steam rising",
        "I want a meal with endless topping combinations",
        # Reworded: the previous wording named the dish itself ("pizza wheel").
        "I'm thinking of something you cut into slices with a rolling wheel cutter",
        "I want cheesy, saucy, and satisfying comfort food",
        "I'm craving something that pairs perfectly with a cold soda"
    ],
    'hamburger': [
        "I want a sandwich with a juicy patty between two soft buns",
        "I'm craving something handheld with beef, lettuce, tomato, and sauce",
        "I want fast food that’s messy, satisfying, and filling",
        "I'm thinking of a classic American meal served with fries",
        "I want something with melted cheese oozing out the sides",
        # Reworded: the previous wording named the dish itself ("burger").
        "I'm craving a grilled patty in a bun, juicy from the first bite",
        "I want a quick meal wrapped in paper or a small box",
        "I'm thinking of something you can eat with one hand",
        "I want layers of bread, meat, veggies, and condiments in every bite",
        "I'm craving a hearty, savory sandwich from a fast-food joint"
    ],
    'apple_pie': [
        "I want a warm dessert that smells like cinnamon and baked apples",
        "I'm craving something with a flaky crust and sweet fruit filling",
        "I want a classic homemade dessert that pairs with hot coffee or tea",
        "I'm thinking of a golden-brown pie fresh out of the oven",
        "I want something sweet-tart with a buttery, crisp crust",
        "I'm craving a dessert often served with a scoop of vanilla ice cream",
        "I want a comforting treat that smells like home baking",
        "I'm thinking of a bakery favorite with layers of pastry and spiced apples",
        "I want a dessert that’s crunchy outside and soft inside",
        "I'm craving a traditional pie with a cozy, nostalgic feel"
    ]
}

# Explicit templates: sentences that name the requested food outright,
# keyed by class label (five templates per label).
explicit_templates = dict(
    ice_cream=[
        "I want ice cream",
        "I feel like eating ice cream",
        "I'm craving ice cream",
        "Let's get some ice cream",
        "Nothing sounds better than ice cream right now",
    ],
    sushi=[
        "I want sushi",
        "I feel like eating sushi",
        "I'm craving sushi",
        "Let's order sushi",
        "I'm in the mood for Japanese food",
    ],
    pizza=[
        "I want pizza",
        "I feel like eating pizza",
        "I'm craving pizza",
        "Let's order a pizza",
        "I'm in the mood for cheesy pizza",
    ],
    hamburger=[
        "I want a hamburger",
        "I feel like eating a burger",
        "I'm craving a hamburger",
        "Let's grab a burger",
        "I'm in the mood for a juicy burger",
    ],
    apple_pie=[
        "I want apple pie",
        "I feel like eating apple pie",
        "I'm craving apple pie",
        "Let's get some apple pie",
        "I'm in the mood for a warm slice of pie",
    ],
)

# Sentence openers and endings used to vary the wording of synthetic examples.
# Two kinds of prefix live in this list: lead-ins that end in "," or "…" and
# must be followed by a full clause, and openers that end in a verb or
# preposition and must be followed by a bare noun phrase.
prefixes = [
    "Right now, ", "Today, ", "Lately, ", "After work, ", "This weekend, ",
    "Honestly, ", "I’ve been thinking… ", "All of a sudden, ", "For some reason, ",
    "I can’t stop thinking about ", "I really need ", "I’d love to have ",
    "If I could eat anything, it would be ", "I’m daydreaming about ",
    "My stomach is growling for "
]

suffixes = [
    " right now.", " at this very moment.", " more than anything.",
    " and I won’t be happy until I get it.", ". That’s exactly what I need.",
    ". It’s been on my mind all day.", ". Nothing else will do.",
    ". I can almost taste it.", ". My mouth is watering just thinking about it."
]

# Accumulator for every generated record ({"text": ..., "label": ...}).
data = []

# Openers that can be removed from a hint to leave the bare noun phrase.
_HINT_STEMS = ("I want ", "I'm craving ", "I'm thinking of ", "I need ")

# A prefix whose last visible character is one of these is a lead-in.
_LEAD_IN_ENDINGS = (",", "…")


def _strip_stem(sentence):
    """Return *sentence* without its leading opener, or None if it has none."""
    for stem in _HINT_STEMS:
        if sentence.startswith(stem):
            # Keep the original casing: lowercasing the phrase would turn
            # proper nouns such as "Japanese" into "japanese".
            return sentence[len(stem):]
    return None


def _attach_prefix(prefix, sentence, phrase):
    """Join *prefix* to the full sentence or to the bare phrase, as it requires."""
    if prefix.rstrip().endswith(_LEAD_IN_ENDINGS):
        # "Today, " + "I want something cold" rather than "Today, something cold".
        return prefix + sentence
    if phrase is None:
        # No noun phrase could be isolated; gluing an opener onto a whole
        # sentence would duplicate the verb, so leave the sentence untouched.
        return sentence
    return prefix + phrase


def generate_examples(label, explicit_list, implicit_list, n_synthetic=800):
    """Append seed and synthetic sentences for one class to the global ``data``.

    Every sentence of ``explicit_list`` and ``implicit_list`` is stored once
    with a trailing period. Then ``n_synthetic`` variations are built from
    randomly chosen implicit hints, each in one of four styles: with a prefix,
    with a suffix, with both, or unchanged.

    Args:
        label: Class label written to the "label" field of every record.
        explicit_list: Sentences that name the food directly.
        implicit_list: Sentences that describe the food indirectly. When it is
            empty no synthetic sentences are generated.
        n_synthetic: Number of synthetic variations to generate.

    Returns:
        None. Records are appended to the module-level ``data`` list.
    """
    data.extend({"text": text + ".", "label": label} for text in explicit_list)
    data.extend({"text": hint + ".", "label": label} for hint in implicit_list)

    if not implicit_list:
        # random.choice raises IndexError on an empty sequence.
        return

    for _ in range(n_synthetic):
        base = random.choice(implicit_list)
        phrase = _strip_stem(base)

        style = random.choice(['prefix', 'suffix', 'both', 'plain'])
        if style == 'prefix':
            text = _attach_prefix(random.choice(prefixes), base, phrase)
        elif style == 'suffix':
            restated = base if phrase is None else "I want " + phrase
            text = restated + random.choice(suffixes)
        elif style == 'both':
            text = _attach_prefix(random.choice(prefixes), base, phrase) + random.choice(suffixes)
        else:
            text = base

        # Slicing keeps this safe for an empty string.
        text = text[:1].upper() + text[1:]
        data.append({"text": text, "label": label})

# Seed the generator so the synthetic sentences are identical on every run;
# without this only the row shuffle below (random_state=42) is reproducible.
random.seed(42)

# Generate the records for every food category.
for label in implicit_hints:
    generate_examples(label, explicit_templates[label], implicit_hints[label], n_synthetic=900)

# Collect all records into a DataFrame and shuffle the row order.
df = pd.DataFrame(data)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Write the dataset to disk as UTF-8 CSV without the index column.
output_path = "/content/food_recommendation_dataset.csv"
df.to_csv(output_path, index=False, encoding="utf-8")

# Report the dataset size, the per-class counts and a fixed sample of rows.
print("English food recommendation dataset generated successfully!")
print(f"Total samples: {len(df)}")
print(f"Class distribution:\n{df['label'].value_counts().sort_index()}")
print("\nSample entries:")
print(df.sample(10, random_state=1).to_string(index=False))

โค้ดด้านบนทำหน้าที่ดังต่อไปนี้
- รวบรวมตัวอย่างข้อความที่อธิบายอาหาร 5 ประเภท (`apple_pie`, `hamburger`, `ice_cream`, `pizza`, `sushi`) ทั้งในรูปแบบชัดเจน (explicit) และโดยนัย (implicit) เพื่อฝึกโมเดลให้เข้าใจบริบทแทนการพึ่งพาคำหลัก  
- สร้างข้อความสังเคราะห์เพิ่มเติมโดยการผสมคำนำหน้า (prefixes) และคำลงท้าย (suffixes) เข้ากับคำใบ้แบบโดยนัย เพื่อเพิ่มความหลากหลายทางภาษาและลดการ overfitting  
- จัดเก็บข้อมูลทั้งหมดลงใน DataFrame พร้อมสลับลำดับแบบสุ่ม และบันทึกเป็นไฟล์ CSV สำหรับใช้ฝึกโมเดล NLP ต่อไป  
- แสดงจำนวนตัวอย่างรวมและการกระจายของแต่ละคลาส เพื่อให้มั่นใจว่าชุดข้อมูลมีความสมดุลและเหมาะสมกับการฝึกโมเดลจำแนกประเภทอาหาร