# In [None]:  (Jupyter cell marker left over from the notebook export)
# ✅ STEP 1: Upload & Load Data
import pandas as pd

# Read the recipe table and the age/health table from their hard-coded paths.
recipes_df = pd.read_csv("/content/70000_recipes_nutrients.csv")
health_df = pd.read_csv("/content/health_age_data_70000_synthetic.csv")

# Trim surrounding whitespace and lowercase every header so that later
# column lookups can rely on a single canonical spelling.
recipes_df.columns = list(map(str.lower, map(str.strip, recipes_df.columns)))
health_df.columns = list(map(str.lower, map(str.strip, health_df.columns)))

print("✅ Recipes Loaded:", recipes_df.shape)
print("✅ Health Data Loaded:", health_df.shape)

# ✅ STEP 2: Define Cleaning Function for Ingredients
def clean_ingredients(raw_text):
    """Split a comma-separated ingredient string into unique, tidy names.

    Each piece is lowercased and has surrounding whitespace and the
    punctuation characters ``.``, ``:`` and ``-`` trimmed. Duplicates are
    dropped while first-seen order is kept, so the same input always yields
    the same list (a plain ``set`` may reorder strings from run to run
    because of hash randomization).

    Args:
        raw_text: Comma-separated ingredients, or a missing value
            (``None`` / ``NaN``).

    Returns:
        list[str]: Unique cleaned ingredient names in order of first
        appearance; an empty list when ``raw_text`` is missing.
    """
    if pd.isna(raw_text):
        return []
    # str() guards against non-string cells (e.g. a bare number in the CSV).
    pieces = (
        piece.strip(" \t\r\n.:-")
        for piece in str(raw_text).lower().split(",")
    )
    # Filter AFTER trimming so fragments that were only punctuation (" - ")
    # do not survive as empty strings.
    return list(dict.fromkeys(piece for piece in pieces if piece))

# ✅ STEP 3: Build Health Rules from Synthetic Data

def _split_rule_items(cell):
    """Turn one comma-separated table cell into a list of lowercase items."""
    # An empty CSV cell is read back as NaN; str(NaN) would otherwise yield
    # the bogus item "nan".
    if pd.isna(cell):
        return []
    return [item.strip().lower() for item in str(cell).split(",") if item.strip()]


def build_health_rules(df):
    """Build a per-condition lookup of ingredients to avoid and to recommend.

    Args:
        df: DataFrame with 'disease', 'avoid' and 'recommend' columns, where
            'avoid' and 'recommend' hold comma-separated ingredient names.

    Returns:
        dict: Maps each stripped, lowercased disease name to
        ``{"avoid": [...], "recommend": [...]}``. Rows whose disease is
        missing or blank are skipped; when a disease appears more than once
        the last row wins.

    Raises:
        KeyError: If any of the three required columns is absent.
    """
    required = ("disease", "avoid", "recommend")
    if not all(column in df.columns for column in required):
        raise KeyError("health_rules.csv must include 'disease', 'avoid', 'recommend' columns.")

    rules = {}
    for disease, avoid, recommend in zip(df["disease"], df["avoid"], df["recommend"]):
        # A rule without a condition name cannot be looked up later.
        if pd.isna(disease):
            continue
        condition = str(disease).strip().lower()
        if not condition:
            continue
        rules[condition] = {
            "avoid": _split_rule_items(avoid),
            "recommend": _split_rule_items(recommend),
        }
    return rules

# Seed a small default health_rules.csv only when none exists yet, so that a
# rules file already present in the working directory is never overwritten.
import os

sample_rules = pd.DataFrame({
    "disease": ["diabetes", "heart disease"],
    "avoid": ["sugar, salt, white rice", "ghee, fried foods"],
    "recommend": ["turmeric, olive oil, cinnamon, spinach", "chia seeds, broccoli, olive oil"]
})
if not os.path.exists("health_rules.csv"):
    sample_rules.to_csv("health_rules.csv", index=False)

# Always load from disk so the in-memory rules reflect whichever file is used.
health_rules = build_health_rules(pd.read_csv("health_rules.csv"))
print("⚕️ Health rules loaded:", health_rules.keys())

# ✅ STEP 4: Nutrition Target Fetcher
def get_user_nutrient_goals(age):
    """Look up the daily nutrient targets stored for ``age`` in ``health_df``.

    Args:
        age: Value compared for equality against the 'ages' column.

    Returns:
        dict: Values from the first matching row, keyed by
        "Daily Calorie Target", "Protein", "Carbohydrates", "Fat" and "Fiber".

    Raises:
        IndexError: If no row of ``health_df`` has this age.
    """
    matches = health_df[health_df["ages"] == age]
    if matches.empty:
        # Same exception type a bare ``.iloc[0]`` raises on an empty frame,
        # but with a message that names the offending age.
        raise IndexError(f"No nutrient targets found for age {age!r}.")
    row = matches.iloc[0]
    return {
        "Daily Calorie Target": row["daily calorie target"],
        "Protein": row["protein"],
        "Carbohydrates": row["carbohydrates"],
        "Fat": row["fat"],
        "Fiber": row["fiber"],
    }

# ✅ STEP 5: Get Recipe by Dish Name
def get_recipe_by_name(dish_name):
    """Return the first recipe row whose name equals ``dish_name``, ignoring case.

    The comparison is made against the lowercased 'recipe_name' column of
    ``recipes_df``. Returns ``None`` when nothing matches.
    """
    lowered_names = recipes_df["recipe_name"].str.lower()
    hits = recipes_df[lowered_names == dish_name.lower()]
    if hits.empty:
        return None
    return hits.iloc[0]

# ✅ STEP 6: Build Strict Prompt for GPT-2
def build_strict_prompt(dish_name, age, disease, ingredients, avoid, recommend, targets):
    """Assemble the nutrition-expert prompt for one dish and one user.

    Args:
        dish_name: Name of the dish, quoted verbatim in the prompt.
        age: User's age, interpolated as-is.
        disease: Diagnosed condition, interpolated as-is.
        ingredients: Raw comma-separated ingredient text; it is passed
            through ``clean_ingredients`` before being listed.
        avoid: Iterable of ingredient names to avoid.
        recommend: Iterable of recommended ingredient names.
        targets: Mapping that may contain 'Daily Calorie Target', 'Protein',
            'Carbohydrates', 'Fat' and 'Fiber'; missing keys render as 'N/A'.

    Returns:
        str: The prompt text with leading and trailing whitespace removed.
    """
    cleaned = clean_ingredients(ingredients)
    # dict.fromkeys de-duplicates while keeping the caller's order; a set
    # could order the items differently on each run, making the prompt (and
    # therefore any seeded generation based on it) non-reproducible.
    avoid_str = ', '.join(dict.fromkeys(avoid))
    recommend_str = ', '.join(dict.fromkeys(recommend))

    prompt = f"""
You are a professional nutrition expert.

A {age}-year-old user has been diagnosed with {disease}. They frequently consume the dish: \"{dish_name}\".

Below are the cleaned ingredients of the dish:
{', '.join(cleaned)}

According to dietary guidelines for {disease}, the following ingredients should be avoided: {avoid_str}, and the following are recommended: {recommend_str}.

The user has the following daily nutritional needs:
- Calories: {targets.get('Daily Calorie Target', 'N/A')}
- Protein: {targets.get('Protein', 'N/A')}g
- Carbs: {targets.get('Carbohydrates', 'N/A')}g
- Fat: {targets.get('Fat', 'N/A')}g
- Fiber: {targets.get('Fiber', 'N/A')}g

---

Your task is to suggest an improved version of the dish, keeping it flavorful but healthier.

Write a single, coherent paragraph that includes:
1. The full cleaned ingredient list.
2. The ingredients to remove and the reason for each.
3. The ingredients to add and why they help.
4. A short explanation of how these changes improve the dish nutritionally for someone of this age with {disease}.

Avoid bullet points. Return only a single paragraph that reads like a nutritionist’s expert recommendation.
"""
    return prompt.strip()

# ✅ STEP 7: Run GPT-2 LLM (using transformers)
from transformers import pipeline, set_seed

_GPT2_GENERATOR = None  # created lazily; loading the model on every call is slow


def _get_gpt2_generator():
    """Return a shared GPT-2 text-generation pipeline, creating it on first use."""
    global _GPT2_GENERATOR
    if _GPT2_GENERATOR is None:
        _GPT2_GENERATOR = pipeline("text-generation", model="gpt2")
    return _GPT2_GENERATOR


def generate_hallucination_free_output(dish_name, age, disease):
    """Print a GPT-2 suggestion for a healthier version of ``dish_name``.

    Looks up the recipe, the health rules for ``disease`` and the nutrient
    targets for ``age``, builds the prompt, prints it, and then prints the
    text GPT-2 generates for it. If the dish, the disease or the age cannot
    be found, a message is printed and nothing is generated.

    Args:
        dish_name: Recipe name to look up (case-insensitive).
        age: Age used to fetch the daily nutrient targets.
        disease: Condition name; matched against the rule keys after
            stripping whitespace and lowercasing.

    Returns:
        None. All results are printed.
    """
    row = get_recipe_by_name(dish_name)
    if row is None:
        print(f"❌ Dish '{dish_name}' not found.")
        return

    # Report an unknown condition the same way as an unknown dish instead of
    # surfacing a bare KeyError.
    rule = health_rules.get(disease.strip().lower())
    if rule is None:
        print(f"❌ No health rules found for '{disease}'.")
        return

    try:
        targets = get_user_nutrient_goals(age)
    except IndexError:
        # Raised when no row matches the requested age.
        print(f"❌ No nutrient targets found for age {age}.")
        return

    prompt = build_strict_prompt(
        dish_name, age, disease, row["ingredients"],
        rule["avoid"], rule["recommend"], targets,
    )
    print("\n📜 Prompt for GPT-2:\n", prompt)

    generator = _get_gpt2_generator()
    set_seed(42)
    outputs = generator(
        prompt,
        # max_length would count the (long) prompt tokens too and leave
        # little or no room for new text; budget only the continuation.
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        # Ask for the continuation alone rather than splitting the prompt
        # off by its "---" marker, which breaks if the model emits "---".
        return_full_text=False,
    )
    print("\n💬 GPT-2 Output:\n", outputs[0]["generated_text"].strip())

# ✅ EXAMPLE CALL: a 50-year-old user with diabetes who eats butter chicken
generate_hallucination_free_output(
    dish_name="butter chicken",
    age=50,
    disease="diabetes",
)
