In [3]:
# ✅ STEP 1: Install Required Packages
!pip install transformers sentence-transformers

# ✅ STEP 2: Import Libraries
import pandas as pd
import re
import torch
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from transformers import BartTokenizer, BartForConditionalGeneration

# ✅ STEP 3: Load Your Datasets
recipes_df = pd.read_csv("/content/70000_recipes_nutrients.csv")
profile_df = pd.read_csv("/content/health_age_data_70000_synthetic.csv")

# ✅ STEP 4: Clean Ingredients
def clean_ingredients(text):
    if pd.isna(text): return ""
    text = re.sub(r"[^a-zA-Z0-9, ]", "", text)
    return ', '.join([w.strip().lower() for w in text.split(',') if w.strip()])

# ✅ STEP 5: Find Similar Dishes Using Sentence Embeddings
def get_similar_dishes(dish_name, top_k=3):
    model = SentenceTransformer("all-MiniLM-L6-v2")
    names = recipes_df["recipe_name"].astype(str).tolist()
    embeddings = model.encode(names)
    target_emb = model.encode([dish_name])
    scores = cosine_similarity(target_emb, embeddings)[0]
    top_indices = scores.argsort()[::-1][:top_k]
    return [names[i] for i in top_indices]

# ✅ STEP 6: Nutrient Targets Based on Age
def get_user_nutrient_goals(age):
    return {
        "Daily Calorie Target": 2500 if age < 50 else 2200,
        "Protein": 200,
        "Carbohydrates": 250,
        "Fat": 70,
        "Fiber": 30
    }

# ✅ STEP 7: Define Health Rules Directly in Code
def build_health_rules():
    rules = {
        "diabetes": {
            "avoid": ["sugar", "ghee", "salt"],
            "recommend": ["cinnamon", "olive oil", "spinach"]
        },
        "heart disease": {
            "avoid": ["butter", "cream", "red meat"],
            "recommend": ["garlic", "spinach", "flaxseeds"]
        },
        "obesity": {
            "avoid": ["soda", "fries", "white bread"],
            "recommend": ["vegetables", "oats", "lean protein"]
        },
        "hypertension": {
            "avoid": ["salt", "processed meat"],
            "recommend": ["leafy greens", "olive oil"]
        },
        "cholesterol": {
            "avoid": ["egg yolk", "red meat", "saturated fat"],
            "recommend": ["chia seeds", "walnuts", "whole grains"]
        }
    }
    return rules

health_rules = build_health_rules()

# ✅ STEP 8: Approved Additions List
approved_additions = [
    "turmeric", "olive oil", "cinnamon", "spinach", "broccoli", "kale",
    "bell peppers", "mushrooms", "zucchini", "flaxseeds", "chia seeds", "whole grain pasta"
]

# ✅ STEP 9: Prompt Builder
def build_prompt(dish_name, age, disease, ingredients, avoid, recommend, targets):
    cleaned = clean_ingredients(ingredients)
    cleaned_list = [i.strip() for i in cleaned.split(',') if i.strip()]
    bullet_ing = "\n".join(f"- {item}" for item in cleaned_list)
    prompt = f"""
A {age}-year-old patient with {disease} eats a dish called "{dish_name}".

Ingredients:
{bullet_ing}

Nutrition Goals:
Calories: {targets["Daily Calorie Target"]}
Protein: {targets["Protein"]}g
Carbs: {targets["Carbohydrates"]}g
Fat: {targets["Fat"]}g
Fiber: {targets["Fiber"]}g

Avoid: {', '.join(avoid)}
Recommend from: {', '.join(approved_additions)}

As a nutrition expert, revise the dish by:
- Removing harmful ingredients
- Adding healthier alternatives
- Explaining how this helps the patient
"""
    return prompt.strip()

# ✅ STEP 10: Load BART Model
bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")

# ✅ STEP 11: BART Text Generation
def bart_llm(prompt, max_tokens=256):
    input_ids = bart_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).input_ids
    with torch.no_grad():
        outputs = bart_model.generate(input_ids, max_new_tokens=max_tokens)
    return bart_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

# ✅ STEP 12: Final Pipeline
def generate_bart_output(dish_name, age, disease):
    print(f"🔍 Finding best match for: {dish_name}")
    match = get_similar_dishes(dish_name)[0]
    row = recipes_df[recipes_df["recipe_name"].str.lower() == match.lower()]

    if row.empty:
        return f"❌ Recipe '{match}' not found"

    raw_ing = row.iloc[0]["ingredients"]
    targets = get_user_nutrient_goals(age)
    disease_lower = disease.strip().lower()

    matched_key = next((key for key in health_rules if key in disease_lower), None)
    if matched_key is None:
        return f"❌ No health rules found for: '{disease}'"

    avoid = health_rules[matched_key]["avoid"]
    recommend = health_rules[matched_key]["recommend"]

    prompt = build_prompt(
        dish_name=match,
        age=age,
        disease=disease,
        ingredients=raw_ing,
        avoid=avoid,
        recommend=recommend,
        targets=targets
    )

    print("\n📨 Prompt Sent:\n", prompt)
    result = bart_llm(prompt)
    print("\n🤖 BART Response:\n", result)
    return result

# ✅ RUN EXAMPLE
generate_bart_output("pasta", 40, "diabetes, hypertension, heart disease")


🔍 Finding best match for: pasta

📨 Prompt Sent:
 A 40-year-old patient with diabetes, hypertension, heart disease eats a dish called "pizza pasta".

Ingredients:
- rotini pasta
- lean ground beef
- small onion
- spaghetti sauce
- sliced pepperoni sausage
- shredded mozzarella cheese

Nutrition Goals:
Calories: 2500
Protein: 200g
Carbs: 250g
Fat: 70g
Fiber: 30g

Avoid: sugar, ghee, salt
Recommend from: turmeric, olive oil, cinnamon, spinach, broccoli, kale, bell peppers, mushrooms, zucchini, flaxseeds, chia seeds, whole grain pasta

As a nutrition expert, revise the dish by:
- Removing harmful ingredients
- Adding healthier alternatives
- Explaining how this helps the patient

🤖 BART Response:
 A 40-year-old patient with diabetes, hypertension, heart disease eats a dish called "pizza pasta" The dish contains rotini pasta, lean ground beef, small onion, spaghetti sauce, sliced pepperoni sausage and shredded mozzarella cheese. A nutrition expert can revise the dish by removing harmful ing

'A 40-year-old patient with diabetes, hypertension, heart disease eats a dish called "pizza pasta" The dish contains rotini pasta, lean ground beef, small onion, spaghetti sauce, sliced pepperoni sausage and shredded mozzarella cheese. A nutrition expert can revise the dish by removing harmful ingredients and adding healthier alternatives.'