In [1]:
# ‚úÖ STEP 1: Install Required Packages
!pip install -q transformers sentence-transformers faiss-cpu

# ‚úÖ STEP 2: Import Libraries
import pandas as pd
import re
import torch
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

# ‚úÖ STEP 3: Load Datasets
recipes_df = pd.read_csv("/content/70000_recipes_nutrients.csv")
profile_df = pd.read_csv("/content/health_age_data_70000_synthetic.csv")

# ‚úÖ STEP 4: Clean Ingredients
def clean_ingredients(text):
    if pd.isna(text): return ""
    text = re.sub(r"[^a-zA-Z0-9, ]", "", text)
    return ', '.join([w.strip().lower() for w in text.split(',') if w.strip()])

# ‚úÖ STEP 5: Find Similar Dishes Using Sentence Embeddings
def get_similar_dishes(dish_name, top_k=3):
    model = SentenceTransformer("all-MiniLM-L6-v2")
    names = recipes_df["recipe_name"].astype(str).tolist()
    embeddings = model.encode(names)
    target_emb = model.encode([dish_name])
    scores = cosine_similarity(target_emb, embeddings)[0]
    top_indices = scores.argsort()[::-1][:top_k]
    return [names[i] for i in top_indices]

# ‚úÖ STEP 6: Nutrient Targets
def get_user_nutrient_goals(age):
    return {
        "Daily Calorie Target": 2500 if age < 50 else 2200,
        "Protein": 200,
        "Carbohydrates": 250,
        "Fat": 70,
        "Fiber": 30
    }

# ‚úÖ STEP 7: Health Rules
def build_health_rules():
    return {
        "diabetes": {"avoid": ["sugar", "ghee", "salt"], "recommend": ["cinnamon", "olive oil", "spinach"]},
        "heart disease": {"avoid": ["butter", "cream", "red meat"], "recommend": ["garlic", "spinach", "flaxseeds"]},
        "obesity": {"avoid": ["soda", "fries", "white bread"], "recommend": ["vegetables", "oats", "lean protein"]},
        "hypertension": {"avoid": ["salt", "processed meat"], "recommend": ["leafy greens", "olive oil"]},
        "cholesterol": {"avoid": ["egg yolk", "red meat", "saturated fat"], "recommend": ["chia seeds", "walnuts", "whole grains"]}
    }

health_rules = build_health_rules()

approved_additions = [
    "turmeric", "olive oil", "cinnamon", "spinach", "broccoli", "kale",
    "bell peppers", "mushrooms", "zucchini", "flaxseeds", "chia seeds", "whole grain pasta"
]

# ‚úÖ STEP 8: Falcon Prompt Builder
def build_prompt_falcon(dish_name, age, disease, ingredients, avoid, recommend, targets):
    cleaned = clean_ingredients(ingredients)
    cleaned_list = [i.strip() for i in cleaned.split(',') if i.strip()]
    bullet_ing = "\n".join(f"- {item}" for item in cleaned_list)

    return f"""
You are a professional nutritionist.

A {age}-year-old patient with {disease} eats a dish called "{dish_name}".

Ingredients:
{bullet_ing}

Nutrition Targets:
- Calories: {targets["Daily Calorie Target"]}
- Protein: {targets["Protein"]}g
- Carbs: {targets["Carbohydrates"]}g
- Fat: {targets["Fat"]}g
- Fiber: {targets["Fiber"]}g

Avoid: {', '.join(avoid)}
Only recommend additions from: {', '.join(approved_additions)}

Write a short paragraph:
- List harmful ingredients to remove and why.
- List healthy additions and why.
- Explain how the changes improve the dish for the given condition.
"""

# ‚úÖ STEP 9: Load Falcon-RW-1B
falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
falcon_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-rw-1b")
falcon_model.eval().to("cuda" if torch.cuda.is_available() else "cpu")

# ‚úÖ STEP 10: Text Generation with Falcon
def falcon_llm(prompt, max_tokens=256):
    inputs = falcon_tokenizer(prompt, return_tensors="pt").to(falcon_model.device)
    with torch.no_grad():
        outputs = falcon_model.generate(**inputs, max_new_tokens=max_tokens)
    return falcon_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

# ‚úÖ STEP 11: Final Output Pipeline
def generate_output_falcon(dish_name, age, disease):
    print(f"üîç Finding best match for: {dish_name}")
    match = get_similar_dishes(dish_name)[0]
    row = recipes_df[recipes_df["recipe_name"].str.lower() == match.lower()]
    if row.empty:
        return f"‚ùå Recipe '{match}' not found"

    raw_ing = row.iloc[0]["ingredients"]
    targets = get_user_nutrient_goals(age)
    disease_lower = disease.strip().lower()
    matched_key = next((key for key in health_rules if key in disease_lower), None)
    if matched_key is None:
        return f"‚ùå No health rules found for any condition in: '{disease}'"

    avoid = health_rules[matched_key]["avoid"]
    recommend = health_rules[matched_key]["recommend"]

    prompt = build_prompt_falcon(
        dish_name=match,
        age=age,
        disease=disease,
        ingredients=raw_ing,
        avoid=avoid,
        recommend=recommend,
        targets=targets
    )

    print("\nüì® Prompt Sent:\n", prompt)
    result = falcon_llm(prompt)
    print("\nü§ñ Falcon-RW-1B Response:\n", result)
    return result

# ‚úÖ TEST
generate_output_falcon("cake", 45, "diabetes, hypertension, heart disease")


[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m31.3/31.3 MB[0m [31m43.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m363.4/363.4 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m13.8/13.8 MB[0m [31m55.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m24.6/24.6 MB[0m [31m44.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m883.7/883.7 kB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.62G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.62G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

üîç Finding best match for: cake


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



üì® Prompt Sent:
 
You are a professional nutritionist.

A 45-year-old patient with diabetes, hypertension, heart disease eats a dish called "king cake".

Ingredients:
- active dry yeast
- white sugar
- warm milk 110 degrees f45 degrees c
- butter
- egg yolks
- allpurpose flour
- salt
- ground nutmeg
- grated lemon zest
- cream cheese
- confectioners sugar
- confectioners sugar
- lemon juice
- milk
- multicolored candy sprinkles

Nutrition Targets:
- Calories: 2500
- Protein: 200g
- Carbs: 250g
- Fat: 70g
- Fiber: 30g

Avoid: sugar, ghee, salt
Only recommend additions from: turmeric, olive oil, cinnamon, spinach, broccoli, kale, bell peppers, mushrooms, zucchini, flaxseeds, chia seeds, whole grain pasta

Write a short paragraph:
- List harmful ingredients to remove and why.
- List healthy additions and why.
- Explain how the changes improve the dish for the given condition.


ü§ñ Falcon-RW-1B Response:
 You are a professional nutritionist.

A 45-year-old patient with diabetes, hyper

'You are a professional nutritionist.\n\nA 45-year-old patient with diabetes, hypertension, heart disease eats a dish called "king cake".\n\nIngredients:\n- active dry yeast\n- white sugar\n- warm milk 110 degrees f45 degrees c\n- butter\n- egg yolks\n- allpurpose flour\n- salt\n- ground nutmeg\n- grated lemon zest\n- cream cheese\n- confectioners sugar\n- confectioners sugar\n- lemon juice\n- milk\n- multicolored candy sprinkles\n\nNutrition Targets:\n- Calories: 2500\n- Protein: 200g\n- Carbs: 250g\n- Fat: 70g\n- Fiber: 30g\n\nAvoid: sugar, ghee, salt\nOnly recommend additions from: turmeric, olive oil, cinnamon, spinach, broccoli, kale, bell peppers, mushrooms, zucchini, flaxseeds, chia seeds, whole grain pasta\n\nWrite a short paragraph:\n- List harmful ingredients to remove and why.\n- List healthy additions and why.\n- Explain how the changes improve the dish for the given condition.\n- Explain how the changes improve the dish for the given condition.\n- Explain how the changes i

In [2]:
# ‚úÖ STEP 1: Install Cleanlab-TLM
!pip install cleanlab-tlm

# ‚úÖ STEP 2: Set API Key and Import
import os
from cleanlab_tlm import TLM

os.environ["CLEANLAB_TLM_API_KEY"] = "0d971ca876c443819414f3b8b8248a25"

# ‚úÖ STEP 3: Initialize TLM with explanation logging
tlm = TLM(options={"log": ["explanation"]})

# ‚úÖ STEP 4: Modified function to return both output and prompt
def generate_output_with_prompt_falcon(dish_name, age, disease):
    print(f"üîç Finding best match for: {dish_name}")
    match = get_similar_dishes(dish_name)[0]
    row = recipes_df[recipes_df["recipe_name"].str.lower() == match.lower()]
    if row.empty:
        return f"‚ùå Recipe '{match}' not found", ""

    raw_ing = row.iloc[0]["ingredients"]
    targets = get_user_nutrient_goals(age)
    disease_lower = disease.strip().lower()
    matched_key = next((key for key in health_rules if key in disease_lower), None)
    if matched_key is None:
        return f"‚ùå No health rules found for any condition in: '{disease}'", ""

    avoid = health_rules[matched_key]["avoid"]
    recommend = health_rules[matched_key]["recommend"]

    prompt = build_prompt_falcon(
        dish_name=match,
        age=age,
        disease=disease,
        ingredients=raw_ing,
        avoid=avoid,
        recommend=recommend,
        targets=targets
    )

    print("\nüì® Prompt Sent:\n", prompt)
    result = falcon_llm(prompt)
    print("\nü§ñ Falcon-RW-1B Response:\n", result)
    return result, prompt

# ‚úÖ STEP 5: Run the function with inputs
output, prompt = generate_output_with_prompt_falcon("cake", 45, "diabetes, hypertension")

# ‚úÖ STEP 6: Get Trust Score & Explanation
score_data = tlm.get_trustworthiness_score(prompt=prompt, response=output)
trust_score = score_data.get("trustworthiness_score", None)
explanation = score_data.get("explanation", "No explanation provided.")

# ‚úÖ STEP 7: Display Results
print("üî¢ Trust Score:", trust_score)
print("üìù Explanation:", explanation)


Collecting cleanlab-tlm
  Downloading cleanlab_tlm-1.1.20-py3-none-any.whl.metadata (3.9 kB)
Collecting semver<3.0.0,>=2.13.0 (from cleanlab-tlm)
  Downloading semver-2.13.0-py2.py3-none-any.whl.metadata (5.0 kB)
Downloading cleanlab_tlm-1.1.20-py3-none-any.whl (53 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m53.9/53.9 kB[0m [31m885.1 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading semver-2.13.0-py2.py3-none-any.whl (12 kB)
Installing collected packages: semver, cleanlab-tlm
Successfully installed cleanlab-tlm-1.1.20 semver-2.13.0
üîç Finding best match for: cake


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



üì® Prompt Sent:
 
You are a professional nutritionist.

A 45-year-old patient with diabetes, hypertension eats a dish called "king cake".

Ingredients:
- active dry yeast
- white sugar
- warm milk 110 degrees f45 degrees c
- butter
- egg yolks
- allpurpose flour
- salt
- ground nutmeg
- grated lemon zest
- cream cheese
- confectioners sugar
- confectioners sugar
- lemon juice
- milk
- multicolored candy sprinkles

Nutrition Targets:
- Calories: 2500
- Protein: 200g
- Carbs: 250g
- Fat: 70g
- Fiber: 30g

Avoid: sugar, ghee, salt
Only recommend additions from: turmeric, olive oil, cinnamon, spinach, broccoli, kale, bell peppers, mushrooms, zucchini, flaxseeds, chia seeds, whole grain pasta

Write a short paragraph:
- List harmful ingredients to remove and why.
- List healthy additions and why.
- Explain how the changes improve the dish for the given condition.


ü§ñ Falcon-RW-1B Response:
 You are a professional nutritionist.

A 45-year-old patient with diabetes, hypertension eats a 