In [None]:
!pip install sentence-transformers faiss-cpu tqdm


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [None]:
# ✅ STEP 1: Import Required Libraries
import pandas as pd
import numpy as np
import re
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
import faiss
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [None]:
# ✅ STEP 2: Load Datasets
recipes_df = pd.read_csv("/content/70000_recipes_nutrients_cleaned_final.csv")
health_df = pd.read_csv("/content/health_age_data_70000_synthetic.csv")

In [None]:
# ✅ STEP 3: Ingredient Cleaning Function
def clean_ingredients(raw_ingredients):
    items = [re.sub(r"[^a-zA-Z ]", "", x).strip().lower() for x in str(raw_ingredients).split(',')]
    items = [i for i in items if len(i) > 1 and not i.isdigit()]
    return ', '.join(sorted(set(items)))


In [None]:
# ✅ STEP 4: Define Health Rules (manual)
health_rules = {
    "Diabetes": {
        "avoid": ["sugar", "white rice", "ghee", "salt"],
        "recommend": ["olive oil", "turmeric", "cinnamon", "fiber-rich foods"]
    },
    "Hypertension": {
        "avoid": ["salt", "cheese", "processed meat"],
        "recommend": ["spinach", "garlic", "low-sodium spices"]
    },
    "Heart Disease": {
        "avoid": ["butter", "saturated fat", "excess salt"],
        "recommend": ["omega-3s", "olive oil", "leafy greens"]
    }
}


In [None]:
# ✅ STEP 5: Nutrient Goal Extractor
def get_user_nutrient_goals(age):
    age_range = health_df[(health_df["Ages"] >= age - 3) & (health_df["Ages"] <= age + 3)]
    if age_range.empty:
        return {}
    return age_range.iloc[0].to_dict()


In [None]:
# ✅ STEP 6: Prompt Generator Function
def build_improved_prompt(dish_name, age, disease, ingredients, avoid, recommend, similar_dishes, targets):
    cleaned = clean_ingredients(ingredients)
    avoid_str = ', '.join(set(avoid))
    recommend_str = ', '.join(set(recommend))
    similar_str = ', '.join(set(similar_dishes))

    prompt = f"""
You are a nutrition advisor.

The user is {age} years old and has {disease}.
Here is a dish they usually eat: "{dish_name}"

Ingredients in the dish:
{cleaned}

Dietary Restrictions:
Avoid: {avoid_str or 'None'}
Recommend: {recommend_str or 'None'}

Similar Dishes: {similar_str or 'None'}

Daily Nutrient Targets:
- Calories: {targets.get('Daily Calorie Target', 'N/A')}
- Protein: {targets.get('Protein', 'N/A')}g
- Carbs: {targets.get('Carbohydrates', 'N/A')}g
- Fat: {targets.get('Fat', 'N/A')}g
- Fiber: {targets.get('Fiber', 'N/A')}g

Suggest a healthier version of the dish, optimized for the user’s age and disease.

Return the output strictly in this format. Avoid repetitions and do not include preparation words like “grated,” “cut,” “crushed,” etc.:

--- Nutrition Advisor Output ---

Full optimized ingredient list:
- (List only clean ingredient names)

Ingredients to REMOVE:
- (Only remove ingredients that are unhealthy for the specified disease)

Ingredients to ADD:
- (List new ingredients and explain in short brackets like: garlic (helps reduce blood pressure))

Explanation:
(Explain why you made those substitutions based on the user’s age and health issue)
"""
    return prompt.strip()


In [None]:
# ✅ STEP 7: Build FAISS Index for Similar Dish Retrieval
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
recipe_names = recipes_df["recipe_name"].astype(str).tolist()
batch_size = 512
recipe_embeddings = []

for i in tqdm(range(0, len(recipe_names), batch_size), desc="🔁 Embedding Batches"):
    batch = recipe_names[i:i+batch_size]
    batch_embeddings = embedding_model.encode(batch, convert_to_numpy=True)
    recipe_embeddings.append(batch_embeddings)

recipe_embeddings = np.vstack(recipe_embeddings)
index = faiss.IndexFlatL2(recipe_embeddings.shape[1])
index.add(recipe_embeddings)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

🔁 Embedding Batches: 100%|██████████| 135/135 [06:29<00:00,  2.89s/it]


In [None]:
def get_similar_dishes(dish_name, top_k=3):
    query_vec = embedding_model.encode([dish_name], convert_to_numpy=True)
    D, I = index.search(query_vec, top_k)
    return [recipe_names[i] for i in I[0] if i < len(recipe_names)]

In [None]:
# ✅ STEP 8: Full Prompt Generation Function
def generate_prompt_from_dish(dish_name, age, disease):
    row = recipes_df[recipes_df["recipe_name"].str.lower() == dish_name.lower()]
    if row.empty:
        return f"❌ Dish '{dish_name}' not found in dataset."

    raw_ingredients = row.iloc[0]["ingredients"]
    disease_lower = disease.lower()
    if disease_lower in [d.lower() for d in health_rules]:
        matched_key = [d for d in health_rules if d.lower() == disease_lower][0]
        avoid = health_rules[matched_key]["avoid"]
        recommend = health_rules[matched_key]["recommend"]
    else:
        avoid, recommend = [], []

    similar = get_similar_dishes(dish_name)
    targets = get_user_nutrient_goals(age)

    return build_improved_prompt(
        dish_name=dish_name,
        age=age,
        disease=disease,
        ingredients=raw_ingredients,
        avoid=avoid,
        recommend=recommend,
        similar_dishes=similar,
        targets=targets
    )

In [None]:
# ✅ STEP 9: Load LLaMA Model for Text Generation
device = "cuda" if torch.cuda.is_available() else "cpu"
llama_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(llama_model_name)
llama_model = AutoModelForCausalLM.from_pretrained(llama_model_name).to(device)

def llm(prompt, max_tokens=300):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    outputs = llama_model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# ✅ STEP 10: Post-processor to Clean LLM Output
def clean_llm_output(text):
    lines = text.split('\n')
    clean_lines = []
    seen = set()
    for line in lines:
        if not line.strip():
            continue
        clean_line = re.sub(r'\b(grated|cut|chopped|sliced|crushed|peeled|washed|dried|fresh)\b', '', line, flags=re.IGNORECASE)
        clean_line = re.sub(r'\s+', ' ', clean_line).strip('- ').strip()
        if clean_line.lower() not in seen and clean_line:
            seen.add(clean_line.lower())
            clean_lines.append(f"- {clean_line}")
    return '\n'.join(clean_lines)

In [None]:
prompt = generate_prompt_from_dish("Chicken tikka", 60, "Diabetes")
print("--- Generated Prompt ---")
print(prompt)


--- Generated Prompt ---
You are a nutrition advisor.

The user is 60 years old and has Diabetes.
Here is a dish they usually eat: "Chicken tikka"

Ingredients in the dish:
bunches english spinach, cmpiece fresh ginger, crushed, dried, ends trimmed, fresh coriander leaves, g  cup lowfat natural yoghurt, g diced chicken breast, garlic cloves, grated, lemon wedges, ml  cup fresh lemon juice, peeled, round cmdiameter flour tortillas, salt  freshly ground pepper, teaspoon garam masala, teaspoon hot chilli powder, teaspoons sharwoods tandoori curry paste, to serve, washed

Dietary Restrictions:
Avoid: ghee, white rice, sugar, salt
Recommend: olive oil, fiber-rich foods, turmeric, cinnamon

Similar Dishes: chicken tikka masala, chicken tikka, chicken tikka sandwich

Daily Nutrient Targets:
- Calories: 3195
- Protein: 239g
- Carbs: 319g
- Fat: 106g
- Fiber: 38.28g

Suggest a healthier version of the dish, optimized for the user’s age and disease.

Return the output strictly in this format. Av

In [None]:
raw_output = llm(prompt, max_tokens=300)
print("\n--- Raw LLaMA Output ---")
print(raw_output)



--- Raw LLaMA Output ---
You are a nutrition advisor.

The user is 60 years old and has Diabetes.
Here is a dish they usually eat: "Chicken tikka"

Ingredients in the dish:
bunches english spinach, cmpiece fresh ginger, crushed, dried, ends trimmed, fresh coriander leaves, g  cup lowfat natural yoghurt, g diced chicken breast, garlic cloves, grated, lemon wedges, ml  cup fresh lemon juice, peeled, round cmdiameter flour tortillas, salt  freshly ground pepper, teaspoon garam masala, teaspoon hot chilli powder, teaspoons sharwoods tandoori curry paste, to serve, washed

Dietary Restrictions:
Avoid: ghee, white rice, sugar, salt
Recommend: olive oil, fiber-rich foods, turmeric, cinnamon

Similar Dishes: chicken tikka masala, chicken tikka, chicken tikka sandwich

Daily Nutrient Targets:
- Calories: 3195
- Protein: 239g
- Carbs: 319g
- Fat: 106g
- Fiber: 38.28g

Suggest a healthier version of the dish, optimized for the user’s age and disease.

Return the output strictly in this format. A

In [None]:
print("\n--- Cleaned Output (Post-Processed) ---")
print(clean_llm_output(raw_output))



--- Cleaned Output (Post-Processed) ---
- You are a nutrition advisor.
- The user is 60 years old and has Diabetes.
- Here is a dish they usually eat: "Chicken tikka"
- Ingredients in the dish:
- bunches english spinach, cmpiece ginger, , , ends trimmed, coriander leaves, g cup lowfat natural yoghurt, g diced chicken breast, garlic cloves, , lemon wedges, ml cup lemon juice, , round cmdiameter flour tortillas, salt freshly ground pepper, teaspoon garam masala, teaspoon hot chilli powder, teaspoons sharwoods tandoori curry paste, to serve,
- Dietary Restrictions:
- Avoid: ghee, white rice, sugar, salt
- Recommend: olive oil, fiber-rich foods, turmeric, cinnamon
- Similar Dishes: chicken tikka masala, chicken tikka, chicken tikka sandwich
- Daily Nutrient Targets:
- Calories: 3195
- Protein: 239g
- Carbs: 319g
- Fat: 106g
- Fiber: 38.28g
- Suggest a healthier version of the dish, optimized for the user’s age and disease.
- Return the output strictly in this format. Avoid repetitions and

In [None]:
!pip install llama-cpp-python

from llama_cpp import Llama

llm = Llama(
    model_path="/content/drive/MyDrive/llama-pro-8b-instruct.Q4_K_M.gguf",
    n_ctx=2048,
    n_threads=8
)

prompt = """
You are a nutrition advisor.

The user is 60 years old and has Diabetes.
Here is a dish they usually eat: "Chicken tikka"

Ingredients in the dish:
chicken breast, garlic, yogurt, cream, oil, flour tortilla, salt, garam masala, chilli powder

Avoid: sugar, white rice, ghee, salt
Recommend: olive oil, turmeric, cinnamon, fiber-rich foods

Return the output strictly in this format. Avoid repetition:

--- Nutrition Advisor Output ---

Full optimized ingredient list:
- (List)

Ingredients to REMOVE:
- (List)

Ingredients to ADD:
- (List with reason)

Explanation:
(Short explanation why items were changed)
"""

output = llm(prompt, max_tokens=300)
print(output["choices"][0]["text"])


Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.12.tar.gz (49.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.8/49.8 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.12-cp311-cp311-linux_x86_64.whl size=4158982 sha256=f2af52b9e6e476ca037

llama_model_loader: loaded meta data with 23 key-value pairs and 363 tensors from /content/drive/MyDrive/llama-pro-8b-instruct.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 40
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.at




First prompt:
i need to do project by giving the input as dish name , age and health disease , the ouput should be ingredients should be added in this dish

last propmt : the output is not following the prompt and give me the good strict prompt to generate my output