# Use Gemma to generate reasons for recommendation results
- The generated explanation tells the user, in friendly natural language, what their current state is and why this food was recommended

In [None]:
!pip install -q transformers accelerate

import os

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import login

# Authenticate with the Hugging Face Hub without ever writing the access token
# into the notebook: the token is read from the HF_TOKEN environment variable.
# When the variable is unset, login() receives None and asks for the token
# interactively instead.
login(token=os.environ.get("HF_TOKEN"))

# Instruction-tuned Gemma 2B checkpoint used for every generation below.
model_id = "google/gemma-2b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="auto" and torch_dtype="auto" leave device placement and weight
# precision to the library rather than hard-coding them here.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto"
)

# Text-generation pipeline shared by the generate_reason() helpers in later cells.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

tokenizer_config.json:   0%|          | 0.00/34.2k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Device set to use cuda:0


# Testing with gemma-2b-it

In [None]:
import pandas as pd

# 1. Read the sample prompts from disk
df = pd.read_csv("persona_prompt_sample.csv")

# 2. Keep only the first five rows for a quick smoke test
test_df = df.iloc[:5]

# 3. Function to generate recommendation reason
def generate_reason(prompt, max_new_tokens=180, temperature=0.7):
    """Generate a recommendation explanation for a single prompt.

    Args:
        prompt: Prompt text handed to the text-generation pipeline.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature (sampling is always enabled).

    Returns:
        The generated continuation with surrounding whitespace stripped. If
        generation raises, the exception is not propagated; a string of the
        form "Error: <message>" is returned instead so a batch run keeps going.
    """
    try:
        # Ask the pipeline for the continuation only. Stripping the echoed
        # prompt with str.replace() would also delete any repetition of the
        # prompt text that happens to occur inside the generated answer.
        output = generator(
            prompt,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            return_full_text=False,
        )[0]["generated_text"]
        return output.strip()
    except Exception as e:
        return f"Error: {e}"

# 4. Show each test prompt followed by the reason generated for it
divider = '-' * 80
for number, prompt in enumerate(test_df["gemma_prompt"], start=1):
    print(f"\n🟡 Prompt #{number}\n{divider}\n{prompt}\n")
    response = generate_reason(prompt)
    print(f"🟢 Generated Recommendation #{number}\n{divider}\n{response}\n")


🟡 Prompt #1
--------------------------------------------------------------------------------
[INTERNAL REASONING LOGIC]

This child is 8 years old and has a BMI of 16.4, categorized as Underweight.
That means their body might need more energy to stay strong and healthy.

They cannot cook, so the food must be simple enough for an adult to help prepare.

They are allergic to peanut, soy, which was excluded from the recommendation.

They have ingredients like egg, bread, milk available at home.

Based on these factors, the machine learning model recommended: Avocado Toast.

---

📩 Please now write a friendly and magical explanation to the child,
encouraging them to enjoy this food and feel excited to try it.
Avoid technical language and speak like a supportive food buddy!

🟢 Generated Recommendation #1
--------------------------------------------------------------------------------
Hey there, you're 8 years old, and I'm your food buddy for today! Let's see how we can make your tummy happ

In [None]:
# 3. Upload CSV file
import io

from google.colab import files
uploaded = files.upload()

# 4. Load the DataFrame
import pandas as pd

# files.upload() returns a dict mapping each uploaded file name to its bytes.
# Read the prompts straight from that payload: the name is chosen by the user
# at upload time (and Colab stores it under a new name such as "name (1).csv"
# when a file with that name already exists), so a hard-coded file name can
# point at a missing or stale file instead of the one that was just uploaded.
if uploaded:
    # If several files were uploaded, the first one is used.
    uploaded_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
else:
    # Nothing was uploaded: fall back to the original fixed path on disk.
    df = pd.read_csv("gemma_prompt_internal_reasoning_500.csv")

# 5. Define function to generate recommendation reasons
def generate_reason(prompt, max_new_tokens=150, temperature=0.7):
    """Generate a recommendation explanation for a single prompt.

    Args:
        prompt: Prompt text handed to the text-generation pipeline.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature (sampling is always enabled).

    Returns:
        The generated continuation with surrounding whitespace stripped. If
        generation raises, the exception is not propagated; a string of the
        form "Error: <message>" is returned instead so the batch run over the
        whole dataset keeps going.
    """
    try:
        # Ask the pipeline for the continuation only. Stripping the echoed
        # prompt with str.replace() would also delete any repetition of the
        # prompt text that happens to occur inside the generated answer.
        output = generator(
            prompt,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            return_full_text=False,
        )[0]["generated_text"]
        return output.strip()
    except Exception as e:
        return f"Error: {e}"

# 6. Produce one reason per prompt; tqdm shows progress because this step is slow
from tqdm import tqdm

results = [
    generate_reason(prompt)
    for prompt in tqdm(df["gemma_prompt"], desc="Generating recommendations")
]

# Attach the generated texts as a new column, in the same row order as the prompts
df["recommendation_reason"] = results

# 7. Write the results to disk, then trigger a browser download of that file
from google.colab import files

output_name = "reason_withgemma.csv"
df.to_csv(output_name, encoding='utf-8-sig', index=False)
files.download(output_name)

Saving gemma_prompt_internal_reasoning_400.csv to gemma_prompt_internal_reasoning_400 (1).csv


Generating recommendations: 100%|██████████| 400/400 [25:03<00:00,  3.76s/it]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Pre-processing recommendation reasons

In [None]:
import re

# Function to clean up recommendation_reason
def clean_reason(text):
    """Normalize one generated recommendation reason.

    Steps, in order: drop brackets, braces, double quotes and backslashes;
    drop single quotes used as quotation marks (apostrophes inside a word,
    as in "you're", are kept); collapse whitespace; reduce runs of
    punctuation to their last character; drop punctuation that does not
    directly follow a word character; capitalize the first character; and
    make sure the text ends with sentence-final punctuation.

    Args:
        text: The raw reason. Missing values (None / NaN) are accepted and
            non-string values are converted with str().

    Returns:
        The cleaned sentence, or "" when the input is missing or nothing is
        left after cleaning.
    """
    if pd.isna(text):
        return ""
    text = str(text)
    # Remove brackets, braces, double quotes and backslashes
    text = re.sub(r"[\[\]\{\}\"\\]", "", text)
    # Remove single quotes acting as quotation marks, but keep apostrophes that
    # sit between two word characters so contractions are not mangled
    text = re.sub(r"(?<!\w)'|'(?!\w)", "", text)
    text = re.sub(r"\s+", " ", text).strip()  # Normalize spaces
    text = re.sub(r"[.,;!?]+(?=[.,;!?])", "", text)  # Remove repeated punctuation
    text = re.sub(r"(?<!\w)[.,;!?]", "", text)  # Remove punctuation not following a word
    # The removals above can expose leading/trailing spaces again
    text = text.strip()
    if not text:
        # Nothing meaningful left: do not fabricate a lone "."
        return ""
    # Only add a period when the sentence is not already terminated, so that
    # "Yum!" stays "Yum!" instead of becoming "Yum!."
    if text[-1] not in ".!?":
        text += '.'
    return text[0].upper() + text[1:]

# Apply cleanup to the 'recommendation_reason' column
df['recommendation_reason'] = df['recommendation_reason'].apply(clean_reason)

# Save the cleaned version
cleaned_file_path = "cleaned_reason_withgemma.csv"
df.to_csv(cleaned_file_path, index=False)

# ace_tools only exists inside the ChatGPT code-interpreter sandbox; importing
# it unconditionally makes this cell fail with ModuleNotFoundError everywhere
# else (Colab, local Jupyter). Use it when present and otherwise fall back to
# the notebook's built-in rich display.
try:
    import ace_tools as tools
except ImportError:
    display(df)  # `display` is provided by the IPython/Jupyter kernel
else:
    tools.display_dataframe_to_user(name="Cleaned Recommendation Reasons", dataframe=df)

cleaned_file_path