<a href="https://colab.research.google.com/github/AnjaliVaghjiani/Thesis/blob/main/Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets transformers sentence-transformers torch

In [None]:
from datasets import load_dataset

# Evaluation data: the 'train' split of this Hugging Face dataset repo ID.
dataset = load_dataset('AnjaliNV/Templete2', split='train')

sample_size = 100  # adjust as needed

# Randomly select up to sample_size entries. The count is clamped to the
# dataset length so that select() is never handed an index past the end
# when the split holds fewer than sample_size rows. The fixed seed keeps
# the chosen subset the same from run to run.
dataset = dataset.shuffle(seed=42).select(range(min(sample_size, len(dataset))))


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face repo that hosts the model under evaluation.
model_name = "AnjaliNV/WellBeing_Coach_LLM"

# Keyword arguments forwarded to from_pretrained: float16 weights and
# device_map="auto".
load_options = dict(torch_dtype=torch.float16, device_map="auto")

# The tokenizer and the model are both loaded from the same repo.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)




In [None]:
def generate_recommendation(profile, max_new_tokens=700):
    """Generate a well-being recommendation for one user profile.

    The profile is interpolated into a fixed coaching prompt, the prompt is
    tokenized with the notebook-level ``tokenizer``, and the notebook-level
    ``model`` generates a continuation.

    Args:
        profile: User data placed in the prompt's USER INPUT section.
        max_new_tokens: Upper bound on the number of newly generated tokens.
            Defaults to 700, the value that used to be hard-coded.

    Returns:
        The decoded text of the generated tokens only: the prompt tokens are
        sliced off before decoding and special tokens are skipped.
    """
    prompt = f"""
You are a professional well-being coach.
Your role is to generate personalized well-being recommendations for the user.

USER INPUT:
{profile}

INSTRUCTIONS:
- Always follow the official guidelines before making any recommendation.
- Do NOT over-recommend. Keep suggestions safe, concise, and realistic. Ask user to simple avoid the contradit food with their diet prefernce.
- Always take into account the user’s age, sex, and fitness history.
- Strictly follow the output template provided below. Do not add extra sections, headings, or commentary.
- Keep tone supportive, professional, and encouraging.

TASK:
Based on the above user data, generate recommendations using EXACTLY the following structure:

1) Food Recommendation
- Overall Assessment: [...]
- Areas of Improvements: [...]
- Suggested Meals: [...]

2) Physical Activity
- Activity Assessment: [...]
- Zone Minutes and Intensity Feedback: [...]
- Strength/Cardio Tips: [Workout Suggestions]
- Weekly Goals: [...]

3) Sleep, Mood, Mental Health and Lifestyle
- Sleep and Mood Review: [...]
- Suggestions: [...]

4) Weekly Summary
- Summary: [...]
- Goals: [...]
"""
    # Move the inputs to the device the model itself reports instead of
    # relying on a separately computed global device string: the model is
    # loaded with device_map="auto", so the model is the authority on where
    # its inputs must live, and no other cell has to run first.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Number of prompt tokens; generate() returns prompt + continuation, so
    # this is the offset at which the newly generated tokens start.
    input_length = inputs.input_ids.shape[1]
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Decode only the continuation of the first (and only) sequence.
    output_text = tokenizer.decode(output_ids[0][input_length:], skip_special_tokens=True)
    return output_text


In [None]:
from sentence_transformers import SentenceTransformer, util
# "cuda" when torch can see a GPU, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load a lightweight Sentence-BERT model
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

# One cosine-similarity score per evaluated sample, in dataset order.
semantic_scores = []

for sample in dataset:
    profile = sample['input']
    reference = sample['output']

    llm_output = generate_recommendation(profile)

    # Compute embeddings
    emb_ref = embed_model.encode(reference, convert_to_tensor=True)
    emb_llm = embed_model.encode(llm_output, convert_to_tensor=True)

    # Cosine similarity between the reference and the generated text
    # (util.cos_sim is the current name of the pytorch_cos_sim helper).
    similarity = util.cos_sim(emb_ref, emb_llm).item()
    semantic_scores.append(similarity)

# Average semantic similarity. With no scored samples the mean is undefined,
# so report NaN instead of failing with a ZeroDivisionError.
if semantic_scores:
    avg_similarity = sum(semantic_scores) / len(semantic_scores)
else:
    avg_similarity = float("nan")
print("Average Semantic Similarity:", avg_similarity)


In [None]:
import matplotlib.pyplot as plt

# Legend text shared by both figures: the mean, formatted to three decimals.
mean_label = f'Avg = {avg_similarity:.3f}'

# Histogram of the scores, with the mean as a dashed red vertical line.
hist_fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.hist(semantic_scores, bins=20, edgecolor='black', alpha=0.7)
hist_ax.axvline(avg_similarity, color='red', linestyle='--', label=mean_label)
hist_ax.set_title("Distribution of Semantic Similarity Scores")
hist_ax.set_xlabel("Semantic Similarity")
hist_ax.set_ylabel("Frequency")
hist_ax.legend()
plt.show()

# Boxplot (to spot outliers / low scores), with the mean as a dashed red
# horizontal line.
box_fig, box_ax = plt.subplots(figsize=(6, 5))
box_ax.boxplot(semantic_scores, vert=True, patch_artist=True)
box_ax.axhline(avg_similarity, color='red', linestyle='--', label=mean_label)
box_ax.set_title("Boxplot of Semantic Similarity Scores")
box_ax.set_ylabel("Semantic Similarity")
box_ax.legend()
plt.show()


In [None]:
import pandas as pd
# Collect the two text columns in a single pass over the evaluated samples.
profiles = []
references = []
for row in dataset:
    profiles.append(row['input'])
    references.append(row['output'])

# One row per sample: the input profile, the reference answer, and the
# similarity score computed for it.
results = {
    "profile": profiles,
    "reference": references,
    "similarity": semantic_scores,
}
df = pd.DataFrame(results)

# Write the table to disk without the DataFrame index column.
df.to_csv("semantic_evaluation_results.csv", index=False)
