In [1]:
"""
Project: Prompt Strategy Benchmarking
Description: This notebook explores different prompt strategies for summarizing semi-structured wellness logs using open-source LLMs.
Author: Kavitha Lingarajegowda
"""


'\nProject: Prompt Strategy Benchmarking\nDescription: This notebook explores different prompt strategies for summarizing semi-structured wellness logs using open-source LLMs.\nAuthor: Kavitha Lingarajegowda\n'

In [2]:
# Dummy wellness entries used by the rest of the notebook: one dict per day,
# with every field stored as a plain string.
wellness_logs = [
    dict(date="2025-08-20", sleep="7 hours", mood="happy", activity="yoga"),
    dict(date="2025-08-21", sleep="6 hours", mood="tired", activity="reading"),
    dict(date="2025-08-22", sleep="8 hours", mood="energetic", activity="jogging"),
]

# Echo each entry on its own line as a quick sanity check of the sample data.
for entry in wellness_logs:
    print(entry)


{'date': '2025-08-20', 'sleep': '7 hours', 'mood': 'happy', 'activity': 'yoga'}
{'date': '2025-08-21', 'sleep': '6 hours', 'mood': 'tired', 'activity': 'reading'}
{'date': '2025-08-22', 'sleep': '8 hours', 'mood': 'energetic', 'activity': 'jogging'}


In [3]:
# Install Hugging Face Transformers (free, works on Colab)
!pip install transformers

from transformers import pipeline

# Initialize a summarization pipeline with a small, free model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


In [5]:
# Define two simple prompt strategies. Each value is an instruction that is
# prepended verbatim to the formatted log before it is sent to the summarizer.
prompts = {
    "concise": "Summarize the wellness log concisely:",
    "detailed": "Summarize the wellness log with details:"
}

# Generation limits passed to the summarizer for every log (named here so they
# can be tuned in one place instead of being repeated as magic numbers).
SUMMARY_MAX_LENGTH = 20
SUMMARY_MIN_LENGTH = 5


def build_prompt_input(prompt_text, log):
    """Return the model input for one log: the instruction followed by its fields.

    Args:
        prompt_text: Instruction string placed in front of the log fields.
        log: Mapping with the keys 'date', 'sleep', 'mood' and 'activity'.

    Returns:
        A single string of the form
        "<prompt_text> Date: ..., Sleep: ..., Mood: ..., Activity: ...".

    Raises:
        KeyError: If `log` is missing any of the four expected keys.
    """
    return (
        f"{prompt_text} Date: {log['date']}, Sleep: {log['sleep']}, "
        f"Mood: {log['mood']}, Activity: {log['activity']}"
    )


def summarize_log(prompt_text, log):
    """Summarize one wellness log under one prompt strategy.

    Args:
        prompt_text: Instruction string for the strategy being evaluated.
        log: Wellness log mapping (see `build_prompt_input`).

    Returns:
        The 'summary_text' of the first result returned by `summarizer`.
    """
    text = build_prompt_input(prompt_text, log)
    # do_sample=False is passed so generation does not use sampling.
    results = summarizer(
        text,
        max_length=SUMMARY_MAX_LENGTH,
        min_length=SUMMARY_MIN_LENGTH,
        do_sample=False,
    )
    return results[0]['summary_text']


# NOTE(review): the recorded output of this cell shows fragments of the
# instruction ("Summarize the") appearing inside the summaries, and several
# summaries ending mid-sentence. Presumably the model treats the instruction as
# part of the text to summarize and the max_length limit cuts the output short;
# confirm, and consider an instruction-tuned model and/or a larger limit before
# drawing conclusions about the strategies.

# Apply prompts to the dummy logs, keeping every summary (in log order) so that
# later analysis can reuse the results instead of re-running the model.
summaries_by_strategy = {}
for strategy, prompt_text in prompts.items():
    print(f"\n=== Strategy: {strategy} ===")
    summaries_by_strategy[strategy] = []
    for log in wellness_logs:
        summary = summarize_log(prompt_text, log)
        summaries_by_strategy[strategy].append(summary)
        print(summary)



=== Strategy: concise ===
The wellness log is a way to document a person's well-being. The log
Sleep: 6 hours, Mood: tired, Activity: reading.
Sleep: 8 hours, Mood: energetic, Activity: jogging. Date: 2025

=== Strategy: detailed ===
Sleep: 7 hours, Mood: happy, Activity: yoga. Summarize the
Sleep: 6 hours, Mood: tired, Activity: reading. Summarize the
Sleep: 8 hours, Mood: energetic, Activity: jogging. Summarize


In [6]:
# Install textstat for basic readability metrics
!pip install textstat

import textstat


Collecting textstat
  Downloading textstat-0.7.8-py3-none-any.whl.metadata (15 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.2-py3-none-any.whl.metadata (3.2 kB)
Collecting cmudict (from textstat)
  Downloading cmudict-1.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading textstat-0.7.8-py3-none-any.whl (239 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 239.1/239.1 kB 1.5 MB/s eta 0:00:00
Downloading cmudict-1.1.1-py3-none-any.whl (939 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 939.7/939.7 kB 7.2 MB/s eta 0:00:00
Downloading pyphen-0.17.2-py3-none-any.whl (2.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 23.5 MB/s eta 0:00:00
Installing collected packages: pyphen, cmudict, textstat
Successfully installed cmudict-1.1.1 pyphen-0.17.2 textstat-0.7.8


In [8]:
# Score the readability of every generated summary, one prompt strategy at a time.
for strategy, prompt_text in prompts.items():
    print(f"\n=== Strategy: {strategy} ===")
    for log in wellness_logs:
        # Rebuild the model input: the instruction followed by "Label: value" pairs.
        fields = ", ".join(
            f"{key.capitalize()}: {log[key]}"
            for key in ("date", "sleep", "mood", "activity")
        )
        text = f"{prompt_text} {fields}"

        # Generate the summary without sampling and keep only its text.
        generation_options = dict(max_length=20, min_length=5, do_sample=False)
        outputs = summarizer(text, **generation_options)
        summary = outputs[0]['summary_text']

        # Flesch Reading Ease score of the summary, reported right below it.
        readability = textstat.flesch_reading_ease(summary)
        print(
            f"Summary: {summary}",
            f"Readability (Flesch Reading Ease): {readability}\n",
            sep="\n",
        )



=== Strategy: concise ===
Summary: The wellness log is a way to document a person's well-being. The log
Readability (Flesch Reading Ease): 69.99384615384618

Summary: Sleep: 6 hours, Mood: tired, Activity: reading.
Readability (Flesch Reading Ease): 42.61571428571432

Summary: Sleep: 8 hours, Mood: energetic, Activity: jogging. Date: 2025
Readability (Flesch Reading Ease): 37.900000000000034


=== Strategy: detailed ===
Summary: Sleep: 7 hours, Mood: happy, Activity: yoga. Summarize the
Readability (Flesch Reading Ease): 37.900000000000034

Summary: Sleep: 6 hours, Mood: tired, Activity: reading. Summarize the
Readability (Flesch Reading Ease): 37.900000000000034

Summary: Sleep: 8 hours, Mood: energetic, Activity: jogging. Summarize
Readability (Flesch Reading Ease): 8.365000000000009

