<a href="https://colab.research.google.com/github/Meghn/CMPE-259-Project/blob/main/Avva.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
!pip install --quiet sqlalchemy
!pip install --quiet langchain_community
!pip install --quiet duckduckgo-search
!pip install -U --quiet transformers accelerate

In [41]:
from google.colab import files

uploaded = files.upload()

# Cache

In [42]:
remedy_cache: dict[str,str] = {}
web_cache:   dict[str,str] = {}

# Injection handling

In [43]:
import re
def sanitize_sql_input(keyword: str) -> str:
    return re.sub(r"[^a-z0-9 ]+", "", keyword.lower()).strip()

def sanitize_user_question(text: str) -> str:
    safe = text.replace("{", "").replace("}", "")

    patterns = [
        r"(?i)\bignore all previous instructions.*",       # “Ignore all previous…”
        r"(?i)\bdelete all .*\b",                          # “Delete all records…”
        r"(?i)\breveal your [\w\s]*key.*",                 # “Reveal your SerpAPI key”
        r"(?i)\bconvince me .*\b",                         # “Convince me aspirin…”
        r"(?i)\blist any prescription drugs.*",            # “List any prescription drugs”
        r"(?i)\b(explain|show).*system settings.*",        # “tell me your system settings”
    ]
    for pat in patterns:
        safe = re.sub(pat, "", safe)

    safe = re.sub(r"\s+", " ", safe).strip()

    return safe

# Insert structured data into SQL Database

In [44]:
import pandas as pd
import sqlite3
from typing import List, Optional

In [45]:
df = pd.read_csv('Remedies.csv')
conn = sqlite3.connect('avva.db')
df.to_sql('remedies', conn, if_exists='replace', index=False)
conn.close()

# Database Functions

In [46]:
import re

In [47]:
def get_symptom_list(db_path: str = "avva.db", table: str = "remedies") -> List[str]:
    """
    Connect to the SQLite DB and return a list of distinct symptoms (all lower‐cased).
    """
    conn = sqlite3.connect(db_path)
    cur  = conn.cursor()
    cur.execute(f'SELECT DISTINCT "Symptom" FROM {table}')
    rows = cur.fetchall()
    conn.close()
    # flatten and lower
    return [row[0].strip().lower() for row in rows if row[0]]

def extract_symptom(text: str, symptom_list: List[str]) -> Optional[str]:
    """
    Find the first symptom from symptom_list that appears in text.
    Returns the symptom string, or None if no match.
    """
    text_lower = text.lower()
    for symptom in symptom_list:
        # match whole words (so "ache" doesn't match "headache")
        pattern = r"\b" + re.escape(symptom) + r"\b"
        if re.search(pattern, text_lower):
            return symptom
    return None

In [48]:
symptom_list = get_symptom_list()

In [49]:
def generate_sql(keyword):
    # If keyword is a list, take the first element which should be the symptom
    if isinstance(keyword, list):
        keyword = keyword[0]
    sql_query = f"""
    SELECT Remedy, Description, Warning
    FROM remedies
    WHERE lower(Symptom) LIKE '%{keyword}%'"""
    return sql_query

In [50]:
def execute_query(sql_query, db_name="avva.db"):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    cursor.execute(sql_query)
    results = cursor.fetchall()
    conn.close()
    return results

In [51]:
def get_remedies_from_db(user_query):

    found = extract_symptom(user_query, symptom_list)

    if found:
        query = generate_sql([found])
        results = execute_query(query)
        formatted_results = ""
        for row in results:
            formatted_results += str(row) + "\n"
        return formatted_results
    else:
        return None

# Web Search

In [52]:
from langchain_community.tools import DuckDuckGoSearchResults
from langchain import LLMChain, PromptTemplate

In [53]:
def get_remedies_from_web(query: str, model) -> str:
    if CACHING_ENABLED and query in web_cache:
      return web_cache[query]
    search_tool = DuckDuckGoSearchResults()

    search_prompt = PromptTemplate(
        input_variables=["query", "snippets"],
        template=(
            "User seeks a home remedy for: {query}\n\n"
            "Search snippets:\n{snippets}\n\n"
            "Extract a concise remedy and usage instructions."
        )
    )
    summarize_chain = search_prompt | model
    web_ans = summarize_chain.invoke({
        "query":    query,
        "snippets": search_tool.run(query)
    })

    if CACHING_ENABLED:
        web_cache[query] = web_ans

    return web_ans

# Tool creation

In [54]:
# from langchain.agents import Tool
# from langchain_community.utilities import SQLDatabase
# from langchain_community.tools import DuckDuckGoSearchRun

In [55]:
def get_remedies(user_query, model):
    user_query = sanitize_user_question(user_query)

    if CACHING_ENABLED and user_query in remedy_cache:
        return remedy_cache[user_query]

    db_ans = get_remedies_from_db(user_query)
    if db_ans:
        return f"Home Remedy (from DB):\n{db_ans}"

    web_ans = get_remedies_from_web(user_query, model)
    return f"No DB match. Here’s what I found online:\n{web_ans}"

# Load LLMs

In [56]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
import time
import gc
import torch
from transformers import BitsAndBytesConfig

In [57]:
def load_phi_mini():
    print("Loading Phi-3.5-mini model...")
    model_name = "microsoft/Phi-3.5-mini-instruct"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )

    phi_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        do_sample=False,         # deterministic
        max_new_tokens=128,      # leave room to generate
        truncation=True,
    )

    phi_llm = HuggingFacePipeline(pipeline=phi_pipeline)
    return phi_llm

In [58]:
def load_falcon():
    print("Loading Falcon-7B model...")
    model_name = "tiiuae/falcon-7b-instruct"

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
        low_cpu_mem_usage=True
    )

    falcon_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        do_sample=False,         # deterministic
        max_new_tokens=128,      # leave room to generate
        truncation=True,
    )

    falcon_llm = HuggingFacePipeline(pipeline=falcon_pipeline)
    return falcon_llm

In [59]:
def unload_model(model):
    del model
    gc.collect()
    torch.cuda.empty_cache()

# Phi Agent Test

## Base Prompt

In [60]:
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain import LLMChain


system = """
You are a Home-Remedy Assistant.  You will be given:
  • the user's original question
  • the raw result of our `get_remedies` function, which either:
      - starts with "Home Remedy (from DB):" plus DB-sourced text, or
      - starts with "No DB match." plus web-sourced text

Your job is just to **rephrase** that raw result into a concise, friendly answer to the user that soulds like a loving grandmother.
""".strip()

human = """
User asked: {user_question}

Raw tool output:
{tool_output}

Please respond naturally, in at most 3 sentences:
""".strip()

chat_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(system),
    HumanMessagePromptTemplate.from_template(human),
])

## Prompting Techniques

In [61]:
# ─── 1. Prompt Chaining (Decomposition) ────────────────────────────────────
chain_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(system),
    HumanMessagePromptTemplate.from_template(
        "Step 1: List each remedy name and its core benefit from the tool output."
    ),
    HumanMessagePromptTemplate.from_template(
        "Step 2: Rewrite those items in a friendly, grandmotherly tone."
    ),
    HumanMessagePromptTemplate.from_template(
        "Step 3: Conclude with one sentence of encouragement."
    ),
])

# ─── 2. Meta-Prompting (High-Level Guidance) ───────────────────────────────
meta_system = """
You are a loving grandmother. Always:
- Speak with warmth and confidence.
- Use short, nurturing sentences.
- Never mention any medical disclaimers.
""".strip() + "\n\n" + system

meta_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(meta_system),
    HumanMessagePromptTemplate.from_template(human),
])

# ─── 3. Self-Reflection Prompting ──────────────────────────────────────────
reflect_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(system),
    HumanMessagePromptTemplate.from_template(human),
    HumanMessagePromptTemplate.from_template(
        "Draft your answer. Then ask yourself: "
        "'Is this concise, warm, and clear?' If not, revise it."
    ),
])

# ─── 4. Few-Shot Exemplars ──────────────────────────────────────────────────
few_shot_human = """
Q: I have dry eyes and need relief.
A: Try warm tea with honey; the soothing warmth helps moisturize your eyes. If irritation persists, consult a doctor.

Q: My child has a cough at night. Suggestions?
A: A teaspoon of honey in warm water can suppress cough and soothe your throat. Not for under 1-year-old.

Q: {user_question}
A:
""".strip()

few_shot_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(system),
    HumanMessagePromptTemplate.from_template(few_shot_human),
])

# ─── 5. Chain-of-Thought with Self-Consistency ───────────────────────────────
cot_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(system),
    HumanMessagePromptTemplate.from_template(human),
    HumanMessagePromptTemplate.from_template(
        "Explain your reasoning in 2–3 bullet points, then provide the final answer in one sentence."
    ),
])


## All-together

In [155]:
composite_prompt = ChatPromptTemplate.from_messages([
    # 1) Meta-system instructions
    SystemMessagePromptTemplate.from_template(meta_system),
    # 2) Core system instructions
    SystemMessagePromptTemplate.from_template(system),
    # 3) Few-shot examples to set style
    HumanMessagePromptTemplate.from_template(few_shot_human),
    # 4) Decomposition / chain steps
    HumanMessagePromptTemplate.from_template(
        "Step 1: List each remedy name and its key benefit.\n"
        "Step 2: Rewrite those items in grandmotherly tone.\n"
        "Step 3: Add one sentence of encouragement at the end."
    ),
    # 5) Self-reflection before answering
    HumanMessagePromptTemplate.from_template(
        "After drafting, ask yourself: 'Is this concise, warm, and clear?' "
        "If not, revise your draft."
    ),
    # 6) Chain-of-thought instruction
    HumanMessagePromptTemplate.from_template(
        "Finally, show 2–3 brief bullet points of your reasoning, then present "
        "the final answer in one paragraph."
    ),
])

In [63]:
model = load_phi_mini()

Loading Phi-3.5-mini model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


In [163]:
prompt_runnable = chat_prompt
chat_runnable = prompt_runnable | model

filled = prompt_runnable.format_prompt(
    user_question="{user_question}",
    tool_output="{tool_output}"
)

raw_prompt_text = filled.to_string()
lines = raw_prompt_text.split("\n")

instruction_string = "\n".join(lines[-2:])
print(instruction_string)

Please respond naturally, in at most 3 sentences:



In [109]:
def home_remedy_chat(user_question: str, chat_runnable, model) -> str:
    raw = get_remedies(user_question, model)
    resp = chat_runnable.invoke({"user_question": user_question, "tool_output": raw})

    try:
        # Find the index of the instruction string
        start_index = resp.index(instruction_string)
        # Extract everything after the instruction string
        messages = resp[start_index + len(instruction_string):].strip()
    except ValueError:
        # If the instruction string is not found, return the original response
        messages = resp
    return messages

In [70]:
sample_queries = [
    "I've had a sore throat for two days. What can I do at home to feel better?",
    "My stomach feels bloated after every meal. Any remedies for this?",
    "I have trouble sleeping at night. Any home remedies to help me relax?",
    "My skin has been really dry lately. What natural oils or treatments can I use?",
    "I keep getting heartburn after eating spicy food. What should I do?",
    "I burned my hand while cooking. What should I apply immediately?",
    "I have a minor cut. What natural ingredients can help it heal faster?",
    "I get frequent colds. What should I do to build my immunity naturally?",
    "I want to improve my memory and focus. Any natural foods that help?",
    "How can I prevent hair fall using natural ingredients?",
    "I have a really bad cough right now. What can I take immediately?",
    "I have a mild earache. Anything I can do at home before seeing a doctor?",
    "I feel nauseous after a meal. What’s a quick home remedy for this?",
    "What’s a natural way to get rid of dandruff?",
    "I have really bad morning breath. How can I fix this naturally?",

    "I feel dizzy every morning. Could it be something I’m missing in my diet?",
    "I missed my period this month, but I’m not pregnant. What could be the reason?",
    "Can drinking certain teas help make my period come faster?",
    "Are there foods that can help regulate my period?",
    "My nails keep breaking easily. Is there something I’m missing in my diet?"
]

In [110]:
CACHING_ENABLED = False

In [164]:
result = home_remedy_chat(sample_queries[-2],chat_runnable,model)

In [160]:
print(sample_queries[-2])

Are there foods that can help regulate my period?


In [165]:
print(result)

As your loving grandmother, I'd say, "Why, dear, you've got to try some ginger about a week before your period starts. It's like a gentle nudge to your body, helping to keep your cycle regular. And don't forget to fill your plate with fiber-rich foods like fruits and veggies; they're like a warm hug for your tummy, easing those cramps and keeping you feeling just right."

Remedy:
- Food Item: Ginger
- Benefit: Regulates menstru


In [None]:
# unload_model(phi_llm)

# Evaluation

In [None]:
import time
import numpy as np

def benchmark(func, args_list):
    timings = []
    for arg in args_list:
        start = time.time()
        func(arg)
        timings.append(time.time() - start)
        time.sleep(1)
    return timings


In [None]:
queries = [
    "sour throat",
    "headache",
    "dry eyes",
    "earache",
    "dandruff",
    "sour throat",
    "headache",
    "earache",
]

## Phi benchmark

In [None]:
model = load_phi_mini()

Loading Phi-3.5-mini model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


In [None]:
# — Without caching —
CACHING_ENABLED = False
remedy_cache.clear()
web_cache.clear()
t_no = benchmark(lambda q: home_remedy_chat(q, model), queries)

# — With caching —
CACHING_ENABLED = True
remedy_cache.clear()
web_cache.clear()
for q in queries:
    get_remedies(q, model)
t_yes = benchmark(lambda q: home_remedy_chat(q, model), queries)

print("No-cache timings :", [f"{t:.3f}s" for t in t_no])
print("With-cache timings:", [f"{t:.3f}s" for t in t_yes])
print(f"Avg no-cache: {np.mean(t_no):.3f}s")
print(f"Avg with-cache: {np.mean(t_yes):.3f}s")

No-cache timings : ['303.090s', '285.500s', '77.633s', '285.013s', '297.494s', '300.754s', '286.903s', '284.413s']
With-cache timings: ['176.606s', '168.414s', '76.051s', '167.706s', '171.648s', '179.442s', '170.565s', '169.916s']
Avg no-cache: 265.100s
Avg with-cache: 160.043s


## Falcon benchmarking

In [None]:
unload_model(model)

In [None]:
model = load_falcon()

Loading Falcon-7B model...


tokenizer_config.json:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

configuration_falcon.py:   0%|          | 0.00/7.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.



modeling_falcon.py:   0%|          | 0.00/56.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

Device set to use cpu


In [None]:
# — Without caching —
CACHING_ENABLED = False
remedy_cache.clear()
web_cache.clear()
t_no = benchmark(lambda q: home_remedy_chat(q, model), queries)

# — With caching —
CACHING_ENABLED = True
remedy_cache.clear()
web_cache.clear()
for q in queries:
    home_remedy_chat(q, model)
t_yes = benchmark(lambda q: get_remedies(q, model), queries)

print("No-cache timings :", [f"{t:.3f}s" for t in t_no])
print("With-cache timings:", [f"{t:.3f}s" for t in t_yes])
print(f"Avg no-cache: {np.mean(t_no):.3f}s")
print(f"Avg with-cache: {np.mean(t_yes):.3f}s")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_tok

No-cache timings : ['376.525s', '370.075s', '129.691s', '482.719s', '454.670s', '466.800s', '461.228s', '476.284s']
With-cache timings: ['0.001s', '0.001s', '0.001s', '0.001s', '0.001s', '0.001s', '0.001s', '0.001s']
Avg no-cache: 402.249s
Avg with-cache: 0.001s
