## Imports 

In [1]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from openai import OpenAI
from openai import APIError, RateLimitError, AuthenticationError
import re
from datetime import datetime

import json

In [2]:
load_dotenv("../.env")

True

## Helper functions

In [3]:
client = OpenAI(api_key=os.getenv("OPEN_API_KEY"))
def get_llm_response(
    user_prompt: str,
    system_prompt: str = "You are a helpful assistant.",
    model: str = "gpt-4o-mini",
    get_tokens: bool = False,
    tools: list = None,
    tool_choice: str = "auto"
):
    """
    Calls the OpenAI Chat Completions API with specified prompts and model.
    Supports optional function/tool calling via `tools` and `tool_choice`.

    Args:
        user_prompt (str): The user's question or instruction.
        system_prompt (str): The system message to set the assistant's behavior.
        model (str): The name of the GPT model.
        get_tokens (bool): Whether to return token usage details.
        tools (list): Optional list of tool/function definitions (for function calling).
        tool_choice (str): How tool calling should be handled: "auto", "none", or specific.

    Returns:
        - If get_tokens=False: str (assistant's response)
        - If get_tokens=True: dict with keys {"response", "tokens"}
          where tokens = {"input_tokens", "output_tokens", "total_tokens"}
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice=tool_choice
        )

        message_content = (
            response.choices[0].message.content
            if response.choices and response.choices[0].message
            else "Error: Empty response from API."
        )

        usage = getattr(response, "usage", None)
        tokens_dict = {
            "input_tokens": getattr(usage, "prompt_tokens", None),
            "output_tokens": getattr(usage, "completion_tokens", None),
            "total_tokens": getattr(usage, "total_tokens", None),
        } if usage else None

        if get_tokens:
            return {
                "response": message_content,
                "tokens": tokens_dict
            }
        else:
            return message_content

    except AuthenticationError:
        return "API Error: Authentication failed. Check your API key."
    except RateLimitError:
        return "API Error: Rate limit exceeded. Please wait before retrying."
    except APIError as e:
        return f"API Error: An OpenAI API error occurred: {e}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"


def jsonify_output(llm_output: str):
    """
    Safely converts a stringified LLM response into a Python dictionary.
    Designed for cases where the LLM is instructed to return JSON that can be parsed by json.loads(),
    but may include extra formatting, markdown, or escaped characters.
    
    Args:
        llm_output (str): The raw LLM string output.
    
    Returns:
        dict: Parsed dictionary with keys/values from the LLM output.
    """
    # Step 1: Clean common wrappers (e.g., code blocks, quotes)
    cleaned = llm_output.strip()
    
    # Remove markdown code block syntax (```json ... ```)
    cleaned = re.sub(r"^```(json)?", "", cleaned)
    cleaned = re.sub(r"```$", "", cleaned)
    
    # Step 2: Handle single quotes or extra escaping
    # Convert Python-style dict string → JSON-compatible
    cleaned = cleaned.strip()
    
    # If it starts and ends with quotes (common case), remove them
    if cleaned.startswith("'") and cleaned.endswith("'"):
        cleaned = cleaned[1:-1]
    if cleaned.startswith('"') and cleaned.endswith('"'):
        cleaned = cleaned[1:-1]

    # Replace any escaped quotes \" or \'
    cleaned = cleaned.replace('\\"', '"').replace("\\'", "'")

    # Step 3: Try loading directly as JSON
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as e:
        print("⚠️ json.loads() failed, trying fallback fix…")
        # Attempt to extract JSON content using regex
        match = re.search(r"\{.*\}", cleaned, re.DOTALL)
        if match:
            try:
                return json.loads(match.group(0))
            except Exception as inner_e:
                print("❌ Still failed to parse JSON:", inner_e)
        raise ValueError(f"Failed to parse JSON from LLM output: {e}\n\nCleaned text:\n{cleaned[:500]}")


In [4]:
get_llm_response("what is the capital of france?")

'API Error: An OpenAI API error occurred: Error code: 400 - {\'error\': {\'message\': "Invalid value for \'tool_choice\': \'tool_choice\' is only allowed when \'tools\' are specified.", \'type\': \'invalid_request_error\', \'param\': \'tool_choice\', \'code\': None}}'

## Evernote handson(deprecated)

In [6]:
#!pip install oauth2

In [None]:
# pip install evernote3

Collecting evernote3Note: you may need to restart the kernel to use updated packages.

  Downloading evernote3-1.25.14-py3-none-any.whl (143 kB)
     -------------------------------------- 143.7/143.7 kB 1.4 MB/s eta 0:00:00
Collecting oauthlib
  Downloading oauthlib-3.3.1-py3-none-any.whl (160 kB)
     ------------------------------------ 160.1/160.1 kB 954.3 kB/s eta 0:00:00
Collecting requests-oauthlib
  Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl (24 kB)
Installing collected packages: oauthlib, requests-oauthlib, evernote3
Successfully installed evernote3-1.25.14 oauthlib-3.3.1 requests-oauthlib-2.0.0


In [6]:
from evernote.api.client import EvernoteClient

In [7]:
client = EvernoteClient()

In [8]:
client.get_user_store()

TypeError: expected string or bytes-like object

## Intent and note identification

### Parsing Evernote Notes

In [4]:
def build_notes_index(root_folder):
    """
    Walks through root_folder and finds all .txt files.
    Returns a dict: { filename: path }.
    If duplicate filenames are found, stores list of paths.
    """
    notes_index = {}

    for dirpath, dirnames, filenames in os.walk(root_folder):
        for fname in filenames:
            if fname.lower().endswith('.txt'):
                # build relative path
                full_path = os.path.join(dirpath, fname)
                # Make path relative to root_folder if you prefer
                rel_path = os.path.relpath(full_path, start=root_folder)
                # Optionally prefix with ./notes/
                dict_value = os.path.join(root_folder, rel_path)

                # handle possible duplicate filenames
                if fname in notes_index:
                    # if existing value is a str, convert to list
                    existing = notes_index[fname]
                    if isinstance(existing, str):
                        notes_index[fname] = [ existing ]
                    # now append new path
                    notes_index[fname].append(dict_value)
                else:
                    notes_index[fname] = dict_value

    return notes_index

In [5]:
notes_path_dict = build_notes_index("./notes")

In [6]:
notes_list = list(notes_path_dict.values())

### Prompting and LLM call

In [7]:
INTNT_NOTE_PROMPT = """ 
You are the user's Evernote Manager — an expert assistant that helps manage and understand the user's local Evernote-style notes.

You will be given:
- {user_query}: a natural language instruction.
- {note_list} list of available note filenames. They are in format "note_name.txt".


Your job is to:
1. Identify which note the user is referring to.
2. Determine whether the user wants to **retrieve_info** or **edit_note**.

### How to approach:
1. **Keyword match:** Check if any note name exactly matches or shares key words with the query.  
2. **Substring match:** If no keyword match, look for substring overlap between the query and note names.  
3. **Reasoned guess:** If still unclear, use general knowledge or intuition about the query to guess which note fits best.  
4. If no confident match is possible, return `"no_match"` as the note.

### Intent detection:
- **retrieve_info:** when the user asks to show, read, open, or view a note.  
- **edit_note:** when the user asks to change, add, update, or edit something in a note.  

### Output format:
Return a valid JSON object (parsable by `json.loads`) with these keys only:
{{
    "reasoning": string, brief text explaining how you chose the note and intent,
    "note": string, the most probable note filename, or "no_match" if none found,
    "intent": string, one of "retrieve_info" or "edit_note"
}}

### Example output:

{{
  "reasoning": "No exact filename or substring match found, but based on the query about 'reps for chest press', it is most likely related to workouts, which fits the note 'exercises'.",
  "note": "exercises",
  "intent": "retrieve_info"
}}

"""

In [32]:
user_query = "what should be the rep range for leg press for me"
INTNT_NOTE_PROMPT_FORMATTED = INTNT_NOTE_PROMPT.format(
    user_query=user_query,
    note_list=", ".join(notes_path_dict.keys())
)
print(INTNT_NOTE_PROMPT_FORMATTED)

 
You are the user's Evernote Manager — an expert assistant that helps manage and understand the user's local Evernote-style notes.

You will be given:
- what should be the rep range for leg press for me: a natural language instruction.
- movies.txt, exercises.txt list of available note filenames. They are in format "note_name.txt".


Your job is to:
1. Identify which note the user is referring to.
2. Determine whether the user wants to **retrieve_info** or **edit_note**.

### How to approach:
1. **Keyword match:** Check if any note name exactly matches or shares key words with the query.  
2. **Substring match:** If no keyword match, look for substring overlap between the query and note names.  
3. **Reasoned guess:** If still unclear, use general knowledge or intuition about the query to guess which note fits best.  
4. If no confident match is possible, return `"no_match"` as the note.

### Intent detection:
- **retrieve_info:** when the user asks to show, read, open, or view a note

In [9]:
", ".join(notes_path_dict.keys())

'movies.txt, exercises.txt'

In [33]:
intnt_response = get_llm_response(
    user_prompt=INTNT_NOTE_PROMPT_FORMATTED,
    model="gpt-4o-mini"
)

In [34]:
jsonified_intnt = jsonify_output(intnt_response)
print(jsonified_intnt)

{'reasoning': "The query about the rep range for leg press indicates a need for workout-related information. The note 'exercises.txt' likely contains relevant information about different exercises and their recommended rep ranges.", 'note': 'exercises.txt', 'intent': 'retrieve_info'}


## Performing action

In [35]:
action_prompt = jsonified_intnt['intent']
action_note = jsonified_intnt['note']


In [36]:
action_prompt_fname = os.path.join("./prompts", f"{action_prompt}.txt")
with open(action_prompt_fname, "r") as f:
    action_prompt_template = f.read()

In [37]:
print(action_prompt_template)

You are the user's Evernote Manager — an expert assistant responsible for retrieving information from the user's notes.

## INPUT FORMAT:
1. {user_query}: The user's **query** (a natural-language question or request for information).  
2. {note_content}: The **current text of the note** (the complete content of the note for search).

## TASK:
- Read the query carefully and look for the requested information within the note.
- The information may be stated **directly** (explicitly mentioned) or **indirectly** (implied through related context, wording, or examples).
- If the requested information can be inferred from the note, explain how you derived it.
- If the information is **not present** or cannot be inferred confidently, return `"not_found"` as the answer.

## EXTRA INFO
You may also use the **web_search** tool if you need to interpret certain terms, names, or references to understand the note better.  
Do not use it to replace missing note content — only to clarify meaning when n

In [38]:
action_note_fname = notes_path_dict.get(action_note, None)
with open(action_note_fname, "r") as f:
    action_note_content = f.read()

In [39]:
print(action_note_content)

I am currently a beginner in the gym, and I am following the guidance of Jeet Selal and Deltabolic.  
If I ask any questions about exercises, priority should be given to their advice. If the information is not in this note, any web search should first refer to their content.

1. General things to keep in mind  
- Form and tempo matter more than weight.  
- Focus on both the concentric and eccentric portions of each lift — both are important for development.  
- Breathing: exhale during the concentric phase, inhale during the eccentric phase.  
- Mind-muscle connection is very important at the beginner phase.  
- Tempo should be: 3…2…1 (lowering) then contract for 1, then 3…2…1 again (lowering) then 1… (pause), then 3…2…1, etc.  
- Abs training is not equivalent to cardio.  
- I am currently following a BRO-split.  
- For muscle building, the most important things are: form, tempo, progressive overload.  
- When lifting dumbbells (especially heavier ones), lift them from the ground with

In [40]:
## Helper function

def response_with_web_search(inp_prompt: str, model: str = "gpt-5", get_tokens: bool = False):
    """
    Calls the OpenAI Responses API with web search tool enabled.

    Args:
        inp_prompt (str): The user's input text or query.
        model (str): Model name to use (default: gpt-5).
        get_tokens (bool): Whether to return token usage details.

    Returns:
        - If get_tokens=False: str (assistant’s response)
        - If get_tokens=True: dict with keys {"response", "tokens"}
          where tokens = {"input_tokens", "output_tokens", "total_tokens"}
    """
    try:
        response = client.responses.create(
            model=model,
            input=[
                {
                    "role": "user",
                    "content": [
                        {"type": "input_text", "text": inp_prompt}
                    ]
                }
            ],
            text={
                "format": {"type": "text"},
                "verbosity": "low"
            },
            reasoning={"effort": "low"},
            tools=[
                {
                    "type": "web_search",
                    "user_location": {"type": "approximate"},
                    "search_context_size": "medium"
                }
            ],
            store=True,
            include=[
                "reasoning.encrypted_content",
                "web_search_call.action.sources"
            ]
        )

        # Extract the assistant’s text output
        message_output = None
        if hasattr(response, "output") and response.output:
            for item in response.output:
                if getattr(item, "type", None) == "message" and item.content:
                    for content_piece in item.content:
                        if content_piece.type == "output_text":
                            message_output = content_piece.text
                            break
                if message_output:
                    break
        message_output = message_output or "No text output found."

        # Extract token usage
        usage = getattr(response, "usage", None)
        tokens_dict = {
            "input_tokens": getattr(usage, "input_tokens", None),
            "output_tokens": getattr(usage, "output_tokens", None),
            "total_tokens": getattr(usage, "total_tokens", None),
        } if usage else None

        if get_tokens:
            return {
                "response": message_output,
                "tokens": tokens_dict
            }
        else:
            return message_output

    except Exception as e:
        return f"An error occurred: {e}"

In [41]:
user_query

'what should be the rep range for leg press for me'

In [45]:
action_response = response_with_web_search(action_prompt_template.format(
    user_query=user_query,
    note_content=action_note_content,
    ),
    model="gpt-5"
)


In [46]:
jsonify_output(action_response)

{'reasoning': "In section 10.2 (Leg Press) under 'Reps & Sets', the note specifies the rep target for working sets.",
 'final_text': '12–15 reps to failure per working set (with 4 working sets; 1–2 warm-up sets if needed).'}

In [47]:
print(jsonify_output(action_response)['final_text'])

12–15 reps to failure per working set (with 4 working sets; 1–2 warm-up sets if needed).


In [31]:
print(resp)

Short answer:
- Grip: Use either standard or thumbless. Thumbless often helps relax forearms and focus rear delts; on a machine it’s safe. Pick what lets you feel rear delts best.
- Elbows: Keep a soft bend (~15–30°), not locked. Lead the movement with your elbows.
- Elbow path: Track elbows in line with shoulders (roughly straight out to the sides). Don’t tuck them toward your ribs; don’t let them drift way up or down.

Form cues:
- Seat so handles are at mid‑shoulder height; chest supported.
- Shoulders down and slightly “wide.” Don’t shrug.
- Move slow: 2–3s out, brief pause, 2–3s back. Stop when hands reach roughly in line with your torso; no jerking.

Sets, reps, rest (beginner ~2 months):
- 3 working sets, 12–20 reps, 1–2 reps in reserve.
- Rest 60–90 seconds.
- Do 1–2 times per week.

Common mistakes to avoid:
- Locking elbows straight or turning it into a row with big elbow bend.
- Letting traps take over (shrugging).
- Excessive scapular retraction; think “arms out, not back.”

In [None]:
user_prompt = "Modify the notes that has movies listed in it and categorize like into hollywood and bollywood and then categories the movies based on genre like action, comedy, drama etc."

## Progressive overloading tracker

In [5]:
import pandas as pd

# Progressive Overload Tracking Table

data = {
    "muscle area": [
        # Chest
        "Chest", "Chest", "Chest", "Chest", "Chest",

        # Back
        "Back", "Back", "Back", "Back", "Back",

        # Biceps
        "Biceps", "Biceps", "Biceps",

        # Triceps
        "Triceps", "Triceps", "Triceps",

        # Shoulders
        "Shoulders", "Shoulders", "Shoulders", "Shoulders",

        # Legs
        "Legs", "Legs", "Legs", "Legs", "Legs", "Legs",

        # Forearms
        "Forearms", "Forearms", "Forearms"
    ],

    "exercise name": [
        # Chest
        "Flat Dumbbell Chest Press",
        "Incline Dumbbell Chest Press",
        "Pec Deck Fly",
        "Incline Dumbbell Fly",
        "Decline Dumbbell Fly",

        # Back
        "Lat Pulldown",
        "Close-Grip Seated Row",
        "Wide-Grip Seated Row",
        "Seal Row",
        "Straight-Arm Lat Pulldown",

        # Biceps
        "Dumbbell Biceps Curl",
        "Hammer Curl",
        "Incline Dumbbell Curl",

        # Triceps
        "Triceps Dips",
        "Triceps Cable Extension",
        "Overhead Dumbbell Press",   # corrected (from "overhead dumbell press")

        # Shoulders
        "Shrugs",
        "Lateral Raises",
        "Overhead Dumbbell Press",
        "Rear Delt Fly",

        # Legs
        "Leg Press",
        "Leg Extension",
        "Leg Curl",
        "Standing Calf Raises",
        "Seated Calf Raises",         # corrected (from "sitting calves raises")
        "Single-Leg Stiff Deadlift",

        # Forearms
        "Reverse Curl",
        "Flexor Wrist Curl (Barbell/Rod)",
        "Extensor Wrist Curl (Barbell/Rod)"
    ]
}

df = pd.DataFrame(data)

# Initialize tracking columns
df["max weight"] = 0
df["week"] = 1

print(df)


   muscle area                      exercise name  max weight  week
0        Chest          Flat Dumbbell Chest Press           0     1
1        Chest       Incline Dumbbell Chest Press           0     1
2        Chest                       Pec Deck Fly           0     1
3        Chest               Incline Dumbbell Fly           0     1
4        Chest               Decline Dumbbell Fly           0     1
5         Back                       Lat Pulldown           0     1
6         Back              Close-Grip Seated Row           0     1
7         Back               Wide-Grip Seated Row           0     1
8         Back                           Seal Row           0     1
9         Back          Straight-Arm Lat Pulldown           0     1
10      Biceps               Dumbbell Biceps Curl           0     1
11      Biceps                        Hammer Curl           0     1
12      Biceps              Incline Dumbbell Curl           0     1
13     Triceps                       Triceps Dip

In [6]:
df.to_excel("./notes/areas/health/progressive_overload_tracking.xlsx", index=False)

In [None]:
PROG_OVER_PROMPT = """
You are a fitness assistant that helps users track their progressive overload in weight training.

##INPUT
    1. {user_input} : a natural language instruction.
    2. {exercise_table}: A table of exercises with columns: muscle area, exercise name, max weight, week.

## Task  
    You will interpret the user’s input and decide whether the user intends to:  
    - **modify_week** – increase the week counter for all exercises of a muscle area (e.g., the user says “I did biceps and triceps today”).  
    - **modify_weight** – update the max weight for a specific exercise (e.g., the user says “Incline Dumbbell Chest Press now 20 kg”).  

## Extra details
    - It can be either one of the above actions or both for an input.
    - An example for only modify_week: "I worked on chest and back today".
    - An example for only modify_weight: "Flat Dumbbell Chest Press now 25 kg".
    - An example for both: "I did biceps and triceps today , bicep curls 15 kg and triceps push down with 12 kg today".

"""

In [8]:
def modify_week(df: pd.DataFrame, muscle_areas: list[str]) -> dict:
    """
    Increase the 'week' counter by 1 for all exercises in the given muscle area(s).
    
    Args:
        df: DataFrame containing the tracking table.
        muscle_areas: List of muscle area names (e.g., ["Chest", "Back"]).
        
    Returns:
        dict with status, list of muscle_areas updated, and rows_updated per area.
    """
    results = []
    for area in muscle_areas:
        mask = df["muscle area"].str.lower() == area.lower()
        if not mask.any():
            results.append({"muscle_area": area, "status": "error", "message": "No exercises found for this muscle area"})
        else:
            df.loc[mask, "week"] += 1
            new_week = int(df.loc[mask, "week"].iloc[0])
            results.append({"muscle_area": area, "status": "success", "new_week": new_week, "rows_updated": int(mask.sum())})
    return {"results": results}


def modify_weight(df: pd.DataFrame, updates: list[dict]) -> dict:
    """
    Update the 'max weight' for given exercise(s) if the new weight is greater than the current recorded weight.
    
    Args:
        df: DataFrame containing the tracking table.
        updates: List of update dicts, each with keys:
                 muscle_area (str), exercise_name (str), new_weight (float).
    
    Returns:
        dict with results list showing status, old_weight, new_weight for each update.
    """
    results = []
    for upd in updates:
        area = upd.get("muscle_area")
        exer = upd.get("exercise_name")
        new_w = upd.get("new_weight")
        mask = (df["muscle area"].str.lower() == area.lower()) & \
               (df["exercise name"].str.lower() == exer.lower())
        if not mask.any():
            results.append({
                "muscle_area": area,
                "exercise_name": exer,
                "status": "error",
                "message": "Exercise not found for this muscle area"
            })
        else:
            current_weight = float(df.loc[mask, "max weight"].iloc[0])
            if new_w > current_weight:
                df.loc[mask, "max weight"] = new_w
                df.loc[mask, "week"] = 1  # reset week for that exercise
                results.append({
                    "muscle_area": area,
                    "exercise_name": exer,
                    "status": "success",
                    "old_weight": current_weight,
                    "new_weight": new_w
                })
            else:
                results.append({
                    "muscle_area": area,
                    "exercise_name": exer,
                    "status": "no_change",
                    "old_weight": current_weight,
                    "attempted_weight": new_w,
                    "message": "New weight is not greater than current recorded weight"
                })
    return {"results": results}


In [9]:
prog_over_tools = \
[
  {
    "type": "function",
    "name": "modify_week",
    "description": "Increase the week counter by 1 for one or more muscle areas.",
    "parameters": {
      "type": "object",
      "properties": {
        "muscle_areas": {
          "type": "array",
          "items": {
            "type": "string",
            "description": "Name of a muscle area (e.g., 'Chest', 'Back', 'Legs')."
          },
          "description": "List of muscle areas whose week counter should be incremented."
        }
      },
      "required": ["muscle_areas"]
    }
  },
  {
    "type": "function",
    "name": "modify_weight",
    "description": "Update the max weight for one or more specific exercises; resets week for each exercise when weight increases.",
    "parameters": {
      "type": "object",
      "properties": {
        "updates": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "muscle_area": {
                "type": "string",
                "description": "The muscle area for the exercise."
              },
              "exercise_name": {
                "type": "string",
                "description": "The exact exercise name to update (e.g., 'Incline Dumbbell Chest Press')."
              },
              "new_weight": {
                "type": "number",
                "description": "The new weight lifted (in kg or chosen unit)."
              }
            },
            "required": ["muscle_area", "exercise_name", "new_weight"]
          },
          "description": "List of exercises to update with their new weights."
        }
      },
      "required": ["updates"]
    }
  }
]


In [10]:
df = pd.read_excel("./notes/areas/health/progressive_overload_tracking.xlsx" , engine='openpyxl')

In [11]:
df

Unnamed: 0,muscle area,exercise name,max weight,week
0,Chest,Flat Dumbbell Chest Press,0,1
1,Chest,Incline Dumbbell Chest Press,0,1
2,Chest,Pec Deck Fly,0,1
3,Chest,Incline Dumbbell Fly,0,1
4,Chest,Decline Dumbbell Fly,0,1
5,Back,Lat Pulldown,0,1
6,Back,Close-Grip Seated Row,0,1
7,Back,Wide-Grip Seated Row,0,1
8,Back,Seal Row,0,1
9,Back,Straight-Arm Lat Pulldown,0,1


### User prompt enterign for overloading

In [12]:
df.columns

Index(['muscle area', 'exercise name', 'max weight', 'week'], dtype='object')

In [None]:
prog_over_user_input = "biceps and triceps today : dumbell curl with 10kg , triceps dips now 15kg , triceps bar pulldown exercise with 27kg"

exercise_table_str = df[['muscle area', 'exercise name']].to_string(index=False)

input_list = [
    {
        "role": "user",
        "content": PROG_OVER_PROMPT.format(
            user_input=prog_over_user_input,
            exercise_table=exercise_table_str
        )
    }
]
# 2. Prompt the model with tools defined
response = client.responses.create(
    model="gpt-5",
    tools=prog_over_tools,
    input=input_list,
    tool_choice="auto"
    
)

In [14]:
response.output


[ResponseReasoningItem(id='rs_0861f2edc74ad5b80069031251bea08190880fde3625607c38', summary=[], type='reasoning', content=None, encrypted_content=None, status=None),
 ResponseFunctionToolCall(arguments='{"muscle_areas":["Biceps","Triceps"]}', call_id='call_ixOQIVdw6va4DzU24u0ZNwcN', name='modify_week', type='function_call', id='fc_0861f2edc74ad5b8006903125d43ec8190a351ae5f05e3d76b', status='completed'),
 ResponseFunctionToolCall(arguments='{"updates":[{"muscle_area":"Biceps","exercise_name":"Dumbbell Biceps Curl","new_weight":10},{"muscle_area":"Triceps","exercise_name":"Triceps Dips","new_weight":15},{"muscle_area":"Triceps","exercise_name":"Triceps Cable Extension","new_weight":27}]}', call_id='call_yQnumg7WxcNwJk5EBGqJYuew', name='modify_weight', type='function_call', id='fc_0861f2edc74ad5b8006903125dddb481908c7307896a22f002', status='completed')]

In [15]:
for item in response.output:
    if item.type == "function_call":
        print(item.name)
        if item.name == "modify_week":
            args = jsonify_output(item.arguments)['muscle_areas']
            modify_week(df, args)
        elif item.name == "modify_weight":
            args = jsonify_output(item.arguments)['updates']
            modify_weight(df, args)
        else:
            print("Unknown function:", item.name)

modify_week
modify_weight


In [16]:
df

Unnamed: 0,muscle area,exercise name,max weight,week
0,Chest,Flat Dumbbell Chest Press,0,1
1,Chest,Incline Dumbbell Chest Press,0,1
2,Chest,Pec Deck Fly,0,1
3,Chest,Incline Dumbbell Fly,0,1
4,Chest,Decline Dumbbell Fly,0,1
5,Back,Lat Pulldown,0,1
6,Back,Close-Grip Seated Row,0,1
7,Back,Wide-Grip Seated Row,0,1
8,Back,Seal Row,0,1
9,Back,Straight-Arm Lat Pulldown,0,1


In [17]:
df.to_excel("./notes/areas/health/progressive_overload_tracking.xlsx", index=False)

In [None]:
### Future steps
    

## Calorie tracking

In [6]:
calorie_df = pd.DataFrame(columns=["date", "item", "calories", "protein"])
calorie_df.to_excel("./notes/areas/health/calorie_tracking.xlsx", index=False)

In [7]:
### Calorie Tracking Prompt

In [26]:
CLR_TRCK_PROMPT = """
## Role
    - You are the user’s Health Assistant — an expert in tracking nutrition and calories to support their fitness journey.
    - The user will provide food items they have consumed (e.g., “I had a chicken burger and a banana split today at 15:30” or “Breakfast: oatmeal with berries, lunch: tuna salad”).
    - You need to find out the calories and protein content for each food item, either from the user input or by looking it up online.


## Input
    1. {user_input} : a natural language description of food items consumed by the user.


## Tools in access
    1. **web_search**: when you need to look up nutrition information for a food item (e.g., “calories and protein in 1 banana”).

Your role:


## Task:
1. Read the user’s input and identify the food items and consumption context.
2. For each item, check if calories/protein values are provided; if not, use **web_search** to fetch estimates.


## Output:
    - Return a valid JSON object (parsable by `json.loads`) with these keys only:

    {{
        "summary": string, brief text summarizing the food items and their nutrition,
        "items": [
            {{
                "item": string, name of the food item,
                "calories": number, estimated calories (in kcal),
                "protein": number, estimated protein in grams
            }},
            ...
        ]
    }}


"""

### Functions and tools def

In [27]:
CALORIE_FILE_PATH = "./notes/areas/health/calorie_tracking.xlsx"

In [None]:
def update_calorie_tracker(items: list[dict] , df: pd.DataFrame = None):
    """
    Appends multiple calorie entries into the calorie tracking Excel file.

    items format (coming from LLM JSON):
    [
        { "item": "Boiled eggs, 2 large", "calories": 155.0, "protein": 12.6 },
        { "item": "Soya chunks (dry), 20 g", "calories": 73.2, "protein": 10.2 }
    ]
    """

    # ✅ Use today's date
    today = datetime.now().strftime("%Y-%m-%d")
    # # ✅ If file exists -> read it, else create new dataframe
    # if os.path.exists(CALORIE_FILE_PATH):
    #     df = pd.read_excel(CALORIE_FILE_PATH)
    # else:
    #     df = pd.DataFrame(columns=["date", "item", "calories", "protein"])

    # ✅ Append every item to dataframe
    rows_added = []
    for entry in items:
        new_row = {
            "date": today,
            "item": entry["item"],
            "calories": float(entry["calories"]),
            "protein": float(entry["protein"]),
        }
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
        rows_added.append(new_row)

    # ✅ Save back to Excel
    df.to_excel(CALORIE_FILE_PATH, index=False)

    return {
        "status": "success",
        "rows_added": rows_added
    }


In [None]:
# calorie_tracker_tools = \
# [
#   {
#     "type": "function",
#     "name": "get_date",
#     "description": "Get the current date in the format YYYY-MM-DD.",
#     "parameters": {
#       "type": "object",
#       "properties": {},
#       "required": []
#     }
#   },
#   {
#     "type": "function",
#     "name": "update_calorie_csv",
#     "description": "Append a new row to the calorie tracking file with columns: date, item, calories, protein.",
#     "parameters": {
#       "type": "object",
#       "properties": {
#         "date": {
#           "type": "string",
#           "description": "Date in format YYYY-MM-DD."
#         },
#         "item": {
#           "type": "string",
#           "description": "Name of the food item consumed."
#         },
#         "calories": {
#           "type": "number",
#           "description": "Calories of the food item."
#         },
#         "protein": {
#           "type": "number",
#           "description": "Protein (in grams) of the food item."
#         }
#       },
#       "required": ["date", "item", "calories", "protein"]
#     }
#   },
#   {
#     "type": "web_search",
#     "user_location": {"type": "approximate"},
#     "search_context_size": "medium"
#   }
# ]


### user input

In [35]:
calorie_tracker_input = "1 big crispy chicken burger and pink lemonade at burger singh"
input_list = [
    {
        "role": "user",
        "content": CLR_TRCK_PROMPT.format(
            user_input=calorie_tracker_input
        )
    }
]
# 2. Prompt the model with tools defined
response = client.responses.create(
    model="gpt-5",
    tools=[
        {
        "type": "web_search",
        "user_location": {"type": "approximate"},
        "search_context_size": "low"
        },
    ],
    input=input_list,
    tool_choice="auto",
    store=False,
    reasoning={"effort": "low"},
)


In [36]:
jsonify_output(response.output_text)

{'summary': 'Logged 1 Big Crispy Chicken burger and 1 pink lemonade from Burger Singh. Burger: ~450 kcal and ~19.6 g protein, based on Burger Singh’s nutrition chart. Pink lemonade (estimated 300 ml): ~124 kcal and ~0 g protein using generic pink lemonade per-cup data scaled to 300 ml. ([es.scribd.com](https://es.scribd.com/document/663542677/Bs-Nutrition-Values))',
 'items': [{'item': 'Big Crispy Chicken burger (Burger Singh)',
   'calories': 450.22,
   'protein': 19.58},
  {'item': 'Pink lemonade (~300 ml)', 'calories': 124, 'protein': 0}]}

In [37]:
result = jsonify_output(response.output_text)

# 2️⃣ Extract only items list
items = result["items"]

# 3️⃣ Update tracker
update_status = update_calorie_tracker(items)

print(update_status)

{'status': 'success', 'rows_added': [{'date': '2025-10-30', 'item': 'Big Crispy Chicken burger (Burger Singh)', 'calories': 450.22, 'protein': 19.58}, {'date': '2025-10-30', 'item': 'Pink lemonade (~300 ml)', 'calories': 124.0, 'protein': 0.0}]}


## Integrating speech recordingm

In [45]:
audio_file= open("./testing_data/recording_4.m4a", "rb")

transcription = client.audio.transcriptions.create(
    model="gpt-4o-transcribe", 
    file=audio_file,
    prompt="The following is a dialogue by an Indian man in english telling about his workout routine"
)

print(transcription.text)

## just improve the quality of text and pass it on

Hello everyone, my name is Hemang Sagar. I go to the gym now and today I did chest and shoulders. In chest exercises specifically, I did chest press with 20 kg dumbbells and other exercises at the existing weights along with shoulder press with 5 kg.


## Rough

In [1]:
!ls

README.md
main.py
notes
pipeline.ipynb
prompts
requirements.txt
testing_data
tools.py
utils.py


In [2]:
pwd

'c:\\Users\\HP\\Documents\\Projects\\LLMs\\evernote_agent'

## Backend code

In [1]:
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI
from tqdm import tqdm
from utils import *
from tools import *
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Any, Dict, Optional
import uvicorn
import os

load_dotenv()
client = OpenAI(api_key=os.getenv("OPEN_API_KEY"))

In [13]:
user_input = "Ate these things today " \
"1. 1 scoop whey protein from whole truth 24g protein in 36g serving " \
"2. 1 katori arbi sabze and 2 rotis " \
"3. chicken roll from roll box in gurgaon , " \
"4. 40 gms soya chunks" \
"5. 3 bananas "\
"6. 1 himalayn green tea from third wave coffee "\
"7. 1 glass milk and 2 breads "\
"8. 3 breads with salsa sauce from wingreens"


In [14]:
print(user_input)

Ate these things today 1. 1 scoop whey protein from whole truth 24g protein in 36g serving 2. 1 katori arbi sabze and 2 rotis 3. chicken roll from roll box in gurgaon , 4. 40 gms soya chunks5. 3 bananas 6. 1 himalayn green tea from third wave coffee 7. 1 glass milk and 2 breads 8. 3 breads with salsa sauce from wingreens


In [15]:
note_desc = {
    "movies.txt": "A watchlist of movies the user plans to watch in the future. Contains movie titles, possibly with genres, ratings, or recommendations. Used when user wants to add movies to watch, check what's on their list, or mark movies as watched.",
    
    "calorie_tracking.xlsx": "Daily food and nutrition log tracking calories, macronutrients (protein, carbs, fats), and meal details. Used for diet management and ensuring adequate protein intake for muscle building. Relevant for queries about food intake, daily calories, macro counting, meal logging, or nutritional tracking.",
    
    "exercises.txt": "A comprehensive exercise reference guide containing workout instructions, proper form techniques, execution rules, and best practices for various muscle groups (chest, back, legs, shoulders, arms, core). This is a static manual/guide for learning how to perform exercises correctly, not for logging workouts. Relevant when user needs form tips, exercise variations, or technique guidance.",
    
    "progressive_overload_tracking.xlsx": "Workout log tracking weights, sets, and reps for each exercise over time to ensure progressive overload for muscle growth. Contains historical workout data showing weight progression for different exercises. Used for logging completed workouts, tracking strength gains, planning next session weights, or reviewing training progress. Different from exercises.txt - this tracks what you actually lifted, while exercises.txt explains how to lift."
}

notes_desc_path = "./note_descriptions.json"
with open(notes_desc_path, "w") as f:
    json.dump(note_desc, f, indent=4)


In [16]:
if not user_input:
    raise HTTPException(status_code=400, detail="Empty input not allowed.")

# region Step 1: Getting Intent
notes_path_dict = build_notes_index("./notes")
notes_list = list(notes_path_dict.values())
with open(notes_desc_path, "r") as f:
    note_desc = json.load(f)

print("notes path dict: " ,notes_path_dict)
print(f"Notes found: {len(notes_list)}")
#print("Note descriptions:", note_desc)


intnt_prmpt_file = "./prompts/get_intent.txt"
with open(intnt_prmpt_file, "r", encoding="utf-8") as f:
    intnt_prmpt = f.read()

intnt_response = get_llm_response(
    user_prompt=intnt_prmpt.format(user_query = user_input , note_list = notes_list, note_desc = json.dumps(note_desc)),
    model="gpt-4o-mini",
    get_tokens=True
)


try:
    intent_data = jsonify_output(intnt_response["response"])
except Exception as e:
    raise HTTPException(status_code=500, detail=f"LLM response parsing error: {e}")


notes path dict:  {'movies.txt': './notes\\areas\\fun-friends-going-out\\movies.txt', 'calorie_tracking.xlsx': './notes\\areas\\health\\calorie_tracking.xlsx', 'exercises.txt': './notes\\areas\\health\\exercises.txt', 'progressive_overload_tracking.xlsx': './notes\\areas\\health\\progressive_overload_tracking.xlsx'}
Notes found: 4


In [17]:
intent_data

{'reasoning': "The user has listed a variety of foods consumed, which indicates they want to log or track their food intake. The note related to calorie and nutrition tracking is 'calorie_tracking.xlsx', fitting the user's intent to edit their food log.",
 'note': 'calorie_tracking.xlsx',
 'intent': 'edit_note'}

### gc code test

In [18]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials

In [19]:
JSON_KEYFILE = './service-account-creds.json'
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name(JSON_KEYFILE, scope)
gc_client = gspread.authorize(creds)



In [20]:
spreadsheets = gc_client.openall()

In [21]:
for i, spreadsheet in enumerate(spreadsheets, 1):
    print(f"{i}. {spreadsheet.title}")
    print(f"   ID: {spreadsheet.id}")
    print(f"   URL: {spreadsheet.url}")
    
    # List worksheets within this spreadsheet
    worksheets = spreadsheet.worksheets()
    print(f"   Worksheets: {', '.join([ws.title for ws in worksheets])}")

1. progressive_overload_tracking
   ID: 1Fh_sl3NLKgWukjJhm8_nIeR2r9llNjr9Q5M9PjQjFho
   URL: https://docs.google.com/spreadsheets/d/1Fh_sl3NLKgWukjJhm8_nIeR2r9llNjr9Q5M9PjQjFho
   Worksheets: Sheet1
2. calorie_tracking
   ID: 1elq9WI5-iWZfttMwHg-LtRIVeVRww6sNfljZ-usnc7g
   URL: https://docs.google.com/spreadsheets/d/1elq9WI5-iWZfttMwHg-LtRIVeVRww6sNfljZ-usnc7g
   Worksheets: Sheet1


In [22]:
# region Step 2: Performing Action Based on Intent
action_prompt = intent_data['intent']
action_note = intent_data['note']


if action_note == "no_match":
    raise HTTPException(status_code=400, detail="No matching note found for the given input.")

elif intent_data['intent'] not in ["retrieve_info", "edit_note"]:
    raise HTTPException(status_code=400, detail="Invalid intent detected.")

elif intent_data['intent'] == "retrieve_info":
    action_prompt_fname = os.path.join("./prompts", f"{action_prompt}.txt")
    with open(action_prompt_fname, "r") as f:
        action_prompt_template = f.read()

    action_note_fname = notes_path_dict.get(action_note, None)
    with open(action_note_fname, "r") as f:
        action_note_content = f.read()

    input_list = [
        {
            "role": "user",
            "content": action_prompt_template.format(
                user_query=user_input,
                note_content=action_note_content,
            )
        }
    ]
    # 2. Prompt the model with tools defined
    action_response = client.responses.create(
        model="gpt-5",
        tools=[
            {
            "type": "web_search",
            "user_location": {"type": "approximate"},
            "search_context_size": "low"
            },
        ],
        input=input_list,
        tool_choice="auto",
        store=False,
        reasoning={"effort": "low"},
    )

    try:
        final_output = jsonify_output(action_response.output_text)['final_text']
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"LLM action response parsing error: {e}")

    print("Final Output: \n", final_output)

elif intent_data['intent'] == "edit_note":
    if intent_data['note'] == "progressive_overload_tracking.xlsx":
        # Handle progressive overload tracking update
        sheet = gc_client.open("progressive_overload_tracking").sheet1
        df = pd.DataFrame(sheet.get_all_records())
        exercise_table_str = df[['muscle area', 'exercise name']].to_string(index=False)

        prog_over_prmpt_file = "./prompts/progressive_overloading_tracking.txt"
        with open(prog_over_prmpt_file, "r", encoding="utf-8") as f:
            PROG_OVER_PROMPT = f.read()

        input_list = [
            {
                "role": "user",
                "content": PROG_OVER_PROMPT.format(
                    user_input=user_input,
                    exercise_table=exercise_table_str
                )
            }
        ]
        # 2. Prompt the model with tools defined
        response = client.responses.create(
            model="gpt-5",
            tools=prog_over_tools,
            input=input_list,
            tool_choice="auto"
            
        )
        for item in response.output:
            if item.type == "function_call":
                print(item.name)
                if item.name == "modify_week":
                    args = jsonify_output(item.arguments)['muscle_areas']
                    modify_week(df, args)
                elif item.name == "modify_weight":
                    args = jsonify_output(item.arguments)['updates']
                    modify_weight(df, args)
                else:
                    print("Unknown function:", item.name)
        # Save back to Google Sheets
        sheet.clear()
        sheet.update([df.columns.values.tolist()] + df.values.tolist())

    elif intent_data['note'] == "calorie_tracking.xlsx":
        # Handle calorie tracking update
        sheet = gc_client.open("calorie_tracking").sheet1
        df = pd.DataFrame(sheet.get_all_records())
        
        calorie_tracker_path = "./prompts/calorie_tracking.txt"
        with open(calorie_tracker_path, "r", encoding="utf-8") as f:
            CLR_TRCK_PROMPT = f.read()

        input_list = [
            {
                "role": "user",
                "content": CLR_TRCK_PROMPT.format(
                    user_input=user_input
                )
            }
        ]
        # 2. Prompt the model with tools defined
        response = client.responses.create(
            model="gpt-5",
            tools=[
                {
                "type": "web_search",
                "user_location": {"type": "approximate"},
                "search_context_size": "low"
                },
            ],
            input=input_list,
            tool_choice="auto",
            store=False,
            reasoning={"effort": "low"},
        )    

        result = jsonify_output(response.output_text)
        items = result["items"]
        update_status = update_calorie_tracker(items, df)
        df = update_status["final_df"]

        sheet.clear()
        sheet.update([df.columns.values.tolist()] + df.values.tolist())
        # print(update_status)


In [36]:
jsonify_output(action_response.output_text)['final_text']

'- Koi Mil Gaya\n- Namastey London\n- Dil Chahta Hai\n- Tanu Weds Manu\n- De Dana Dan\n- Animal'

In [None]:
df = pd.rea