In [1]:
import pandas as pd
import numpy as np

In [2]:
import os
import openai
from getpass import getpass


# Prompt for the key only when the environment does not already provide one,
# so re-running this cell (or Run All with the key exported in the shell)
# neither blocks on input nor overwrites a working key.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass()

# Mirror the environment value onto the openai module attribute.
openai.api_key = os.environ['OPENAI_API_KEY']

 ········


## Ingestion

In [5]:
# Load the exercise plan. The path is relative, so it resolves against the
# kernel's working directory. Besides `id`, the file also carries an
# 'Unnamed: 0' column (visible in df.columns in the next cell).
df = pd.read_csv('../data/crossfit_exercise_plan_01.csv')

In [4]:
df.columns

Index(['Unnamed: 0', 'id', 'exercise_name', 'session_name', 'type_of_activity',
       'type_of_equipment', 'body_part', 'type', 'muscle_groups_activated',
       'instructions'],
      dtype='object')

In [6]:
# One plain dict per CSV row, keyed by column name.
documents = df.to_dict("records")

In [7]:
documents[0]

{'Unnamed: 0': 0,
 'id': 0,
 'exercise_name': 'Cossack Squat',
 'session_name': 'Aerobic Body Building',
 'type_of_activity': 'Warm-up',
 'type_of_equipment': 'Dumbbells',
 'body_part': 'Full body',
 'type': 'Pull',
 'muscle_groups_activated': 'Upper Chest, Shoulders',
 'instructions': 'Warm-up adequately before performing this exercise to prevent injury.'}

In [207]:
import minsearch

# All descriptive columns are passed as text fields; the keyword-field list is
# left empty.
index = minsearch.Index(
    keyword_fields=[],
    text_fields=[
        'exercise_name',
        'session_name',
        'type_of_activity',
        'type_of_equipment',
        'body_part',
        'type',
        'muscle_groups_activated',
        'instructions',
    ],
)

In [208]:
# Feed the exercise records to the index. As the cell output shows, fit()
# evaluates to the Index object.
index.fit(docs=documents)

<minsearch.Index at 0x7f189eae9d90>

### RAG Flow

In [8]:
# Prompt used to generate ground-truth questions for a single exercise record.
# The {placeholders} match the record's column names, so the template can be
# filled with prompt_template.format(**doc). The doubled braces around the JSON
# example are str.format escapes and render as single literal braces.
prompt_template = """
You emulate a user of our fitness assistant application.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific to this exercise.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

exercise_name: {exercise_name}
session_name: {session_name}
type_of_activity: {type_of_activity}
type_of_equipment: {type_of_equipment}
body_part: {body_part}
type: {type}
muscle_groups_activated:{muscle_groups_activated}
instructions: {instructions}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [10]:
print(prompt_template.format(**documents[0]))

You emulate a user of our fitness assistant application.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific to this exercise.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

exercise_name: Cossack Squat
session_name: Aerobic Body Building
type_of_activity: Warm-up
type_of_equipment: Dumbbells
body_part: Full body
type: Pull
muscle_groups_activated:Upper Chest, Shoulders
instructions: Warm-up adequately before performing this exercise to prevent injury.

Provide the output in parsable JSON without using code blocks:

{"questions": ["question1", "question2", ..., "question5"]}


In [17]:
from openai import OpenAI
import json

# Single client instance shared by the helper functions defined in later cells
# (llm and generate_questions both call client.chat.completions.create).
client = OpenAI()

In [18]:
def llm(prompt, model='gpt-4o-mini'):
    """Send a single-turn user prompt to the chat completions endpoint.

    Args:
        prompt: Text sent as the only ``user`` message of the request.
        model: Name of the chat model to call. Defaults to ``'gpt-4o-mini'``,
            so existing ``llm(prompt)`` calls behave as before.

    Returns:
        The ``content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [21]:
# Try the question-generation prompt on a single record (documents[1]).
prompt = prompt_template.format(**documents[1])
# NOTE: this binds the raw model reply to the name `generate_questions`;
# a later cell reuses that same name for a function definition.
generate_questions = llm(prompt)

In [23]:
print(generate_questions)

{"questions": ["What is the main purpose of performing Glute-Ham Raises in my fitness routine?", "Which muscle groups are targeted by Glute-Ham Raises during the exercise?", "What equipment do I need to perform Glute-Ham Raises correctly?", "Can you explain the correct breathing technique for Glute-Ham Raises?", "Is there a specific session type where Glute-Ham Raises are most effective?"]}


In [24]:
json.loads(generate_questions)

{'questions': ['What is the main purpose of performing Glute-Ham Raises in my fitness routine?',
  'Which muscle groups are targeted by Glute-Ham Raises during the exercise?',
  'What equipment do I need to perform Glute-Ham Raises correctly?',
  'Can you explain the correct breathing technique for Glute-Ham Raises?',
  'Is there a specific session type where Glute-Ham Raises are most effective?']}

In [25]:
def generate_questions(doc):
    """Ask the model for evaluation questions about one exercise record.

    Args:
        doc: Mapping that supplies a value for every placeholder of the
            module-level ``prompt_template`` (extra keys are ignored by
            ``str.format``).

    Returns:
        The message content of the first choice, unparsed. The request is
        made in JSON mode so the caller can hand the text to ``json.loads``.
    """
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}],
        # The prompt only *asks* for JSON; JSON mode makes the API constrain
        # the reply to a syntactically valid JSON object instead of relying on
        # the model to comply. JSON mode requires the prompt itself to mention
        # JSON, which the question-generation template does.
        response_format={"type": "json_object"},
    )

    return response.choices[0].message.content

In [26]:
from tqdm.auto import tqdm

In [27]:
# doc id -> list of generated questions. Defined in its own cell, so re-running
# the generation loop does not reset it and ids already present are skipped.
results = {}

In [None]:
# Ids whose reply could not be used on this pass. They stay absent from
# `results`, so re-running this cell retries exactly those records.
failed_ids = []

for doc in tqdm(documents):
    doc_id = doc['id']
    if doc_id in results:
        # Already generated on an earlier (possibly interrupted) pass.
        continue

    questions_raw = generate_questions(doc)
    try:
        questions = json.loads(questions_raw)
        results[doc_id] = questions['questions']
    except (json.JSONDecodeError, KeyError, TypeError):
        # One malformed or unexpectedly shaped reply must not abort the
        # remaining API calls; remember the id and move on.
        failed_ids.append(doc_id)

if failed_ids:
    print(f"{len(failed_ids)} record(s) returned an unusable response: {failed_ids}")

  0%|          | 0/720 [00:00<?, ?it/s]

In [30]:
# Flatten {doc_id: [question, ...]} into one (doc_id, question) pair per question,
# keeping the dict's iteration order.
final_results = [
    (doc_id, question)
    for doc_id, doc_questions in results.items()
    for question in doc_questions
]

In [31]:
final_results[0]

(0,
 'What is the primary focus of the Cossack Squat exercise during the Aerobic Body Building session?')

In [32]:
# Two-column frame: the source document id and one generated question per row.
df_results = pd.DataFrame(data=final_results, columns=['id', 'question'])

In [34]:
# Persist the (id, question) pairs; index=False keeps the DataFrame index out
# of the file, so the header is just `id,question`.
df_results.to_csv('../data/ground-truth-retrieval.csv', index=False)

In [35]:
!head ../data/ground-truth-retrieval.csv

id,question
0,What is the primary focus of the Cossack Squat exercise during the Aerobic Body Building session?
0,What type of equipment is needed for performing the Cossack Squat?
0,What are the main muscle groups activated by the Cossack Squat?
0,Is the Cossack Squat considered a warm-up exercise or a main workout?
0,What should I do before starting the Cossack Squat to avoid injury?
1,What is the primary body part targeted during Glute-Ham Raises?
1,Can Glute-Ham Raises be incorporated into a cardio session?
1,Which muscle groups are activated when performing this exercise?
1,What type of equipment is used for Glute-Ham Raises?


In [214]:
def search(query):
    """Look up ``query`` in the module-level ``index``.

    No filters and no field boosts are applied; at most 10 results are
    requested. Whatever ``index.search`` returns is passed through unchanged.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=10,
    )

In [218]:
# RAG answer prompt, filled with the user's question and the rendered context.
# NOTE: this rebinds `prompt_template`, which an earlier cell defined as the
# question-generation prompt. generate_questions() looks the name up when it is
# called, so after this cell runs it would pick up this template instead.
prompt_template = """
You're a fitness instructor. Answer the QUESTION based on the CONTEXT from the excercise database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

# Layout of one retrieved exercise inside {context}; the placeholders match the
# record's column names.
entry_template = """
exercise_name: {exercise_name}
session_name: {session_name}
type_of_activity: {type_of_activity}
type_of_equipment: {type_of_equipment}
body_part: {body_part}
type: {type}
muscle_groups_activated:{muscle_groups_activated}
instructions: {instructions}
""".strip()

In [219]:
def build_prompt(query, search_results):
    """Render the RAG prompt for ``query`` from the retrieved records.

    Every record in ``search_results`` is formatted with ``entry_template``
    and followed by a blank line. The concatenated text fills ``{context}``
    and ``query`` fills ``{question}`` in ``prompt_template``; leading and
    trailing whitespace is stripped from the finished prompt.
    """
    rendered_entries = [
        entry_template.format(**record) + "\n\n"
        for record in search_results
    ]
    context = "".join(rendered_entries)
    return prompt_template.format(question=query, context=context).strip()

In [220]:
# `query` is not assigned anywhere earlier in the notebook, so on a fresh
# kernel this cell would raise NameError. Define it here with the question that
# appears in the printed prompt below (spelling kept as recorded there).
query = "give me leg excercises for hamstrings"

search_results = search(query)
prompt = build_prompt(query, search_results)

In [221]:
print(prompt)

You're a fitness instructor. Answer the QUESTION based on the CONTEXT from the excercise database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: give me leg excercises for hamstrings

CONTEXT: 
exercise_name: Single-leg Deadlifts
session_name: Crossfit
type_of_activity: Strength
type_of_equipment: Dumbbells
body_part: Lower body
type: Pull
muscle_groups_activated:Hamstrings, Glutes
instructions: Balance on one leg and lower the weights to the ground, returning to standing.

exercise_name: Single-leg Kettlebell Swing
session_name: Crossfit
type_of_activity: Strength
type_of_equipment: Kettlebell
body_part: Lower body
type: Pull
muscle_groups_activated:Glutes, Hamstrings, Core
instructions: Swing the kettlebell while standing on one leg, focusing on balance and control.

exercise_name: Single-leg Deadlift
session_name: Crossfit
type_of_activity: Mobility
type_of_equipment: Dumbbells
body_part: Lower body
type: Pull
muscle_groups_activated:Hamstrings, Glutes,

In [222]:
def llm(prompt, model='gpt-4o-mini'):
    """Send a single-turn user prompt to the chat completions endpoint.

    Args:
        prompt: Text sent as the only ``user`` message of the request.
        model: Name of the chat model to call. Defaults to ``'gpt-4o-mini'``,
            so existing ``llm(prompt)`` calls behave as before.

    Returns:
        The ``content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [223]:
def rag(query):
    """Answer ``query`` end to end: retrieve, build the prompt, ask the model.

    Returns whatever ``llm`` returns for the prompt built from the search
    results.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [224]:
answer = rag(query)

In [99]:
print(answer)

Here are some leg exercises that activate the hamstrings:

1. **Front Rack Lunge**
   - Crossfit Session: Crossfit
   - Type of Activity: Warm-up
   - Muscle Groups Activated: Glutes, Hamstrings
   - Instructions: Stretch before and after this exercise for better mobility and recovery.

2. **Plank to Push-up**
   - Crossfit Session: Olympic Weightlifting
   - Type of Activity: Warm-up
   - Muscle Groups Activated: Hamstrings, Glutes
   - Instructions: Stretch before and after this exercise for better mobility and recovery.

3. **Weighted Vest Burpees**
   - Crossfit Session: Olympic Weightlifting
   - Type of Activity: Warm-up
   - Muscle Groups Activated: Hamstrings, Glutes
   - Instructions: Stretch before and after this exercise for better mobility and recovery.

4. **Kettlebell Clean**
   - Crossfit Session: Gymnastics
   - Type of Activity: Cardio
   - Muscle Groups Activated: Glutes, Hamstrings
   - Instructions: Stretch before and after this exercise for better mobility and reco

In [225]:
answer = rag("Need some more excercises for my back")

In [226]:
print(answer)

Here are some exercises that focus on strengthening your back:

1. **Single-arm Row**
   - **Session Name:** Crossfit
   - **Type of Activity:** Strength
   - **Type of Equipment:** Dumbbell
   - **Muscle Groups Activated:** Back
   - **Instructions:** Row a dumbbell towards your abdomen with one arm while supporting your body with the other.

2. **Resistance Band Pull-aparts**
   - **Session Name:** Crossfit
   - **Type of Activity:** Strength
   - **Type of Equipment:** Resistance Band
   - **Muscle Groups Activated:** Back, Shoulders
   - **Instructions:** Hold a band in front and pull it apart to activate your back muscles.

3. **Push-Up Rows**
   - **Session Name:** Crossfit
   - **Type of Activity:** Strength
   - **Type of Equipment:** Bodyweight
   - **Muscle Groups Activated:** Lower Back, Core
   - **Instructions:** Keep your back straight and hinge at the hips for proper form.

4. **Tire Drag**
   - **Session Name:** Olympic Weightlifting
   - **Type of Activity:** Cardio
  

##  Evaluation