In [2]:
import pandas as pd 
import numpy as np
import re 
import json 

In [11]:
import re
import json
import datetime

def parse_workout_data(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()
    
    workouts = []
    current_workout = {}
    current_section = None
    current_exercise = None
    pending_exercises = []  # Stores multiple exercises for supersets

    date_pattern = re.compile(r"^\d{1,2}/\d{2}/\d{2}$")  # Matches dates like 1/04/24
    time_pattern = re.compile(r"(\d{2}:\d{2})")  # Matches times like 15:41
    section_pattern = re.compile(r"^\[(.*?)\]$")  # Matches section headers like [Legs]
    exercise_pattern = re.compile(r"^([A-Za-z].+)$")  # Matches exercise names
    set_pattern = re.compile(r"(\d+lbs|Bodyweight) - (\d+) x (\d+)")  # Matches weight-reps-sets format
    cardio_pattern = re.compile(r"([\d\.]+)\s*(miles|km|meters) - ([\d:]+) pace")  # Matches cardio logs

    for line in lines:
        line = line.strip()

        if not line:
            continue

        if date_pattern.match(line):  # New workout session
            if current_workout:  # Save previous session
                workouts.append(current_workout)

            current_workout = {
                "date": line,
                "location": "",
                "arrival": "",
                "departure": "",
                "locker": "",
                "focus": "",
                "exercises": {}
            }
            current_section = None
            current_exercise = None
            pending_exercises = []  # Reset superset exercises
            continue

        if any(keyword in line for keyword in ["Fitness", "Gym", "Home Workout"]):  # Detect location
            current_workout["location"] = line
            continue

        if "Arrival @" in line or "Departure @" in line:  # Arrival/Departure times
            times = time_pattern.findall(line)
            if times:
                if "Arrival" in line:
                    current_workout["arrival"] = times[0]
                if "Departure" in line:
                    current_workout["departure"] = times[0]
            continue

        if "Locker #" in line:  # Locker number
            current_workout["locker"] = line.split("#")[-1].strip()
            continue

        if not current_workout["focus"]:  # First line after locker is workout focus
            current_workout["focus"] = line
            continue

        if section_match := section_pattern.match(line):  # New muscle group section
            current_section = section_match.group(1)
            current_workout["exercises"][current_section] = {}
            continue

        if "Superset" in line or "Triset" in line:  # Handle supersets properly
            pending_exercises = []  # Reset pending exercises list
            continue

        if exercise_match := exercise_pattern.match(line):  # New exercise
            exercise_name = exercise_match.group(1)
            if " - " in exercise_name:  # Ignore placeholder exercises like "Bodyweight -"
                continue
            
            # Store exercise immediately even if no sets follow
            current_exercise = exercise_name
            pending_exercises = [current_exercise]  # Track current exercise
            if current_section:
                if current_exercise not in current_workout["exercises"][current_section]:
                    current_workout["exercises"][current_section][current_exercise] = []
            continue

        if set_match := set_pattern.match(line):  # Exercise sets
            weight, reps, sets = set_match.groups()
            if current_section:
                for exercise in pending_exercises:  # Assign sets to each tracked exercise
                    if exercise in current_workout["exercises"][current_section]:
                        current_workout["exercises"][current_section][exercise].append({
                            "weight": weight,
                            "reps": int(reps),
                            "sets": int(sets)
                        })
            continue

        if cardio_match := cardio_pattern.match(line):  # Cardio entry
            distance, unit, pace = cardio_match.groups()
            if current_section and current_exercise:
                current_workout["exercises"][current_section][current_exercise].append({
                    "distance": float(distance),
                    "unit": unit,
                    "pace": pace
                })
            continue

    # If departure is missing, set it to arrival + 1 hour
    if current_workout and ("departure" not in current_workout or not current_workout["departure"]):
        if "arrival" in current_workout and current_workout["arrival"]:
            try:
                arrival_time = datetime.datetime.strptime(current_workout["arrival"], "%H:%M")
                departure_time = arrival_time + datetime.timedelta(hours=1)
                current_workout["departure"] = departure_time.strftime("%H:%M")
            except ValueError:
                current_workout["departure"] = "Unknown"
        else:
            current_workout["departure"] = "Unknown"

    if current_workout:  # Save last workout
        workouts.append(current_workout)

    return workouts


# Example usage:
file_path = "Data/aggregate_fitness_notes_2025.txt"  # Replace with your actual file
parsed_workouts = parse_workout_data(file_path)

# Save to JSON for easy viewing
with open("parsed_workouts.json", "w", encoding="utf-8") as json_file:
    json.dump(parsed_workouts, json_file, indent=4)

# Print the first parsed workout as an example
print(json.dumps(parsed_workouts[:1], indent=4))


[
    {
        "date": "1/02/24",
        "location": "LA Fitness Alpharetta",
        "arrival": "16:57",
        "departure": "18:45",
        "locker": "125",
        "focus": "Chest and Back, Calves, Cardio",
        "exercises": {
            "Chest and Back": {
                "Wide-Grip Lat Pulldowns": [
                    {
                        "weight": "100lbs",
                        "reps": 12,
                        "sets": 1
                    },
                    {
                        "weight": "145lbs",
                        "reps": 8,
                        "sets": 1
                    },
                    {
                        "weight": "145lbs",
                        "reps": 8,
                        "sets": 1
                    },
                    {
                        "weight": "130lbs",
                        "reps": 10,
                        "sets": 1
                    },
                    {
                        "weigh

In [None]:
"Data/aggregate_fitness_notes_2025.txt"

In [18]:
import re
import json
import datetime

def parse_workout_data(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()
    
    workouts = []
    current_workout = {}
    current_section = None
    current_exercise = None
    pending_exercises = []
    superset_name = None
    superset_mode = False
    triset_mode = False

    date_pattern = re.compile(r"^\d{1,2}/\d{2}/\d{2}$")  
    time_pattern = re.compile(r"(\d{2}:\d{2})")  
    section_pattern = re.compile(r"^\[(.*?)\]$")  
    exercise_pattern = re.compile(r"^([A-Za-z].+)$")  
    set_pattern = re.compile(r"((\d+(\.\d+)?lbs per|\d+(\.\d+)?lbs|Bodyweight)) - (\d+) x (\d+)")  

    for line in lines:
        line = line.strip()

        if not line:
            continue

        if date_pattern.match(line):  
            if current_workout:  
                workouts.append(current_workout)

            current_workout = {
                "date": line,
                "location": "",
                "arrival": "",
                "departure": "",
                "locker": "",
                "focus": "",
                "exercises": {}
            }
            current_section = None
            current_exercise = None
            pending_exercises = []
            superset_mode = False
            triset_mode = False
            superset_name = None
            continue

        if any(keyword in line for keyword in ["Fitness", "Gym", "Home Workout"]):
            current_workout["location"] = line
            continue

        if "Arrival @" in line or "Departure @" in line:
            times = time_pattern.findall(line)
            if times:
                if "Arrival" in line:
                    current_workout["arrival"] = times[0]
                if "Departure" in line:
                    current_workout["departure"] = times[0]
            continue

        if "Locker #" in line:
            current_workout["locker"] = line.split("#")[-1].strip()
            continue

        if not current_workout["focus"]:
            current_workout["focus"] = line
            continue

        if section_match := section_pattern.match(line):
            current_section = section_match.group(1)
            current_workout["exercises"][current_section] = {}
            print(f"\n🔹 New Section Detected: {current_section}")
            continue

        if "Superset" in line:
            superset_mode = True
            triset_mode = False
            pending_exercises = []
            superset_name = None
            print("\n⚠️ Superset Detected")
            continue

        if "Triset" in line:
            triset_mode = True
            superset_mode = False
            pending_exercises = []
            superset_name = None
            print("\n⚠️ Triset Detected")
            continue

        if exercise_match := exercise_pattern.match(line):
            exercise_name = exercise_match.group(1)

            if " - " in exercise_name:
                continue

            if superset_mode or triset_mode:
                pending_exercises.append(exercise_name)

                if (superset_mode and len(pending_exercises) == 2) or (triset_mode and len(pending_exercises) == 3):
                    superset_name = f"Superset: {' + '.join(pending_exercises)}" if superset_mode else \
                                    f"Triset: {' + '.join(pending_exercises)}"

                    current_workout["exercises"][current_section][superset_name] = {ex: [] for ex in pending_exercises}
                    print(f"✅ Storing {superset_name}")

                    superset_mode = False
                    triset_mode = False

            else:
                current_exercise = exercise_name
                pending_exercises = [current_exercise]
                if current_section:
                    if current_exercise not in current_workout["exercises"][current_section]:
                        current_workout["exercises"][current_section][current_exercise] = []
            
            print(f"✅ Detected Exercise: {exercise_name}")
            continue

        if set_match := set_pattern.match(line):
            weight, reps, sets = set_match.groups()[0], set_match.groups()[-2], set_match.groups()[-1]
            weight = weight.strip()  

            if not pending_exercises:
                print(f"⚠️ Skipping set: {weight}, {reps} reps x {sets} sets (No valid exercise detected yet)")
                continue

            if current_section:
                if superset_name:
                    # Store sets inside the correct Superset structure
                    for ex in pending_exercises:
                        current_workout["exercises"][current_section][superset_name][ex].append({
                            "weight": weight,
                            "reps": int(reps),
                            "sets": int(sets)
                        })
                        print(f"   ➡️ Set recorded for {ex} (part of {superset_name}): {weight}, {reps} reps x {sets} sets")
                else:
                    for exercise in pending_exercises:
                        current_workout["exercises"][current_section][exercise].append({
                            "weight": weight,
                            "reps": int(reps),
                            "sets": int(sets)
                        })
                        print(f"   ➡️ Set recorded for {exercise}: {weight}, {reps} reps x {sets} sets")
            continue

    if current_workout:
        workouts.append(current_workout)

    return workouts


# Example usage:
file_path = "Data/aggregate_fitness_notes_2025.txt"
parsed_workouts = parse_workout_data(file_path)

with open("parsed_workouts.json", "w", encoding="utf-8") as json_file:
    json.dump(parsed_workouts, json_file, indent=4)

print(json.dumps(parsed_workouts[:1], indent=4))


✅ Detected Exercise: Wide-Grip Lat Pulldowns
✅ Detected Exercise: Barbell Incline Bench Presses
✅ Detected Exercise: Barbell Bent-Over Rows
✅ Detected Exercise: DB Incline Pec Flys
✅ Detected Exercise: Close-Grip Bench Presses
✅ Detected Exercise: Seated Cable Rows and DB Straight-Arm Pullovers

🔹 New Section Detected: Chest and Back
✅ Detected Exercise: Wide-Grip Lat Pulldowns
   ➡️ Set recorded for Wide-Grip Lat Pulldowns: 100lbs, 12 reps x 1 sets
   ➡️ Set recorded for Wide-Grip Lat Pulldowns: 145lbs, 8 reps x 1 sets
   ➡️ Set recorded for Wide-Grip Lat Pulldowns: 145lbs, 8 reps x 1 sets
   ➡️ Set recorded for Wide-Grip Lat Pulldowns: 130lbs, 10 reps x 1 sets
   ➡️ Set recorded for Wide-Grip Lat Pulldowns: 100lbs, 12 reps x 1 sets
✅ Detected Exercise: Barbell Incline Bench Presses
   ➡️ Set recorded for Barbell Incline Bench Presses: 45lbs, 12 reps x 1 sets
   ➡️ Set recorded for Barbell Incline Bench Presses: 135lbs, 10 reps x 1 sets
   ➡️ Set recorded for Barbell Incline Bench Pre

KeyError: 'Seated Cable Rows'