In [2]:
import pandas as pd 
import numpy as np
import re 
import json 

In [9]:
import re
import json
import datetime

def _fill_missing_departure(workout):
    """Give *workout* a departure time when the log did not record one.

    An empty departure becomes arrival + 1 hour (formatted ``HH:MM``); if the
    arrival is empty or cannot be parsed as ``%H:%M`` it becomes ``"Unknown"``.
    The dict is modified in place.
    """
    if workout["departure"]:
        return

    arrival = workout["arrival"]
    if not arrival:
        workout["departure"] = "Unknown"
        return

    try:
        arrival_time = datetime.datetime.strptime(arrival, "%H:%M")
    except ValueError:
        workout["departure"] = "Unknown"
    else:
        departure_time = arrival_time + datetime.timedelta(hours=1)
        workout["departure"] = departure_time.strftime("%H:%M")


def parse_workout_data(file_path):
    """Parse a plain-text workout log into a list of session dictionaries.

    A session starts at a line that consists solely of a date such as
    ``1/04/24``. Lines that appear before the first date are ignored.

    Args:
        file_path: Path to the UTF-8 text file to read.

    Returns:
        A list with one dict per session, in file order. Each dict has the
        string fields ``date``, ``location``, ``arrival``, ``departure``,
        ``locker`` and ``focus``, plus ``exercises``: a mapping of section
        name -> exercise name -> list of entries. A strength entry is
        ``{"weight": str, "reps": int, "sets": int}``; a cardio entry is
        ``{"distance": float, "unit": str, "pace": str}``.

        A session without a recorded departure gets arrival + 1 hour, or
        ``"Unknown"`` when the arrival is missing or unparsable.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    date_pattern = re.compile(r"^\d{1,2}/\d{2}/\d{2}$")  # Matches dates like 1/04/24
    # Hours may be written with one digit (9:05), mirroring the date format.
    time_pattern = re.compile(r"(\d{1,2}:\d{2})")  # Matches times like 15:41
    section_pattern = re.compile(r"^\[(.*?)\]$")  # Matches section headers like [Legs]
    exercise_pattern = re.compile(r"^([A-Za-z].+)$")  # Matches exercise names
    set_pattern = re.compile(r"(\d+lbs|Bodyweight) - (\d+) x (\d+)")  # Matches set lines
    # The distance must be a well-formed number so float() cannot fail on it.
    cardio_pattern = re.compile(
        r"(\d+(?:\.\d*)?|\.\d+)\s*(miles|km|meters) - ([\d:]+) pace"
    )  # Matches cardio logs
    location_keywords = ("Fitness", "Gym", "Home Workout")

    workouts = []
    current_workout = None
    current_section = None
    current_exercise = None

    with open(file_path, "r", encoding="utf-8") as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue

            if date_pattern.match(line):  # New workout session
                if current_workout is not None:  # Save previous session
                    _fill_missing_departure(current_workout)
                    workouts.append(current_workout)

                current_workout = {
                    "date": line,
                    "location": "",
                    "arrival": "",
                    "departure": "",
                    "locker": "",
                    "focus": "",
                    "exercises": {},
                }
                current_section = None
                current_exercise = None
                continue

            if current_workout is None:
                # Preamble before the first date belongs to no session.
                continue

            # Only the first keyword hit is the location; later lines such as
            # "Gym Ball Squats" must not overwrite it.
            if not current_workout["location"] and any(
                keyword in line for keyword in location_keywords
            ):
                current_workout["location"] = line
                continue

            if "Arrival @" in line or "Departure @" in line:  # Arrival/Departure times
                times = time_pattern.findall(line)
                if times:
                    if "Arrival" in line:
                        current_workout["arrival"] = times[0]
                    if "Departure" in line:
                        current_workout["departure"] = times[0]
                continue

            if "Locker #" in line:  # Locker number
                current_workout["locker"] = line.split("#")[-1].strip()
                continue

            # Section headers are checked before the focus fallback so that a
            # session without a focus line does not swallow its first section.
            if section_match := section_pattern.match(line):
                current_section = section_match.group(1)
                # setdefault keeps earlier entries if a header is repeated.
                current_workout["exercises"].setdefault(current_section, {})
                # Sets that follow belong to an exercise named in this section.
                current_exercise = None
                continue

            # The focus is the first free-text line before any section starts.
            if not current_workout["focus"] and current_section is None:
                current_workout["focus"] = line
                continue

            if "Superset" in line or "Triset" in line:  # Ignore Superset/Triset for now
                current_exercise = None
                continue

            # Set and cardio lines are tested before exercise names because
            # "Bodyweight - 10 x 3" also looks like a name (starts with a letter).
            if set_match := set_pattern.match(line):  # Exercise sets
                weight, reps, sets = set_match.groups()
                if current_section and current_exercise:
                    entries = current_workout["exercises"][current_section].setdefault(
                        current_exercise, []
                    )
                    entries.append({
                        "weight": weight,
                        "reps": int(reps),
                        "sets": int(sets),
                    })
                continue

            if cardio_match := cardio_pattern.match(line):  # Cardio entry
                distance, unit, pace = cardio_match.groups()
                if current_section and current_exercise:
                    entries = current_workout["exercises"][current_section].setdefault(
                        current_exercise, []
                    )
                    entries.append({
                        "distance": float(distance),
                        "unit": unit,
                        "pace": pace,
                    })
                continue

            if exercise_match := exercise_pattern.match(line):  # New exercise
                exercise_name = exercise_match.group(1)
                if " - " in exercise_name:  # Malformed set-like line, not a name
                    continue
                current_exercise = exercise_name
                if current_section:
                    # setdefault keeps sets already logged under the same name.
                    current_workout["exercises"][current_section].setdefault(
                        current_exercise, []
                    )
                continue

    if current_workout is not None:  # Save last workout
        _fill_missing_departure(current_workout)
        workouts.append(current_workout)

    return workouts


# Demo run: parse the notes file, persist the result, preview one session.
file_path = "Data/aggregate_fitness_notes_2025.txt"  # Point this at your own notes file
parsed_workouts = parse_workout_data(file_path)

# Write every parsed session to disk as pretty-printed JSON
with open("parsed_workouts.json", mode="w", encoding="utf-8") as json_file:
    json.dump(parsed_workouts, json_file, indent=4)

# Echo just the first session (an empty list prints as "[]")
print(json.dumps(parsed_workouts[:1], indent=4))


[
    {
        "date": "1/02/24",
        "location": "LA Fitness Alpharetta",
        "arrival": "16:57",
        "departure": "18:45",
        "locker": "125",
        "focus": "Chest and Back, Calves, Cardio",
        "exercises": {
            "Chest and Back": {
                "Wide-Grip Lat Pulldowns": [
                    {
                        "weight": "100lbs",
                        "reps": 12,
                        "sets": 1
                    },
                    {
                        "weight": "145lbs",
                        "reps": 8,
                        "sets": 1
                    },
                    {
                        "weight": "145lbs",
                        "reps": 8,
                        "sets": 1
                    },
                    {
                        "weight": "130lbs",
                        "reps": 10,
                        "sets": 1
                    },
                    {
                        "weigh