In [1]:
import json
import re
from datetime import datetime

def extract_health_data_from_log(json_file_path):
    """
    Parses a conversation log from a JSON file to extract health data points.

    This function simulates an ML model by using regular expressions to find mentions
    of specific health markers and weight check-ins within a raw text conversation.

    Args:
        json_file_path (str): The path to the input JSON file containing the conversation log.

    Returns:
        list: A list of dictionaries, where each dictionary represents a single
              extracted health data point. Returns an empty list if the file is not found
              or is invalid.
    """
    try:
        with open(json_file_path, 'r') as f:
            full_log = json.load(f)
            # The conversation is nested under the "conversation" key
            conversation_log = full_log.get("conversation", [])
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error reading or parsing JSON file: {e}")
        return []

    extracted_data = []

    # Define the health markers we are looking for with their specific regex patterns
    # This helps in accurately capturing different units and naming conventions.
    marker_patterns = {
        'ApoB': r"ApoB:\s*(\d+\.?\d*)\s*mg/dL",
        'hs-CRP': r"hs-CRP:\s*(\d+\.?\d*)\s*mg/L",
        'HbA1c': r"HbA1c:\s*(\d+\.?\d*)\s*%",
        'Fasting Insulin': r"Fasting Insulin:\s*(\d+)\s*µIU/mL",
        'Triglycerides': r"Triglycerides:\s*(\d+)\s*mg/dL",
        'HDL Cholesterol': r"HDL Cholesterol:\s*(\d+)\s*mg/dL",
        'ALT': r"ALT:\s*(\d+)\s*U/L",
        'Total Testosterone': r"Total Testosterone:\s*(\d+)\s*ng/dL",
        'Weight': r"down\s*(\d+\.?\d*)\s*kgs?"  # Pattern for weight check-ins
    }

    # Iterate through each message in the conversation log
    for message in conversation_log:
        timestamp = message.get("timestamp")
        comment = message.get("message")

        # Check for each health marker in the message
        for marker_name, pattern in marker_patterns.items():
            # Use re.findall to capture all occurrences in a message (e.g., in Dr. Warren's reports)
            matches = re.findall(pattern, comment, re.IGNORECASE)
            for match in matches:
                try:
                    value = float(match)
                    extracted_data.append({
                        "marker": marker_name,
                        "value": value,
                        "timestamp": timestamp,
                        "comment": comment.strip()
                    })
                except ValueError:
                    # Handle cases where the match is not a valid number
                    print(f"Warning: Could not convert value '{match}' for marker '{marker_name}' to a number.")
                    continue

    return extracted_data

# --- Example Usage ---
if __name__ == "__main__":
    # 1. First, create a conversation log file named 'conversation.json'
    # The JSON should be in the format: {"conversation": [...]}

    conversation_json = {
      "conversation": [
        { "id": "msg_001", "month": 1, "timestamp": "2025-09-02T11:30:00Z", "sender": "Mohit", "role": "Member", "message": "Ok, I'm signed up. Shared my initial history. The main goal is simple: I need to drop 50kg. Where do we start?" },
        { "id": "msg_002", "month": 1, "timestamp": "2025-09-02T11:32:00Z", "sender": "Ruby", "role": "Concierge", "message": "Welcome aboard, Mohit! We're so excited to have you. The first step is to get a really clear picture of your baseline health. I'm on it and already coordinating with our partner lab to schedule a full diagnostic panel for you next week. I'll send over some time slots shortly." },
        { "id": "msg_003", "month": 1, "timestamp": "2025-09-04T09:05:00Z", "sender": "Mohit", "role": "Member", "message": "Why such a big panel of tests? My last physical was fine." },
        { "id": "msg_004", "month": 1, "timestamp": "2025-09-04T09:10:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "Mohit, Dr. Warren here. That's a fair question. Standard physicals are great for spotting existing disease. We're looking for opportunities to optimize and prevent. To tackle a 50kg weight loss goal safely and effectively, we need to understand your metabolic, hormonal, and cardiovascular starting point in detail. This data is the foundation for a truly personalized plan." },
        { "id": "msg_005", "month": 1, "timestamp": "2025-09-04T09:12:00Z", "sender": "Mohit", "role": "Member", "message": "Makes sense. Data-driven, I like it. Ruby, book the earliest slot available. 👍" },
        { "id": "msg_006", "month": 1, "timestamp": "2025-09-12T14:00:00Z", "sender": "Ruby", "role": "Concierge", "message": "Hi Mohit, just confirming your diagnostic tests are complete. We should start receiving results over the next 7-10 days. The team is eager to dive in and start building your plan!" },
        { "id": "msg_007", "month": 1, "timestamp": "2025-09-18T16:20:00Z", "sender": "Mohit", "role": "Member", "message": "Btw Ruby, heads up I have a business trip to Jakarta for a week starting on the 25th. Just so the team knows." },
        { "id": "msg_008", "month": 1, "timestamp": "2025-09-18T16:22:00Z", "sender": "Ruby", "role": "Concierge", "message": "Thanks for the heads up, Mohit! I've added it to your calendar so the whole team is aware. We'll make sure your plan includes strategies for travel right from the start." },
        { "id": "msg_009", "month": 2, "timestamp": "2025-10-01T10:00:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "Mohit, we have your full blood panel results. I've synthesized them into a report I've just uploaded to your portal, but I want to walk you through the key takeaways here. We focus on 8 critical markers for longevity and metabolic health." },
        { "id": "msg_010", "month": 2, "timestamp": "2025-10-01T10:01:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "1. ApoB: 135 mg/dL\n2. hs-CRP: 3.2 mg/L\n3. HbA1c: 5.9%\n4. Fasting Insulin: 18 µIU/mL" },
        { "id": "msg_011", "month": 2, "timestamp": "2025-10-01T10:02:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "5. Triglycerides: 250 mg/dL\n6. HDL Cholesterol: 35 mg/dL\n7. ALT: 55 U/L\n8. Total Testosterone: 310 ng/dL" },
        { "id": "msg_012", "month": 2, "timestamp": "2025-10-01T10:03:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "The data is clear: we're seeing the early stages of metabolic dysfunction. But the good news is that every single one of these markers can be dramatically improved with the right plan. The 50kg weight loss goal is the most powerful lever we have to fix all of this." },
        { "id": "msg_013", "month": 2, "timestamp": "2025-10-01T10:05:00Z", "sender": "Mohit", "role": "Member", "message": "Wow. Seeing the actual numbers is a huge wake-up call. That ApoB and HbA1c is scary. Ok, I'm 100% committed. What's the plan?" },
        { "id": "msg_014", "month": 2, "timestamp": "2025-10-01T10:10:00Z", "sender": "Carla", "role": "Nutritionist", "message": "Hi Mohit, Carla here. Based on those numbers, I've designed your initial nutrition plan (Plan V1). It's all about whole foods, high protein, and eliminating processed sugars and refined carbs. This will directly target your insulin, triglycerides, and liver health. We'll start slow and build from there." },
        { "id": "msg_015", "month": 2, "timestamp": "2025-10-01T10:12:00Z", "sender": "Rachel", "role": "PT / Physiotherapist", "message": "And I've added your foundational strength training program. Building muscle is one of the best things you can do for your blood sugar and testosterone. It's all about building a strong base. You've got this! Let's get it! 💪" },
        { "id": "msg_016", "month": 2, "timestamp": "2025-10-08T12:00:00Z", "sender": "Mohit", "role": "Member", "message": "Hey Carla, I was reading about creatine and its benefits for strength and even cognitive function. Should that be part of my supplement stack? 🤔" },
        { "id": "msg_017", "month": 2, "timestamp": "2025-10-08T12:05:00Z", "sender": "Carla", "role": "Nutritionist", "message": "Great question, Mohit. I love that you're doing your own research. Yes, creatine is one of the most well-researched supplements out there. Given your goals, it's an excellent addition. Let's start with 5g of creatine monohydrate daily. I've updated your plan, and Ruby can arrange to have it delivered." },
        { "id": "msg_018", "month": 2, "timestamp": "2025-10-15T08:00:00Z", "sender": "Mohit", "role": "Member", "message": "Morning team. Two-week check-in: down 4kgs already! The plan is working. Feeling motivated. 🔥" },
        { "id": "msg_019", "month": 2, "timestamp": "2025-10-15T08:05:00Z", "sender": "Rachel", "role": "PT / Physiotherapist", "message": "YES, Mohit! That's what we love to see. Consistency pays off. How are the workouts feeling? Any soreness?" },
        { "id": "msg_020", "month": 2, "timestamp": "2025-10-29T08:30:00Z", "sender": "Mohit", "role": "Member", "message": "Another 2 weeks, another 3kg down. Total of 7kg so far. My energy levels are way up. Feeling great!" },
        { "id": "msg_021", "month": 2, "timestamp": "2025-10-29T08:35:00Z", "sender": "Advik", "role": "Performance Scientist", "message": "That's fantastic news, Mohit! Your CMF watch data is telling the same story - your resting heart rate is already trending down. That's a great sign of improved cardiovascular fitness. The hard work is paying off." },
        { "id": "msg_022", "month": 3, "timestamp": "2025-11-10T09:20:00Z", "sender": "Mohit", "role": "Member", "message": "Team, getting frustrated here. The scale hasn't moved in a week and I'm sticking to the plan. What's going on? 😠" },
        { "id": "msg_023", "month": 3, "timestamp": "2025-11-10T09:22:00Z", "sender": "Advik", "role": "Performance Scientist", "message": "Hey Mohit, thanks for flagging this. Plateaus are a completely normal part of the process, and honestly, we expect them. It's a sign your body is adapting. Let me pull up your CMF watch data right now. Carla and I will investigate and get back to you with a plan today." },
        { "id": "msg_024", "month": 3, "timestamp": "2025-11-10T15:00:00Z", "sender": "Carla", "role": "Nutritionist", "message": "Ok Mohit, we've analyzed your food logs and Advik has checked your sleep data. We noticed your sleep quality dropped significantly during your trip to the US last week. We think the combo of jet lag and a few too many 'healthy' airport snacks is the culprit. Your body is a bit stressed, which can cause it to hold onto weight." },
        { "id": "msg_025", "month": 3, "timestamp": "2025-11-10T15:05:00Z", "sender": "Rachel", "role": "PT / Physiotherapist", "message": "To break through this, we need to introduce a new stimulus. I'm adjusting your workout. We're going to swap one strength day for a high-intensity interval session to shake up your metabolism. The new plan is in your app. It'll be challenging, but fun!" },
        { "id": "msg_026", "month": 3, "timestamp": "2025-11-10T15:06:00Z", "sender": "Mohit", "role": "Member", "message": "Ok, let's try it. I trust the process." },
        { "id": "msg_027", "month": 3, "timestamp": "2025-11-18T08:00:00Z", "sender": "Mohit", "role": "Member", "message": "IT WORKED! The scale is moving again. Down another 1.5kg. The new workout is tough but effective. 🙏" },
        { "id": "msg_028", "month": 4, "timestamp": "2025-12-05T11:00:00Z", "sender": "Ruby", "role": "Concierge", "message": "Hi Mohit, it's that time again! I've booked your second 3-month diagnostic panel for next week. Let's see all the amazing progress you've made." },
        { "id": "msg_029", "month": 4, "timestamp": "2025-12-15T14:00:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "Mohit, the results from your second panel are in, and the improvement is fantastic. This is the data that proves the process works. Let's compare to 3 months ago:" },
        { "id": "msg_030", "month": 4, "timestamp": "2025-12-15T14:01:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "ApoB: 105 mg/dL\nhs-CRP: 1.5 mg/L\nHbA1c: 5.7%\nFasting Insulin: 9 µIU/mL\nTriglycerides: 150 mg/dL\nTestosterone: 350 ng/dL\nHDL Cholesterol: 45 mg/dL\nALT: 45 U/L" },
        { "id": "msg_031", "month": 4, "timestamp": "2025-12-15T14:05:00Z", "sender": "Mohit", "role": "Member", "message": "That's the data I wanted to see! The HbA1c change is huge for me. Let's keep this momentum going. What's next?" },
        { "id": "msg_032", "month": 4, "timestamp": "2025-12-15T14:10:00Z", "sender": "Carla", "role": "Nutritionist", "message": "Time for Plan V2! To keep the progress coming, we're going to introduce carb cycling. This will help accelerate fat loss while giving you more energy for your intense workouts. I've laid it all out in your portal." },
        { "id": "msg_033", "month": 4, "timestamp": "2025-12-22T10:00:00Z", "sender": "Mohit", "role": "Member", "message": "Hey Rachel, I saw a video about 'progressive overload'. Is that what we're doing with my strength plan? Just want to make sure I'm lifting correctly to keep seeing results." },
        { "id": "msg_034", "month": 4, "timestamp": "2025-12-22T10:05:00Z", "sender": "Rachel", "role": "PT / Physiotherapist", "message": "Exactly right, Mohit! You've hit on the most important principle of strength training. Each week, we aim to increase the demand slightly, either by adding a little more weight, doing one more rep, or improving your form. That's why logging your workouts is so important. You're doing great." },
        { "id": "msg_035", "month": 5, "timestamp": "2026-01-08T18:00:00Z", "sender": "Mohit", "role": "Member", "message": "Ugh. Bad news. I was rushing through the airport in London and tweaked my lower back. It's really sore. 😫" },
        { "id": "msg_036", "month": 5, "timestamp": "2026-01-08T18:05:00Z", "sender": "Rachel", "role": "PT / Physiotherapist", "message": "Ok, no problem Mohit. Don't panic. First, no lifting. At all. I'm swapping your entire workout plan for a rehab and mobility protocol. The goal right now is recovery, not fat loss. We need to fix this before it becomes a chronic issue. I've sent you some gentle stretches you can do in your hotel room." },
        { "id": "msg_037", "month": 5, "timestamp": "2026-01-15T10:00:00Z", "sender": "Mohit", "role": "Member", "message": "This is so frustrating. I feel like I'm losing all my progress." },
        { "id": "msg_038", "month": 5, "timestamp": "2026-01-15T10:05:00Z", "sender": "Neel", "role": "Concierge Lead", "message": "Mohit, Neel here. I completely understand the frustration. It feels like a step back, but think of it as a strategic pause. A week of focused rehab now will prevent months of pain later. Your long-term health is the priority. Trust the process. We'll get you back on track as soon as you're ready." },
        { "id": "msg_039", "month": 5, "timestamp": "2026-01-22T11:30:00Z", "sender": "Mohit", "role": "Member", "message": "Carla, I've been reading about anti-inflammatory foods to help with this back issue. Anything specific I should be eating more of?" },
        { "id": "msg_040", "month": 5, "timestamp": "2026-01-22T11:35:00Z", "sender": "Carla", "role": "Nutritionist", "message": "Proactive thinking, I love it. Let's load up on Omega-3s. I'll have Ruby arrange for some high-quality salmon to be delivered for your meals. Also, focus on leafy greens and berries. I've added some notes to your plan." },
        { "id": "msg_041", "month": 6, "timestamp": "2026-02-02T09:00:00Z", "sender": "Mohit", "role": "Member", "message": "Back is feeling 100%. The rehab plan worked. Ready to get back to it." },
        { "id": "msg_042", "month": 6, "timestamp": "2026-02-02T09:05:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "Glad to hear it. To take your nutrition to the next level and really dial in the fat loss, we're going to send you a Continuous Glucose Monitor (CGM). It's a small sensor that tracks your blood sugar in real-time. It will give us incredible data to work with." },
        { "id": "msg_043", "month": 6, "timestamp": "2026-02-10T08:45:00Z", "sender": "Mohit", "role": "Member", "message": "This CGM is a game changer! I had no idea oatmeal spiked my sugar so much. But eggs and avocado barely move the needle. This is fascinating. 🤔" },
        { "id": "msg_044", "month": 6, "timestamp": "2026-02-10T08:50:00Z", "sender": "Carla", "role": "Nutritionist", "message": "Isn't it incredible? This is personalized nutrition in action. We're learning the exact 'fuel mix' that works for your body. Let's use this data to fine-tune your meals." },
        { "id": "msg_045", "month": 6, "timestamp": "2026-02-20T17:00:00Z", "sender": "Mohit", "role": "Member", "message": "Advik, I keep reading about the importance of sleep for fat loss. Can you explain the mechanism? I want to understand the 'why'." },
        { "id": "msg_046", "month": 6, "timestamp": "2026-02-20T17:05:00Z", "sender": "Advik", "role": "Performance Scientist", "message": "Excellent question. It's all about hormones. Poor sleep increases cortisol (a stress hormone) which can promote fat storage, and it disrupts ghrelin and leptin, the hormones that control hunger and fullness. So you feel hungrier and less satisfied. By prioritizing sleep, you're creating a much better hormonal environment for fat loss. Your CMF watch sleep score is a key metric for us to track." },
        { "id": "msg_047", "month": 7, "timestamp": "2026-03-06T13:00:00Z", "sender": "Ruby", "role": "Concierge", "message": "Time flies! Your third diagnostic panel is booked for next week, Mohit. Can't wait to see the results." },
        { "id": "msg_048", "month": 7, "timestamp": "2026-03-16T15:00:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "Mohit, the results from your third panel are in, and frankly, they're phenomenal. You've lost 28kg, and your bloodwork reflects that transformation. Let's look at the numbers:" },
        { "id": "msg_049", "month": 7, "timestamp": "2026-03-16T15:01:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "ApoB: 82 mg/dL\nhs-CRP: 0.8 mg/L\nHbA1c: 5.4%\nFasting Insulin: 4 µIU/mL\nTriglycerides: 95 mg/dL\nALT: 25 U/L\nTestosterone: 480 ng/dL\nHDL Cholesterol: 55 mg/dL" },
        { "id": "msg_050", "month": 7, "timestamp": "2026-03-16T15:02:00Z", "sender": "Dr. Warren", "role": "Medical Strategist", "message": "From a cardiovascular and metabolic risk standpoint, you are a different person than you were 6 months ago. This is a testament to your consistency and dedication." },
        { "id": "msg_051", "month": 7, "timestamp": "2026-03-16T15:05:00Z", "sender": "Mohit", "role": "Member", "message": "Wow. Seeing it all laid out like that... it's incredible. I feel so much better, but seeing the internal changes is what it's all about. Now that my health markers are good, can we focus on my cognitive performance goal?" },
        { "id": "msg_052", "month": 7, "timestamp": "2026-03-16T15:10:00Z", "sender": "Advik", "role": "Performance Scientist", "message": "Absolutely. Your improved metabolic health is the perfect foundation for better cognitive function. I'm updating your plan to include specific protocols for improving sleep quality and stress resilience, which are key for mental clarity. We'll use your CMF watch data to track this closely." },
        { "id": "msg_053", "month": 7, "timestamp": "2026-03-25T07:30:00Z", "sender": "Mohit", "role": "Member", "message": "Read an interesting article on cold exposure... are cold showers in the morning actually beneficial or just hype?" },
        { "id": "msg_054", "month": 7, "timestamp": "2026-03-25T07:35:00Z", "sender": "Advik", "role": "Performance Scientist", "message": "There's solid science behind it. Cold exposure can increase norepinephrine, which boosts focus and alertness. It can also improve resilience. It's not magic, but it's a powerful tool. We can add it as an optional 'experiment' to your morning routine if you'd like to try it." },
        { "id": "msg_055", "month": 8, "timestamp": "2026-04-02T09:30:00Z", "sender": "Mohit", "role": "Member", "message": "Quick heads up, I'll be in South Korea for 10 days starting next Friday. It's a big trip. What can we do to make sure I stay on track?" },
        { "id": "msg_056", "month": 8, "timestamp": "2026-04-02T09:35:00Z", "sender": "Ruby", "role": "Concierge", "message": "Noted, Mohit. The team will build you a comprehensive 'Seoul Travel Protocol'. Rachel will find a suitable gym near your hotel, and Carla will provide a guide to healthy Korean food choices. We'll make sure you're all set." },
        { "id": "msg_057", "month": 8, "timestamp": "2026-04-10T11:00:00Z", "sender": "Neel", "role": "Concierge Lead", "message": "Mohit, as we come to the end of this 8-month block, I've been reviewing your entire journey. The transformation is incredible. How are you feeling?" },
        { "id": "msg_058", "month": 8, "timestamp": "2026-04-10T11:05:00Z", "sender": "Mohit", "role": "Member", "message": "Like a new man. I have more energy, I'm more focused at work, and I'm over halfway to my 50kg goal. This has been life-changing. I'm ready to talk about the plan for the next year." },
        { "id": "msg_059", "month": 8, "timestamp": "2026-04-10T11:10:00Z", "sender": "Neel", "role": "Concierge Lead", "message": "That's what we love to hear. Let's set up a call for next week with the whole team to map out the next phase. The goal now is to make this sustainable for life and to continue pushing the boundaries of your performance. Congratulations on everything you've achieved." }
      ]
    }

    file_name = "conversation.json"
    with open(file_name, 'w') as f:
        json.dump(conversation_json, f, indent=4)

    # 2. Run the extraction model on the file
    health_data = extract_health_data_from_log(file_name)

    # 3. Print the structured output
    if health_data:
        # 4. EXPORT THE EXTRACTED DATA TO A NEW JSON FILE
        output_file_name = "extracted_health_data.json"
        with open(output_file_name, 'w') as f:
            json.dump(health_data, f, indent=4)

        print(f"Successfully extracted {len(health_data)} data points and saved to '{output_file_name}'")

Successfully extracted 23 data points and saved to 'extracted_health_data.json'
