<a href="https://colab.research.google.com/github/PratyushAggarwal1/BiteSpeed/blob/main/NaptickTask2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
# -*- coding: utf-8 -*-
"""Naptick_Task2.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1sub3Kuwk4MbfOuGeZN_IuzROLd1G9zXI
"""

# --- Dependency installation (Colab / IPython only) ---
# Lines starting with '!' are IPython shell escapes, so this cell only runs inside
# a notebook; it is not valid as a plain Python script.
print("⏳ Installing STT, TTS, and Audio Handling libraries...")
!pip install -q faster-whisper soundfile SpeechRecognition ipython

print("⏳ Installing Core ML, LLM, and Fine-Tuning libraries...")
# Upgrade 'datasets' on its own first; the fsspec pin further down is chosen to match it.
print("   Attempting to upgrade 'datasets' library...")
!pip install -q -U datasets

# Install the rest of the ML stack.
!pip install -q transformers accelerate peft bitsandbytes sentencepiece torch

print("⏳ Installing TTS library...")
!pip install -q piper-tts

print("\n--- Main Installations Attempted ---")

# --- Dependency Conflict Resolution ---
# Reasoning: force-install the fsspec version REQUIRED BY 'datasets'.
# This will likely conflict with 'gcsfs', but the libraries used directly in this
# notebook (datasets, transformers, etc.) take priority; the gcsfs conflict is
# tolerated unless it causes a runtime error later in this workflow.
# Unlike the other installs, this one is not run with -q, so pip's full output
# (including any conflict warning) is shown.
print("\n⏳ Forcing fsspec version compatible with 'datasets'...")
!pip install -U fsspec==2025.3.0 # <-- FORCE version needed by datasets
print("⏳ Installing Gradio")
!pip install -q gradio
print("\n✅ All dependency installations and conflict resolutions attempted.")
print("   ⚠️ NOTE: Expect a potential conflict warning regarding 'gcsfs' needing a different fsspec version.")
print("   We will proceed and see if this causes issues during model/data loading or training.")

import torch
# --- GPU check ---
# Report whether PyTorch can see a CUDA device. When one is present its name is
# kept in `gpu_name`; otherwise the user is told how to enable a GPU runtime.
print("\n--- Verifying GPU Access ---")
if not torch.cuda.is_available():
    print(
        "❌ WARNING: No GPU detected by PyTorch!",
        "   >>> Action Required: Go to 'Runtime' -> 'Change runtime type', select 'T4 GPU', and click 'Save'. <<<",
        "   Then, RESTART the runtime ('Runtime' -> 'Restart runtime') and run this cell again.",
        "   Proceeding without a GPU will make LLM operations extremely slow.",
        sep="\n",
    )
else:
    gpu_name = torch.cuda.get_device_name(0)
    # PyTorch and CUDA versions are printed purely as a debugging aid.
    print(
        f"✅ GPU detected: {gpu_name}",
        f"   PyTorch Version: {torch.__version__}",
        f"   CUDA Version: {torch.version.cuda}",
        "   GPU is ready for model loading and computations.",
        sep="\n",
    )
print("--- GPU Verification Complete ---")

import faster_whisper
import torch
import os
# --- Load the Whisper speech-to-text model ---
# Pick device and numeric precision together: float16 on CUDA, int8 on CPU.
stt_model_size = "base.en"
if torch.cuda.is_available():
    device, compute_type = "cuda", "float16"
else:
    device, compute_type = "cpu", "int8"

# Stays None when loading fails, so later code can test for availability.
stt_model = None
print(f"⏳ Loading Whisper STT model '{stt_model_size}' onto {device} (compute: {compute_type})...")
try:
    stt_model = faster_whisper.WhisperModel(
        stt_model_size,
        device=device,
        compute_type=compute_type,
    )
    print(f"✅ Whisper STT model ('{stt_model_size}') loaded successfully.")
except Exception as e:
    # Loading is best-effort: report the failure and leave stt_model as None.
    print(
        f"❌ Error loading Whisper model: {e}",
        "   This could be due to insufficient GPU memory. Consider trying a smaller model size (e.g., 'tiny.en').",
        "   Also ensure the GPU runtime is active.",
        sep="\n",
    )

# Quietly upgrade huggingface_hub (IPython shell escape, notebook-only) before the
# TTS step below imports hf_hub_download from it.
!pip install -q -U huggingface_hub
print("Done")

# @title Step 3: Load TTS Model (Piper) - FINAL WORKING VERSION

# --- Ensure latest huggingface_hub is installed (optional here, but good practice) ---
# print("⏳ Upgrading huggingface_hub library...")
# !pip install -q -U huggingface_hub
# print("✅ huggingface_hub upgrade attempt complete.")

from pathlib import Path
from piper import PiperVoice
import os
import time
try:
    from huggingface_hub import hf_hub_download
except ImportError:
    print("❌ ERROR: huggingface_hub library not found. Please install it: !pip install -q huggingface_hub")
    raise SystemExit("Stopping: huggingface_hub not installed.")


# --- Configuration ---
# The voice is identified by language / region / family / quality. Those parts
# form both the directory prefix inside the Hugging Face repo and the file stem.
voice_repo_id = "rhasspy/piper-voices"
voice_lang, voice_region = "en", "en_US"
voice_family, voice_quality = "lessac", "medium"
voice_repo_path_prefix = "/".join((voice_lang, voice_region, voice_family, voice_quality))
voice_name = "-".join((voice_region, voice_family, voice_quality))
onnx_filename = voice_name + ".onnx"
json_filename = onnx_filename + ".json"

print(
    f"--- TTS Configuration ---",
    f"Repo ID: {voice_repo_id}",
    f"Voice Path Prefix: {voice_repo_path_prefix}",
    f"Onnx File: {onnx_filename}",
    f"Json File: {json_filename}",
    f"-------------------------",
    sep="\n",
)

# Set to True to delete the local copies below and force a fresh download.
force_redownload = False

# Local directory holding the voice files, plus the two paths the loader expects.
model_dir = Path("./piper_models")
model_dir.mkdir(exist_ok=True)
voice_model_path = model_dir / onnx_filename
voice_config_path = model_dir / json_filename

if force_redownload:
    print("🧹 Force redownload enabled. Deleting existing local TTS model files...")
    # Deletion is best-effort: a file that cannot be removed only triggers a warning.
    if voice_model_path.is_file():
        try:
            voice_model_path.unlink()
            print(f"   Deleted: {voice_model_path}")
        except OSError as e:
            print(f"   Warning: Could not delete {voice_model_path}: {e}")
    if voice_config_path.is_file():
        try:
            voice_config_path.unlink()
            print(f"   Deleted: {voice_config_path}")
        except OSError as e:
            print(f"   Warning: Could not delete {voice_config_path}: {e}")
    print("   Deletion attempt complete.")

# --- Download using hf_hub_download ---
# Fetch the voice's ONNX model and JSON config from the Hugging Face repo into
# model_dir, then make sure each file is available at the flat path the loader
# expects (voice_model_path / voice_config_path).
# Both downloads share one try block, so if the ONNX step raises, the JSON
# download is never attempted.
print(f"\n⏳ Downloading TTS files using huggingface_hub (if needed)...")
# NOTE(review): download_ok is set here and in the except branch, but nothing in
# the visible part of the file reads it — confirm whether later code relies on it.
download_ok = True
actual_onnx_path_returned = None
actual_json_path_returned = None
try:
    # Paths of the two files inside the repository.
    onnx_repo_filepath = f"{voice_repo_path_prefix}/{onnx_filename}"
    json_repo_filepath = f"{voice_repo_path_prefix}/{json_filename}"

    # Download ONNX
    # NOTE(review): recent huggingface_hub releases deprecate local_dir_use_symlinks —
    # confirm the installed version still accepts it.
    actual_onnx_path_returned = hf_hub_download(
        repo_id=voice_repo_id, filename=onnx_repo_filepath, cache_dir=None,
        force_download=force_redownload, local_dir=str(model_dir), local_dir_use_symlinks=False
    )
    # If the file was stored somewhere other than the expected flat path, copy it
    # there; when the copy fails, fall back to loading from the returned path.
    if Path(actual_onnx_path_returned).resolve() != voice_model_path.resolve():
         import shutil
         try:
            shutil.copyfile(actual_onnx_path_returned, voice_model_path)
            print(f"   Copied ONNX file to expected path: {voice_model_path}")
         except Exception as copy_e:
             print(f"   Warning: Failed to copy ONNX file: {copy_e}. Using cached path: {actual_onnx_path_returned}")
             voice_model_path = Path(actual_onnx_path_returned)
    else:
        print(f"   ONNX file found/verified at: {voice_model_path}")


    # Download JSON (same procedure as for the ONNX file above)
    actual_json_path_returned = hf_hub_download(
        repo_id=voice_repo_id, filename=json_repo_filepath, cache_dir=None,
        force_download=force_redownload, local_dir=str(model_dir), local_dir_use_symlinks=False
    )
    if Path(actual_json_path_returned).resolve() != voice_config_path.resolve():
         import shutil
         try:
            shutil.copyfile(actual_json_path_returned, voice_config_path)
            print(f"   Copied JSON file to expected path: {voice_config_path}")
         except Exception as copy_e:
             print(f"   Warning: Failed to copy JSON file: {copy_e}. Using cached path: {actual_json_path_returned}")
             voice_config_path = Path(actual_json_path_returned)
    else:
         print(f"   JSON file found/verified at: {voice_config_path}")

except Exception as e:
    # Any download failure is reported and recorded; the file checks that follow
    # decide whether a usable local copy exists anyway.
    print(f"❌ Error during hf_hub_download process: {e}")
    download_ok = False

# --- Verification AFTER Download Attempt ---
# The model file must exist and exceed a minimal size; the config file only
# needs to exist and be non-empty.
print("\n Verifying local files again before loading...")
min_file_size = 100
model_ok = voice_model_path.is_file() and voice_model_path.stat().st_size > min_file_size
config_ok = voice_config_path.is_file() and voice_config_path.stat().st_size > 0

if not model_ok:
    print(f"   ❌ Verification Failed: Model file issue at {voice_model_path}")
if not config_ok:
    print(f"   ❌ Verification Failed: Config file issue at {voice_config_path}")

# --- Load Piper Voice ---
# tts_voice stays None when loading is skipped or fails; tts_sample_rate keeps
# its 22050 Hz fallback unless the loaded voice reports its own rate.
tts_voice = None
tts_sample_rate = 22050

if not (model_ok and config_ok):
    print("\n❌ Skipping TTS model loading due to file verification failure.")
else:
    print("\n⏳ Loading TTS voice model into memory...")
    try:
        tts_voice = PiperVoice.load(str(voice_model_path), str(voice_config_path))
        if not (hasattr(tts_voice, 'config') and hasattr(tts_voice.config, 'sample_rate')):
            # Safety net: the voice loaded but does not expose config.sample_rate.
            print(f"✅ TTS voice model '{voice_name}' loaded, but couldn't retrieve sample rate automatically.")
            print(f"   Using default Sample Rate: {tts_sample_rate} Hz")
        else:
            tts_sample_rate = tts_voice.config.sample_rate
            print(f"✅ TTS voice model '{voice_name}' loaded successfully.")
            print(f"   Retrieved Sample Rate: {tts_sample_rate} Hz")
    except Exception as e:
        print(f"❌ Error loading TTS model: {e}")

import huggingface_hub
from google.colab import userdata
import os

# --- Hugging Face login ---
# Read the access token from Colab's secret store (secret name 'HF_TOKEN') and
# log in with it. Failures are reported but do not stop the notebook.
try:
    hf_token = userdata.get('HF_TOKEN')
    huggingface_hub.login(token=hf_token, add_to_git_credential=True)
    print("Successfully logged into Hugging Face Hub")
except userdata.SecretNotFoundError:
    print("HF_Token secret not found")
except Exception as e:
    # Include the exception type and message so the cause (invalid token, denied
    # notebook access to the secret, network error, ...) can be diagnosed.
    print(f"An error occurred during login: {type(e).__name__}: {e}")

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# --- Configuration ---
# Model identifier on the Hugging Face Hub. Mistral-7B-Instruct-v0.2 is an
# instruction-tuned 7-billion-parameter model whose size is manageable once quantized.
base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# --- Quantization Configuration (bitsandbytes) ---
# load_in_4bit=True:               load the weights with 4-bit quantization to save memory.
# bnb_4bit_quant_type="nf4":       "NormalFloat 4-bit" quantization, a good default.
# bnb_4bit_compute_dtype:          dtype used for the actual matmuls.
# bnb_4bit_use_double_quant=True:  also quantize the quantization constants for a
#                                  small additional memory saving.
#
# bfloat16 has native hardware support only from compute capability 8.0 (Ampere)
# upward. Colab's T4 is compute capability 7.5, so float16 is used there instead.
if torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8:
    bnb_compute_dtype = torch.bfloat16
else:
    bnb_compute_dtype = torch.float16

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=bnb_compute_dtype,
    bnb_4bit_use_double_quant=True,
)

# --- Load Base LLM Model ---
llm_model = None  # Stays None if loading fails, so later steps can check for it.
print(f"⏳ Loading base LLM: '{base_model_id}' with 4-bit quantization...")
print("   This requires downloading several GB and can take 5-15 minutes...")
try:
    # quantization_config=bnb_config: apply the 4-bit loading defined above.
    # device_map="auto": let Accelerate place the layers on the available device(s).
    # trust_remote_code is deliberately NOT enabled: the Mistral architecture ships
    # with transformers, so no repository-provided code has to be executed.
    llm_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        quantization_config=bnb_config,
        device_map="auto",
    )
    print(f"✅ Base LLM model loaded successfully.")
    print(f"   Model is on device(s): {llm_model.hf_device_map}")  # Show device map

except Exception as e:
    # Out-of-memory is the most common failure on resource-constrained runtimes
    # such as the Colab free tier, hence the hints below.
    print(f"❌ Error loading base LLM model: {e}")
    print("   >>> COMMON FIXES: <<<")
    print("   1. OUT OF MEMORY: Go to 'Runtime' -> 'Disconnect and delete runtime', then reconnect and run cells again.")
    print("   2. Ensure T4 GPU is selected in Runtime settings.")
    print("   3. Close other Colab notebooks or browser tabs consuming memory.")
    print("   4. If errors persist, Colab might be temporarily overloaded; try again later.")
    llm_model = None  # Ensure variable is None if loading failed

# --- Load LLM Tokenizer ---
llm_tokenizer = None  # Stays None if the model or the tokenizer fails to load.
if llm_model is not None:
    print("\n⏳ Loading LLM Tokenizer...")
    try:
        # The tokenizer must come from the same checkpoint as the model.
        llm_tokenizer = AutoTokenizer.from_pretrained(base_model_id)

        # --- Tokenizer Configuration ---
        # Reuse the end-of-sequence token as the padding token.
        llm_tokenizer.pad_token = llm_tokenizer.eos_token
        # Pad on the right. get_llm_response tokenizes one prompt at a time, so no
        # padding is actually inserted during generation; batched generation with a
        # decoder-only model would need left padding instead.
        llm_tokenizer.padding_side = "right"

        print("✅ LLM Tokenizer loaded and configured successfully.")
    except Exception as e:
        print(f"❌ Error loading tokenizer: {e}")
        llm_tokenizer = None
else:
    print("\nSkipping tokenizer loading because the base LLM failed to load.")

# @title Step 5: Define "Listen" Function (Audio Transcription)
# Reasoning: Define the functions that handle audio input and convert it to text using the loaded Whisper model.

from google.colab import files # For Colab's file upload UI.
import io # For handling byte streams (though not directly used here, often useful).
import soundfile as sf # For potentially reading audio file info if needed.
import numpy as np # Whisper often works well with NumPy arrays.
import time # For timing the transcription process.
import os # For file operations (checking existence, removing temp files).

# --- Core Transcription Function ---
def transcribe_audio(audio_path_or_numpy_array):
    """
    Run the loaded Whisper STT model on one piece of audio.

    Args:
        audio_path_or_numpy_array: Path to an audio file, or a NumPy array holding
            the audio samples; passed straight to the model's ``transcribe`` method.

    Returns:
        The stripped transcription text (str), or None when the model is not
        loaded or transcription raises.
    """
    # Guard: nothing can be done without a loaded model.
    if not stt_model:
        print("❌ Error: Whisper STT model ('stt_model') is not loaded.")
        return None

    try:
        print(f"\n⏳ Transcribing audio...")
        started_at = time.time()

        # beam_size=5 and language="en" are the decoding settings used throughout.
        segments, info = stt_model.transcribe(
            audio_path_or_numpy_array,
            beam_size=5,
            language="en",
        )

        # Glue the per-segment texts together and trim surrounding whitespace.
        segment_texts = [segment.text for segment in segments]
        transcription = "".join(segment_texts).strip()
        elapsed = time.time() - started_at  # kept for the optional timing print below

        print(f"   🗣️ Transcription: '{transcription}'")
        print(f"   🌐 Detected language: {info.language} (Confidence: {info.language_probability:.2f})")
        # print(f"   ⏱️ (Transcription time: {elapsed:.2f} seconds)") # Optional: uncomment for timing info
        return transcription
    except Exception as e:
        print(f"❌ Error during Whisper transcription: {e}")
        return None

# --- Colab Upload Handler ---
def transcribe_uploaded_audio():
    """
    Prompt for an audio upload in Colab, transcribe it, then remove the temp copy.

    Returns:
        The transcribed text (str), or None when nothing was uploaded or any
        step of the process fails.
    """
    print("\n🎙️ Please upload an audio file (e.g., WAV, MP3, M4A)...")
    # files.upload() opens the browser upload dialog and returns a mapping of
    # filename -> file content (bytes).
    uploaded = files.upload()
    if not uploaded:
        print("   No file uploaded or upload cancelled.")
        return None

    # Only the first uploaded file is processed.
    file_name, audio_bytes = next(iter(uploaded.items()))
    print(f"   File '{file_name}' uploaded ({len(audio_bytes)} bytes).")

    # The bytes are written to a temporary file so the transcriber can be given a path.
    temp_audio_path = f"./temp_audio_for_transcription_{file_name}"
    transcription_result = None
    try:
        with open(temp_audio_path, 'wb') as temp_file:
            temp_file.write(audio_bytes)
        transcription_result = transcribe_audio(temp_audio_path)
    except Exception as e:
        print(f"   ❌ Error processing or writing uploaded file: {e}")
        transcription_result = None
    finally:
        # Always try to remove the temporary file, whether or not transcription
        # succeeded; failing to delete it is only worth a warning.
        if os.path.exists(temp_audio_path):
            try:
                os.remove(temp_audio_path)
                # print(f"   🧹 Temporary file '{temp_audio_path}' deleted.") # Optional: uncomment for confirmation
            except OSError as cleanup_error:
                print(f"   ⚠️ Warning: Could not delete temporary file {temp_audio_path}: {cleanup_error}")

    return transcription_result

print("\n✅ 'Listen' functions defined (transcribe_audio, transcribe_uploaded_audio).")
# To test the upload flow now, uncomment the example usage lines below.

# Example Usage (Optional - uncomment to test)
# print("\n--- Testing Audio Upload ---")
# text_result = transcribe_uploaded_audio()
# if text_result:
#     print(f"\n✅ Successfully transcribed: '{text_result}'")
# else:
#     print("\n❌ Audio transcription test failed.")
# print("--- End Test ---")

# @title Step 6: Define "Think" Function (LLM Interaction & History Management)
# Reasoning: Define the function that takes user input, interacts with the LLM (base or fine-tuned),
# manages conversation history for context, and returns the LLM's response.

import time
from transformers import StoppingCriteria, StoppingCriteriaList # Potentially useful later, import now

# --- Conversation History Storage ---
# Each conversation is a plain list of turn dictionaries with a 'role'
# ('system', 'user' or 'assistant') and a 'content' string. get_llm_response
# appends to whichever list it is given. Two separate lists are kept so the base
# model and the fine-tuned model can be compared without sharing context.
conversation_history_base = []
conversation_history_finetuned = []

# --- System Prompt Definition ---
# Instructions that fix the assistant's persona and limits: a sleep coach that
# stays on sleep topics, never gives medical advice, and keeps answers brief.
# get_llm_response stores this text as the 'system' entry of a new history and
# prepends it to the first user message when building the prompt.
SYSTEM_PROMPT = """You are 'NapCoach', a specialized Sleep Coaching Assistant running on a Mistral-based model. Your primary function is to provide helpful, safe, accurate, and encouraging information related to sleep health, habits, and interpreting general sleep patterns based on user queries and provided data.

**Your Core Directives:**
1.  **Focus:** Stick strictly to sleep-related topics (hygiene, habits, cycles, interpreting non-medical data trends).
2.  **Safety First:** NEVER provide medical advice, diagnoses, or treatment recommendations. If a user asks for medical advice or describes potentially serious symptoms (e.g., severe insomnia, sleep apnea symptoms like gasping), YOU MUST respond with: "For medical concerns or diagnoses, it's essential to consult with a qualified healthcare professional." Do not elaborate further on the medical topic.
3.  **Data Interpretation:** If provided with sleep data (wearable summaries, diary entries), interpret general trends and compare them to typical healthy ranges (e.g., "Your sleep efficiency of 88% is generally considered good."). Avoid definitive statements about health based solely on data.
4.  **Knowledge Base:** Rely ONLY on the information present in the conversation history and your training data. Do not invent facts or statistics. If you don't know something specific, state: "I don't have specific information on that topic, but I can discuss general sleep principles."
5.  **Persona:** Be friendly, supportive, knowledgeable (within bounds), and concise. Encourage good sleep habits.
6.  **Formatting:** Keep responses well-structured and relatively brief.
"""

# --- Mistral Prompt Formatting Function ---
# Reasoning: Mistral Instruct models require a specific format using special tags ([INST], [/INST], <s>, </s>).
# This function takes a list of history messages and constructs the correctly formatted string.
def format_prompt_mistral(messages):
    """Format a list of message dictionaries as a Mistral Instruct prompt string.

    Mistral Instruct has no dedicated system tag, so a leading ``system`` message
    is merged into the first user turn as ``"<system>\\n\\nUser query: <user>"``
    (the same layout get_llm_response uses for the very first turn). Previously
    the system message was simply discarded, which meant the instructions were
    lost on every turn after the first.

    Args:
        messages (list): Dictionaries with a 'role' ('system', 'user' or
            'assistant') and a 'content' string. The list is not modified.

    Returns:
        str: The prompt. User turns are rendered as ``[INST] ... [/INST]``
        (with ``<s>`` in front when the user turn is the first non-system
        message) and assistant turns as `` ...</s>``. Returns "" for an empty
        list or one that contains only a system message.
    """
    if not messages:
        return ""

    # Pull a leading system message out of the turn list, but remember its text
    # so it can be folded into the first user turn below.
    pending_system_text = None
    if messages[0]["role"] == "system":
        pending_system_text = messages[0]["content"]
        messages = messages[1:]  # slicing copies; the caller's list is untouched

    parts = []
    for i, msg in enumerate(messages):
        role = msg["role"]
        content = msg["content"]
        if role == "user":
            if pending_system_text is not None:
                content = f"{pending_system_text}\n\nUser query: {content}"
                pending_system_text = None
            # BOS token <s> only in front of the very first turn of the prompt.
            prefix = "<s>" if i == 0 else ""
            parts.append(f"{prefix}[INST] {content} [/INST]")
        elif role == "assistant":
            # Every assistant reply is closed with the EOS token </s>.
            parts.append(f" {content}</s>")

    # With a well-formed history the result ends in [/INST], i.e. it is ready for
    # the model to generate the next assistant reply.
    return "".join(parts)

# --- Core LLM Response Generation Function ---
def _fold_system_prompt(history):
    """Return a copy of history with a leading system entry merged into the first user turn."""
    if not history or history[0]["role"] != "system":
        return list(history)

    pending_system_text = history[0]["content"]
    folded = []
    for msg in history[1:]:
        if pending_system_text is not None and msg["role"] == "user":
            msg = {
                "role": "user",
                "content": f"{pending_system_text}\n\nUser query: {msg['content']}",
            }
            pending_system_text = None
        folded.append(msg)
    return folded


def get_llm_response(user_message, model_to_use, tokenizer_to_use, history, is_finetuned_model=False):
    """
    Generate the assistant's reply to ``user_message`` and update ``history``.

    Args:
        user_message (str): The user's latest input text.
        model_to_use: The loaded LLM model object (base or fine-tuned).
        tokenizer_to_use: The loaded tokenizer object corresponding to the model.
        history (list): Conversation history for this model; modified in place.
            An empty history first receives a 'system' entry holding SYSTEM_PROMPT.
            On success the user message and the assistant reply are appended; on
            failure the list is restored to its state before the call.
        is_finetuned_model (bool): Only affects the log line naming the model.

    Returns:
        The generated assistant response (str), or an apology message (str) when
        the model/tokenizer is missing or generation fails.
    """
    if not model_to_use or not tokenizer_to_use:
        print("❌ Error: LLM model or tokenizer is not available.")
        return "My apologies, I'm unable to process your request right now as my core module isn't ready."

    model_identifier = "Fine-tuned Model" if is_finetuned_model else "Base Model"
    print(f"\n🤔 Thinking... (Using {model_identifier})")

    # Remember the original length so a failed call can undo exactly what it added
    # (including the system entry on a first turn).
    entries_before = len(history)
    if not history:
        history.append({"role": "system", "content": SYSTEM_PROMPT})
    history.append({"role": "user", "content": user_message})

    # Mistral has no system tag: the system prompt travels inside the first user
    # turn. It is folded in on EVERY turn, so the instructions are still part of
    # the prompt after the first exchange.
    prompt = format_prompt_mistral(_fold_system_prompt(history))

    # Inputs must live on the same device as the model (e.g. 'cuda:0').
    try:
        current_device = next(model_to_use.parameters()).device
    except Exception as e:
        print(f"❌ Error detecting model device: {e}. Assuming CPU.")
        current_device = 'cpu'

    try:
        # The formatted prompt already contains the literal <s>/</s> markers, so the
        # tokenizer must not add its own special tokens; otherwise the sequence
        # would start with a duplicated BOS token.
        model_inputs = tokenizer_to_use(
            prompt, return_tensors="pt", add_special_tokens=False
        ).to(current_device)

        # --- Generation Parameters ---
        generation_config = {
            "max_new_tokens": 250,          # Upper bound on the reply length.
            "pad_token_id": tokenizer_to_use.eos_token_id,  # EOS doubles as the padding token.
            "do_sample": True,              # Sample for less deterministic, more natural replies.
            "temperature": 0.6,             # Slightly conservative randomness.
            "top_p": 0.9,                   # Nucleus sampling threshold.
            # "top_k": 50,                  # Optional alternative / complement to top_p.
            # "repetition_penalty": 1.1     # Optional: mildly penalize repeated tokens.
        }

        print(f"   Generating response with config: {generation_config}")
        start_time = time.time()
        # No gradients are needed for inference; this saves memory and time.
        with torch.no_grad():
            generated_ids = model_to_use.generate(**model_inputs, **generation_config)
        duration = time.time() - start_time

        # generate() returns prompt + continuation; keep only the new tokens of the
        # first (and only) sequence in the batch.
        prompt_length = model_inputs["input_ids"].shape[1]
        response_ids = generated_ids[0][prompt_length:]
        assistant_response = tokenizer_to_use.decode(response_ids, skip_special_tokens=True).strip()

        print(f"   ✅ LLM Response ({duration:.2f}s): '{assistant_response}'")

        # Persist the reply so it is part of the context for the next turn.
        history.append({"role": "assistant", "content": assistant_response})

        return assistant_response

    except Exception as e:
        print(f"❌ Error during LLM generation or decoding: {e}")
        # Roll back everything this call appended so the failed turn cannot corrupt
        # later ones. Earlier, successfully completed turns are left untouched.
        del history[entries_before:]

        return "My apologies, I encountered an unexpected issue while generating a response. Could you please rephrase or try again?"

print("\n✅ 'Think' function defined (get_llm_response) with history management.")
# To test the base model now, uncomment the example usage lines below.

# Example Usage (Optional - uncomment to test the base model)
# print("\n--- Testing LLM Response (Base Model) ---")
# if llm_model and llm_tokenizer:
#      test_resp = get_llm_response("What are the main stages of sleep?", llm_model, llm_tokenizer, conversation_history_base)
#      print(f"\nBase Model Response: {test_resp}")
#      print(f"\nBase History: {conversation_history_base}")
# else:
#      print("\nCannot test LLM, model or tokenizer not loaded.")
# print("--- End Test ---")

# @title Step 7: Define "Speak" Function (TTS Audio Synthesis & Display)
# Reasoning: Defines the function that takes text, converts it to speech using the loaded Piper TTS model,
# saves the audio, and makes it playable within the Colab notebook output.

from IPython.display import Audio, display # For embedding audio player in Colab.
import soundfile as sf # For writing audio data to WAV files.
import numpy as np # For handling audio data arrays.
import time # For timing the synthesis process.
from pathlib import Path # For path manipulation.
import os # For creating directories.

# --- Output Directory Setup ---
# Generated audio responses are written to a dedicated directory so they stay
# separate from the other files in the working directory.
audio_output_dir = Path("./agent_audio_output")
audio_output_dir.mkdir(exist_ok=True) # Create it if it doesn't exist.
# Module-level counter used to number the saved WAV files; speak_text_and_display
# increments it after choosing each filename.
output_audio_counter = 0

# --- Text-to-Speech Function ---
def speak_text_and_display(text_to_speak):
    """
    Synthesizes text using Piper TTS, saves it as a WAV file, and displays a playable audio widget.

    Relies on module-level state defined elsewhere in this notebook:
    'tts_voice', 'tts_sample_rate', 'audio_output_dir' and 'output_audio_counter'.

    Args:
        text_to_speak (str): The text the agent should say. Empty or None text
            is skipped with a warning instead of being sent to the TTS engine.

    Returns:
        None. Every outcome (success, skipped input, failure) is reported via print();
        exceptions raised during synthesis, saving or display are caught and printed.
    """
    global output_audio_counter
    # Reasoning: Nothing to say means nothing to synthesize. Checking this first keeps
    # this function consistent with the Gradio TTS helper, which also rejects empty text.
    if not text_to_speak:
        print("   ⚠️ Warning: No text provided for TTS synthesis.")
        return
    # Reasoning: Check if the TTS model object is loaded and ready.
    if not tts_voice:
        print("❌ Error: Piper TTS voice ('tts_voice') is not loaded.")
        return

    print("\n🔊 Synthesizing speech...")
    # Reasoning: perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()
    try:
        # Reasoning: synthesize_stream_raw yields chunks of raw audio bytes.
        # Each chunk is viewed as int16 samples (assumes Piper emits 16-bit PCM).
        pcm_chunks = [
            np.frombuffer(chunk, dtype=np.int16)
            for chunk in tts_voice.synthesize_stream_raw(text_to_speak)
        ]

        # Reasoning: np.concatenate raises ValueError on an empty list, which used to
        # bypass the "empty audio" warning below. Fall back to an empty array instead.
        if pcm_chunks:
            audio_data = np.concatenate(pcm_chunks)
        else:
            audio_data = np.empty(0, dtype=np.int16)

        # Reasoning: Handle cases where synthesis produced no audio samples at all.
        if audio_data.size == 0:
            print("   ⚠️ Warning: TTS synthesis resulted in empty audio data.")
            return

        duration = time.perf_counter() - start_time
        print(f"   (Audio synthesis took {duration:.2f} seconds)")

        # --- Save Audio File ---
        # Reasoning: Construct a unique filename using the counter for the WAV file.
        output_filename = audio_output_dir / f"napcoach_response_{output_audio_counter}.wav"
        output_audio_counter += 1
        # Reasoning: Write the samples with the sample rate obtained during TTS model loading.
        # The path is passed as a plain string, matching how the Gradio TTS helper calls sf.write.
        sf.write(str(output_filename), audio_data, tts_sample_rate)
        print(f"   💾 Audio saved to: {output_filename}")

        # --- Display Audio Player ---
        # Reasoning: IPython.display.Audio embeds a player for the saved WAV file in the
        # cell output. autoplay=False avoids unexpected sound.
        display(Audio(str(output_filename), autoplay=False))

    except Exception as e:
        # Reasoning: Best-effort notebook helper - report the problem instead of
        # interrupting the surrounding conversation loop.
        print(f"❌ Error during TTS synthesis or saving/displaying audio: {e}")

# Status message: printed when this cell finishes running, i.e. after speak_text_and_display has been defined.
print("\n✅ 'Speak' function defined (speak_text_and_display).")
# To try TTS right away, uncomment the example usage block below.

# Example Usage (Optional - uncomment to test)
# print("\n--- Testing TTS ---")
# speak_text_and_display("Hello! This is your sleep coach speaking.")
# if 'test_resp' in locals() and test_resp:
#      speak_text_and_display(test_resp)
# print("--- End Test ---")

# @title Define Gradio-Specific TTS Function (Place after your original Step 7)

from pathlib import Path # Ensure Path is imported
import numpy as np    # Ensure numpy is imported
import soundfile as sf  # Ensure soundfile is imported
import re             # Ensure re is imported
import os             # Ensure os is imported
import time           # Ensure time is imported

# Folder that receives the WAV files written by synthesize_for_gradio_and_autoplay.
# It is created up front (including any missing parents); an existing folder is fine.
GRADIO_TTS_OUTPUT_DIR = Path("./gradio_tts_audio_responses")
GRADIO_TTS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

def synthesize_for_gradio_and_autoplay(text_to_speak: str) -> str | None:
    """
    Synthesizes speech, saves to a file, and returns the filepath.
    Gradio's Audio component with autoplay=True will play it.
    """
    global tts_voice, tts_sample_rate # Assumes these are globally loaded from your setup

    if not text_to_speak:
        print("Gradio TTS: No text to synthesize.")
        return None
    if tts_voice is None:
        print("❌ Gradio TTS: TTS Voice not loaded!")
        return None

    timestamp = int(time.time() * 1000)
    # Save directly into a location Gradio can serve if needed, or just a temp path.
    # For simplicity, we'll save it and Gradio will make it accessible.
    output_filename = f"napcoach_gradio_response_{timestamp}.wav"
    output_filepath = GRADIO_TTS_OUTPUT_DIR / output_filename

    try:
        print(f"Gradio TTS: Synthesizing '{text_to_speak}' to {output_filepath}...")

        # --- TTS Pre-processing for dashes ---
        processed_tts_text = text_to_speak
        processed_tts_text = re.sub(r'(\d+)-(\d+)', r'\1 to \2', processed_tts_text)
        # Add any other pre-processing you need here
        # --- End TTS pre-processing ---

        audio_data_chunks = []
        for chunk in tts_voice.synthesize_stream_raw(processed_tts_text):
            audio_data_chunks.append(np.frombuffer(chunk, dtype=np.int16))

        if not audio_data_chunks:
            print("Gradio TTS: No audio data generated from synthesize_stream_raw.")
            return None

        audio_data = np.concatenate(audio_data_chunks)
        if audio_data.size == 0:
            print("Gradio TTS: Concatenated audio data is empty.")
            return None

        sf.write(str(output_filepath), audio_data, tts_sample_rate)
        print(f"Gradio TTS: Audio saved to {output_filepath}")
        return str(output_filepath) # Return the filepath as a string
    except Exception as e:
        print(f"❌ Gradio TTS Error: {e}")
        return None

# Status message: printed when this cell finishes running, i.e. after the Gradio TTS helper has been defined.
print("✅ Gradio-specific TTS function 'synthesize_for_gradio_and_autoplay' defined.")

# @title Step 8: Prepare and Load Fine-Tuning Dataset (Using User-Provided Data)
# Reasoning: This step defines, saves, and loads the specialized dataset used to adapt the LLM
# for sleep coaching conversations, incorporating wearable data, diaries, and research knowledge.
# This version uses the comprehensive dataset provided by the user.

from datasets import load_dataset, Dataset, Features, Value
import json
import os

# --- Define Fine-Tuning Data (User-Provided Examples) ---
# Reasoning: Use the high-quality, diverse examples provided by the user, covering wearables,
# diaries, research Q&A, and safety boundaries. Store as a list of dictionaries, each with an
# "instruction" key (the user's data and/or question) and a "response" key (the coach's reply).
fine_tuning_examples = [
    {"instruction": "User data: Slept 5 hours 10 minutes, 45 mins deep sleep, 1 hour REM. Felt very tired waking up. Question: What should I focus on?", "response": "Getting only around 5 hours isn't ideal for most adults, which likely explains the tiredness. Focus on extending your total sleep time towards the 7-9 hour range. A consistent wind-down routine before bed might help you fall asleep faster and get more restorative deep and REM sleep."},
    {"instruction": "What's the ideal temperature for a bedroom for good sleep?", "response": "Research suggests a cool room, typically between 60-67 degrees Fahrenheit (15-19 Celsius), is optimal for sleep. This helps lower your core body temperature, signaling it's time to rest."},
    {"instruction": "I woke up frequently last night, maybe 4-5 times according to my tracker. Is this normal?", "response": "Brief awakenings during the night are normal and often forgotten. However, waking up 4-5 times and remembering it might indicate fragmented sleep. Consider factors like noise, light, temperature, needing the bathroom, or stress. If it persists, keeping a sleep diary or consulting a professional could be helpful."},
    {"instruction": "Analyze this: Bedtime 11:30 PM, Wake time 7:00 AM. Time in bed 7.5 hours. Total Sleep: 6 hours 45 mins. Sleep Efficiency: 90%. Deep: 1h 15m, REM: 1h 30m. Feeling: Okay.", "response": "Your sleep efficiency of 90% is good (above 85% is generally considered fine). Total sleep time is near the lower end of the recommended 7-9 hours, which might explain feeling just 'Okay'. You have healthy amounts of Deep and REM sleep within that time. If you want to feel more rested, you could try shifting your bedtime slightly earlier to increase total sleep duration."},
    {"instruction": "My sleep tracker says I only got 30 minutes of deep sleep last night out of 7 hours total. Is that bad?", "response": "While individual needs vary, typically deep sleep makes up about 13-23% of total sleep for adults. 30 minutes out of 7 hours is roughly 7%, which is on the low side. Factors like stress, alcohol before bed, or an uncomfortable sleep environment can reduce deep sleep. Focusing on a consistent bedtime routine and managing stress might help increase it."},
    {"instruction": "I went to bed at 10 PM but didn't fall asleep until after midnight. What can I do?", "response": "Lying in bed awake for that long can create frustration. If you don't fall asleep within about 20-30 minutes, it's often recommended to get out of bed, do a relaxing, non-stimulating activity (like reading a physical book under dim light) in another room, and only return to bed when you feel sleepy again. Also, review your pre-sleep routine – avoid screens and caffeine late in the day."},
    {"instruction": "Is it better to sleep cold or warm?", "response": "Generally, a cooler room temperature is better for sleep, typically around 60-67 degrees Fahrenheit (15-19 Celsius). A cool environment helps lower your core body temperature, which signals to your body that it's time to sleep. Sleeping too warm can lead to restlessness and fragmented sleep."},
    {"instruction": "What's sleep hygiene?", "response": "Sleep hygiene refers to habits and practices that help promote consistently good sleep. Key elements include maintaining a regular sleep-wake schedule (even on weekends), creating a relaxing bedtime routine, ensuring your bedroom is dark, quiet, and cool, avoiding large meals, caffeine, and alcohol close to bedtime, and getting regular exercise (but not too close to bedtime)."},
    {"instruction": "My Whoop report: 7h 10m Time in Bed, 6h 35m Sleep, 89% Efficiency. Stages: 15% Awake, 25% REM, 45% Light, 15% Deep. RHR 55bpm, HRV 68ms. Felt pretty good waking up. Is this a good sleep profile?", "response": "Based on your report, that looks like a solid night! An efficiency of 89% is great (above 85% is generally good). 6h 35m of sleep is within the healthy range for many adults, though some need more. Your sleep stage percentages (25% REM, 15% Deep) are well within typical healthy ranges. The RHR of 55 and HRV of 68ms also sound positive, reflecting good recovery. Most importantly, you felt good waking up, which is a key indicator!"},
    {"instruction": "Fitbit stats: TIB 8h 30m, TST 7h 05m, Efficiency 83%. Stages: Awake 1h 25m (17%), REM 1h 15m (18%), Light 4h 30m (64%), Deep 55m (13%). RHR 60bpm. I feel tired and sluggish today. Why?", "response": "Thanks for sharing. While 7h 05m total sleep isn't drastically low, the lower efficiency (83%) and significant time awake (nearly 1.5 hours) suggest your sleep might have been fragmented. Also, your Deep sleep percentage (13%) is right at the lower edge of the typical 13-23% range. Deep sleep is vital for physical restoration, so getting less could contribute to feeling tired and sluggish. Focusing on minimizing nighttime disturbances and optimizing your sleep environment might help improve both efficiency and deep sleep."},
    {"instruction": "Apple Health Sleep shows only 45 minutes of Deep sleep out of 7.5 hours total sleep. Is this something to worry about?", "response": "Deep sleep typically makes up about 13-23% of total sleep for adults, so 45 minutes out of 7.5 hours (which is about 10%) is indeed on the lower side. While occasional nights with less deep sleep happen, consistently getting low amounts might leave you feeling less physically refreshed. Factors like stress, alcohol consumption before bed, sleeping too warm, or irregular sleep schedules can reduce deep sleep. Improving sleep hygiene often helps. However, if you consistently feel unrefreshed or have concerns, it's always best to consult with a healthcare professional for personalized advice."},
    {"instruction": "My Garmin recorded a Resting Heart Rate of 65bpm during sleep, usually it's around 55bpm. What could cause this increase?", "response": "Resting Heart Rate (RHR) during sleep can fluctuate based on various factors. An occasional increase like going from 55 to 65bpm could be due to things like increased stress the previous day, eating a heavy meal or drinking alcohol close to bedtime, fighting off a mild illness (even before you feel sick), or even just a warmer sleeping environment. Look for trends over several days rather than focusing on a single night. If your RHR stays elevated consistently or if you have other symptoms or concerns, checking in with a healthcare professional is recommended."},
    {"instruction": "What does Heart Rate Variability (HRV) during sleep mean? Mine was 55ms.", "response": "Heart Rate Variability (HRV) measures the variation in time between consecutive heartbeats. Higher HRV during sleep is generally associated with better rest, recovery, and nervous system balance (specifically, more parasympathetic or 'rest-and-digest' activity). 55ms is a specific value, and what's 'good' is highly individual and depends on your baseline, age, fitness, etc. It's more useful to track your *trend* over time rather than comparing to others. A consistent rise might indicate improved recovery, while a significant drop could suggest increased stress, overtraining, or illness."},
    {"instruction": "Sleep Diary: Bed 10:30 PM, intended sleep 11:00 PM. Read book, lights out 11:10 PM. Mind felt busy, tossed around. Think I fell asleep around 12:45 AM. Woke up ~4 AM, couldn't get back sleep until 5:15 AM. Alarm 6:30 AM. Feel wrecked. Had a stressful work day.", "response": "That sounds like a really rough night, and it makes sense you feel wrecked. The diary clearly shows significant difficulty falling asleep (over 1.5 hours after lights out) and a long awakening mid-night (over an hour). High stress from your workday very likely contributed to your mind racing and made it hard to initiate and maintain sleep. When this happens, standard advice includes having a consistent wind-down routine specifically designed to de-stress before bed (e.g., meditation, gentle stretching, journaling). If high stress frequently impacts your sleep, exploring stress management techniques during the day might also be beneficial."},
    {"instruction": "Diary: Asleep 11:30 PM, woke 7:30 AM feeling amazing! Slept soundly. Odd thing: I skipped dinner last night because I worked late.", "response": "It's fantastic you felt amazing after sleeping soundly for 8 hours! While skipping dinner isn't generally recommended as a regular habit for overall health and energy levels, it might not have negatively impacted your sleep *this specific night*, especially if you weren't overly hungry. Sometimes, a very large or heavy meal close to bed can disrupt sleep more than skipping occasionally might. However, consistently skipping meals can affect energy and potentially sleep patterns long-term, so it's good you noticed it was unusual."},
    {"instruction": "I wake up at 4 AM almost every single morning, wide awake, and can't get back to sleep. I go to bed around 11 PM and need to wake at 7 AM. Help!", "response": "Waking up too early and being unable to return to sleep (known as early morning awakening insomnia) is really disruptive. Potential contributing factors include stress, anxiety, your natural circadian rhythm shifting earlier (common as we age), or even light exposure too early. Some strategies include: 1. Ensuring your bedroom is completely dark (use blackout curtains if needed). 2. Avoid checking the time when you wake up. 3. Stick to a very consistent bedtime. 4. Consider if you're going to bed *too* early for your natural rhythm. 5. If you don't fall back asleep after 15-20 mins, get up and do a quiet activity until sleepy. If this persists despite trying these, consulting a healthcare professional is important to rule out other causes."},
    {"instruction": "Can you explain the different stages of sleep simply?", "response": "Think of sleep happening in cycles. Each cycle has stages: First, you drift into LIGHT sleep (Stages N1 and N2), where your body starts relaxing. Then, you enter DEEP sleep (Stage N3 or Slow-Wave Sleep), which is crucial for physical repair, growth, and feeling refreshed. After deep sleep, you typically go into REM (Rapid Eye Movement) sleep, where most dreaming occurs, and which is vital for memory consolidation, learning, and emotional processing. You cycle through these stages multiple times a night, with more deep sleep earlier and more REM sleep later."},
    {"instruction": "What's the most important part of sleep hygiene?", "response": "While all aspects of sleep hygiene are helpful, arguably the MOST important element for many people is maintaining a **consistent sleep-wake schedule**. Going to bed and waking up around the same time every day, even on weekends, helps regulate your body's internal clock (circadian rhythm). This makes it easier to fall asleep at night and wake up feeling refreshed naturally. Other key elements like creating a cool, dark, quiet environment and having a relaxing bedtime routine are also very important, but the schedule often provides the strongest foundation."},
    {"instruction": "Is it bad to use my phone in bed before trying to sleep?", "response": "Yes, it's generally advised against using your phone (or other bright screens like tablets/laptops) in bed before sleep. The blue light emitted suppresses melatonin production, making it harder to feel sleepy. Additionally, the content you engage with (social media, news, games) can be mentally stimulating or even stressful, further hindering your ability to wind down. It's best to put screens away at least 30-60 minutes before bed and keep the bedroom as a screen-free zone associated with sleep."},
    {"instruction": "I feel sleepy around 3 PM every day. Should I take a nap?", "response": "That afternoon dip in alertness is quite common due to our natural circadian rhythm. Whether to nap depends on your situation. A short 'power nap' of about 20-30 minutes can effectively boost alertness without causing grogginess (sleep inertia) or significantly impacting your nighttime sleep. However, if you have trouble falling asleep or staying asleep at night (insomnia), it's often recommended to avoid napping altogether, as it can reduce your 'sleep drive' for nighttime sleep. If you do nap, keep it short and avoid napping too late in the day (e.g., after 4 PM)."},
    {"instruction": "Does eating close to bedtime affect sleep?", "response": "Yes, eating a large or heavy meal close to bedtime can negatively affect sleep. Digestion is an active process that can raise your body temperature slightly and potentially cause discomfort like indigestion or heartburn, making it harder to fall asleep or stay asleep. It's generally recommended to finish dinner at least 2-3 hours before your intended bedtime. If you're hungry later, a small, light, easily digestible snack might be okay, but avoid heavy, fatty, or spicy foods."},
    {"instruction": "I think I might have sleep apnea, I snore loudly and sometimes wake up choking.", "response": "For medical concerns or diagnoses, it's essential to consult with a qualified healthcare professional. Snoring loudly and waking up choking or gasping can be signs of sleep apnea, which requires proper medical evaluation and treatment."},
    {"instruction": "Explain why a cool room helps sleep.", "response": "A cool room helps facilitate sleep because our core body temperature naturally needs to drop slightly to initiate and maintain sleep. A cooler environment (typically 60-67°F or 15-19°C) supports this natural temperature drop, signaling to your body that it's time to rest. A room that's too warm can interfere with this process, leading to restlessness, more awakenings, and less time spent in restorative deep and REM sleep."},
    {"instruction": "My sleep tracker often shows less 'Total Sleep Time' than my 'Time in Bed'. Why the difference?", "response": "That's completely normal! 'Time in Bed' is simply the total duration from when you got into bed until you got out of bed. 'Total Sleep Time' is the estimate of how much of that time you were actually asleep. The difference accounts for the time it took you to fall asleep initially (sleep latency), any time you spent awake during the night (whether you remember it or not), and time spent in bed after waking up in the morning. Higher 'Sleep Efficiency' (Total Sleep Time divided by Time in Bed) generally indicates better sleep quality."},
    {"instruction": "Is 'Light Sleep' important? My tracker says I get a lot of it.", "response": "Yes, light sleep (which includes stages N1 and N2) is important and typically makes up the largest portion of total sleep time (around 50-60% or more). While N1 is very light (the transition phase), N2 is where you spend much of your sleep. During N2, your body continues to relax, body temperature drops, and brain waves slow down. It plays a role in memory consolidation and processing, and it serves as the pathway to deeper sleep stages and REM. So, getting plenty of light sleep is a normal and necessary part of a healthy sleep cycle."},
    {"instruction": "User Data: TST: 5h 30m, Efficiency 75%, Deep 1h (18%), REM 1h (18%), RHR 50. Feeling surprisingly okay. Is this sustainable?", "response": "It's interesting that you feel okay despite getting only 5.5 hours of sleep, which is significantly less than the recommended 7-9 hours for most adults. While your Deep and REM percentages within that sleep time look reasonable, and your RHR is low, consistently getting this little sleep is generally not considered sustainable or optimal for long-term physical and cognitive health, even if you feel functional in the short term. Chronic partial sleep deprivation can have cumulative negative effects. It might be worth exploring strategies to gradually extend your sleep duration closer to the recommended range."},
    {"instruction": "Can meditation help me sleep better?", "response": "Yes, meditation and mindfulness practices can be very effective tools for improving sleep for many people. They work by helping to calm the mind, reduce racing thoughts, lower stress and anxiety levels, and activate the body's relaxation response (parasympathetic nervous system). Practicing regularly, especially as part of a wind-down routine before bed, can make it easier to fall asleep and potentially improve sleep quality. There are many guided meditations specifically designed for sleep available through apps or online."},
    {"instruction": "My Whoop recovery score is low (35%) even though I slept 8 hours. What gives?", "response": "A low recovery score on Whoop despite adequate sleep duration often suggests that while you slept long enough, the *quality* of that sleep might not have been optimal for recovery, or other factors influenced the score. Key metrics impacting Whoop recovery include HRV, RHR, and Respiratory Rate, compared to your personal baseline. A low score could indicate higher-than-usual RHR, lower-than-usual HRV, or elevated respiratory rate during sleep. This might happen due to factors like high stress the previous day, intense late-night exercise, illness, alcohol consumption, poor sleep environment, or even just natural variation. Look at the specific metric trends within the app for more clues."},
    {"instruction": "Diary: Fell asleep around 11pm, woke up 6am. But I remember dreaming ALL night, really vivid dreams. Felt a bit mentally tired after. Normal?", "response": "Remembering vivid dreams often happens when you wake up directly from or soon after REM sleep. REM sleep is most concentrated in the later part of the night, so waking up around 6am after falling asleep at 11pm means you likely had significant REM periods. While dreaming is normal and healthy (part of REM's function), feeling mentally tired *despite* adequate sleep duration might occasionally happen after nights with unusually intense or prolonged REM, though it's often linked to other factors too. If it's just an occasional occurrence and you generally feel okay, it's likely normal variation in your sleep cycles."},
    {"instruction": "What's the connection between stress and sleep?", "response": "Stress and sleep have a strong, two-way relationship. High stress levels activate the body's 'fight-or-flight' response, releasing hormones like cortisol and adrenaline. These hormones increase alertness and physiological arousal, making it difficult to fall asleep and stay asleep (causing insomnia). Conversely, poor sleep or lack of sleep increases stress hormone levels, reduces emotional resilience, and makes you more susceptible to feeling stressed during the day. Managing stress through techniques like exercise, mindfulness, and relaxation is therefore crucial for improving sleep, and getting good sleep is vital for managing stress."},
    {"instruction": "Is it better to stick to the same bedtime or the same wake-up time?", "response": "While both are helpful, sleep experts generally emphasize that sticking to a **consistent wake-up time** is more critical for anchoring your body's internal clock (circadian rhythm). Waking up at the same time every day, even on weekends, helps regulate your sleep-wake cycle most effectively. Your bedtime might naturally vary slightly based on when you feel tired, but the consistent wake-up time provides the strongest signal for maintaining rhythm."},
    {"instruction": "Explain 'sleep pressure' or 'sleep drive'.", "response": "Sleep pressure, also called sleep drive or homeostatic sleep drive, is the natural build-up of sleepiness that occurs the longer you stay awake. Think of it like hunger – the longer you go without eating, the hungrier you get. Similarly, the longer you're awake, the more a substance called adenosine accumulates in your brain, increasing the 'pressure' to sleep. Getting sufficient sleep resets this pressure. Understanding sleep pressure helps explain why naps can sometimes make nighttime sleep harder (they reduce the pressure) and why maintaining a consistent schedule helps align sleep pressure with your circadian rhythm."},
    {"instruction": "My partner says I stopped breathing momentarily in my sleep. What should I do?", "response": "For medical concerns or diagnoses, it's essential to consult with a qualified healthcare professional. Pauses in breathing during sleep can be a sign of sleep apnea, a condition that needs proper medical evaluation and management. Please schedule an appointment with your doctor to discuss what your partner observed."},
    {"instruction": "Can white noise help with sleep?", "response": "White noise can be helpful for some people, especially those sensitive to background noise or living in noisy environments. It works by creating a constant, consistent soundscape that masks other, more abrupt or jarring sounds (like traffic, doors closing, or a partner snoring) that might otherwise wake you up or prevent you from falling asleep. However, it's not beneficial for everyone, and some prefer complete quiet. If you try it, keep the volume at a reasonable level."},
    {"instruction": "Fitbit Sleep Score was 75 ('Fair'). TST 6h 50m, Deep 15%, REM 20%. Why just 'Fair'?", "response": "A Fitbit Sleep Score considers multiple factors beyond just duration. A score of 75 ('Fair') likely means some aspects were good, while others could be improved. While your Deep (15%) and REM (20%) percentages are within typical ranges, the Total Sleep Time (6h 50m) is slightly below the commonly recommended 7+ hours. Fitbit also heavily weights 'Restoration' metrics, which look at your sleeping heart rate compared to your resting heart rate and how much time you spent below RHR. Perhaps your sleeping heart rate wasn't as low as usual, or you had periods of restlessness (counted in the 'Awake/Restless' time). Check the detailed breakdown in the Fitbit app for more specific insights on why the score wasn't higher."},
    {"instruction": "I drank coffee at 4 PM yesterday and couldn't fall asleep until 1 AM. Is that related?", "response": "Yes, that's very likely related. Caffeine has an average half-life of about 5-6 hours, meaning half the caffeine is still in your system that long after consumption. Drinking coffee at 4 PM means a significant amount was likely still active in your brain when you tried to sleep around 11 PM or midnight, blocking adenosine receptors and promoting alertness. This commonly causes difficulty falling asleep (increased sleep latency). To avoid this, it's generally recommended to stop caffeine intake at least 6-8 hours before your intended bedtime."},
    {"instruction": "Whoop: TST 6h 55m, Eff 85%, Awake 1h 10m (14%), REM 28%, Light 42%, Deep 16%. RHR 57, HRV 58ms. Diary: Felt pretty stressed yesterday, did a light yoga routine before bed. Woke up feeling decent, not amazing.", "response": "Getting almost 7 hours of sleep with 85% efficiency is quite good, especially considering you were stressed yesterday. Your REM sleep (28%) is excellent, suggesting good cognitive processing, and Deep sleep (16%) is within the normal range. The RHR (57) and HRV (58ms) seem reasonable, though perhaps not your personal best if stress was high. The light yoga was a good choice for wind-down. Feeling 'decent' rather than 'amazing' could simply be the lingering effects of yesterday's stress, even if sleep metrics look mostly okay. Consistent stress management and sleep routines should help."},
    {"instruction": "Fitbit: TIB 8h 40m, TST 7h 50m, Eff 90%. Stages: Deep 10%, REM 19%. RHR 60. Question: My Deep and REM % seem low even though I slept almost 8 hours with good efficiency. What gives?", "response": "That's a valid observation. While your overall duration (7h 50m) and efficiency (90%) are excellent, having both Deep sleep at 10% (below 13-23% typical range) and REM at 19% (at the very low end of 20-25% typical range) suggests the *architecture* of your sleep might be suboptimal, even if it was continuous. Factors that can selectively suppress these stages include alcohol consumption, certain medications, inconsistent sleep schedules (even if total time is long), or underlying stress/health issues. It might be worth reviewing your pre-sleep habits and environment for potential disruptors specifically affecting these restorative stages."},
    {"instruction": "Apple Watch: TST 7h 00m. Respiratory Rate during sleep averaged 16 breaths/min, usually it's 14. Should I worry?", "response": "A small increase in average Respiratory Rate during sleep, like from 14 to 16 breaths per minute, isn't usually a cause for immediate concern on its own, as it can fluctuate slightly. Potential minor causes include nasal congestion (making breathing slightly harder), sleeping in a warmer room, recent strenuous exercise, or even changes in dream activity. However, consistently elevated respiratory rate, especially if accompanied by other symptoms like loud snoring, gasping, excessive daytime sleepiness, or drops in blood oxygen (if measured), could potentially indicate underlying issues. For medical concerns or diagnoses, it's essential to consult with a qualified healthcare professional."},
    {"instruction": "Garmin Body Battery started at 60 yesterday, drained to 5 by bedtime, and only recharged to 55 overnight despite 7h TST. Why such poor recharge?", "response": "Garmin's Body Battery estimates your energy reserves based on HRV, stress, and activity. Recharging primarily happens during restful sleep. Only recharging to 55 despite 7 hours TST suggests the sleep wasn't as restorative as it could have been, or daytime stress/activity significantly depleted your reserves. Factors contributing to poor recharge include: high stress levels carrying into sleep (check overnight stress graph if available), poor sleep quality (even if duration seems okay - look at HRV/RHR trends during sleep), consuming alcohol, being unwell, or over-training. The low starting point (drain to 5) also means more recovery was needed. Focus on both stress management during the day and optimizing sleep quality at night."},
    {"instruction": "Oura Ring shows my body temperature deviated +0.5 degrees Celsius above baseline last night. I feel fine though.", "response": "Oura tracks subtle body temperature changes during sleep. A deviation of +0.5°C (about +0.9°F) above your baseline is a noticeable increase. While you feel fine now, this often indicates your body is fighting something off – it can be an early sign of illness even before other symptoms appear. It could also be related to factors like intense exercise late in the day, a large meal close to bed, alcohol, or hormonal cycles (in women). Keep an eye on how you feel over the next day or two, and monitor the temperature trend. If it stays elevated or you develop symptoms, prioritize rest."},
    {"instruction": "Diary: Bed 11pm, Asleep 11:20pm. Woke 6:45am. Watch Data: TST 7h 30m, Eff 95%, Deep 20%, REM 25%, RHR 51, HRV 70. Feeling: STILL TIRED! How is this possible?", "response": "That's frustrating! Objectively, your sleep data looks fantastic – excellent duration, efficiency, stage balance, and recovery metrics (RHR/HRV). Feeling tired despite this can sometimes happen due to: 1. **Sleep Inertia:** Significant grogginess immediately upon waking that might take longer than usual to shake off. 2. **Accumulated Sleep Debt:** If you were significantly sleep-deprived on previous nights, one good night might not be enough to fully recover. 3. **Underlying Health Factors:** Things unrelated to sleep quality itself (like nutrient deficiencies, thyroid issues, chronic fatigue syndrome, low-grade illness) can cause daytime tiredness. 4. **Circadian Misalignment:** Even if duration is good, perhaps the timing isn't quite right for your internal clock. Given the excellent metrics, if this persists, exploring non-sleep factors with a healthcare professional might be warranted."},
    {"instruction": "Diary: Stayed up until 2 AM studying for an exam, slept until 7 AM (5 hours). Have another exam today. Best strategy now? Nap or caffeine?", "response": "Getting only 5 hours of sleep before an exam is tough. Both napping and caffeine have pros and cons here. **Caffeine** can provide a temporary boost in alertness for the exam but might lead to a crash later and won't address the underlying sleep debt. **A short nap** (20-30 minutes) could improve alertness and potentially memory consolidation without causing too much grogginess, but finding time might be hard, and oversleeping could make you feel worse. **Strategy:** If possible, a short power nap earlier in the day (e.g., early afternoon) might be most beneficial for cognitive function. If napping isn't feasible, use caffeine strategically (e.g., one dose before the exam), stay hydrated, and prioritize getting recovery sleep *after* your exam today."},
    {"instruction": "Had a nightmare that woke me up around 4 AM. Heart was pounding. Took me an hour to get back to sleep. Impact?", "response": "Nightmares, especially vivid ones that wake you up with a physical stress response (pounding heart), can definitely impact sleep quality. They often occur during REM sleep. The awakening itself fragments your sleep, reducing total sleep time and efficiency. The lingering anxiety or stress from the nightmare can then make it difficult to fall back asleep (sleep maintenance insomnia), further impacting restfulness. While occasional nightmares are normal, frequent ones might be linked to stress, anxiety, trauma, or certain medications, and are worth discussing with a healthcare professional if they significantly disrupt your sleep or well-being."},
    {"instruction": "Explain the role of Ghrelin and Leptin in relation to sleep.", "response": "Ghrelin and Leptin are key hormones regulating hunger and fullness, and they are significantly influenced by sleep. **Ghrelin** is the 'hunger hormone' – it stimulates appetite. **Leptin** is the 'satiety hormone' – it signals fullness and suppresses appetite. When you are sleep-deprived, your body tends to produce **more ghrelin** and **less leptin**. This hormonal imbalance leads to increased feelings of hunger, reduced feelings of fullness, and often cravings for high-calorie, high-carbohydrate foods, contributing to the link between poor sleep and weight gain."},
    {"instruction": "What is Orexin (Hypocretin) and how does it relate to wakefulness?", "response": "Orexin (also called Hypocretin) is a crucial neurotransmitter produced in the hypothalamus. Its primary role is to **promote wakefulness and alertness**. Orexin neurons activate other wakefulness-promoting centers in the brain. The loss of these orexin-producing neurons is the underlying cause of narcolepsy, a condition characterized by excessive daytime sleepiness and sudden transitions into sleep. Maintaining stable orexin levels during the day is essential for sustained alertness."},
    {"instruction": "Beyond light, what other 'zeitgebers' (time-givers) influence our circadian rhythm?", "response": "While light (especially sunlight) is the most powerful 'zeitgeber' or external cue that synchronizes our internal circadian clock, other factors play a role too. These include: 1. **Meal Timing:** Regularly timed meals can help anchor the body's internal rhythms. 2. **Activity/Exercise Timing:** Regular physical activity patterns contribute to the cycle. 3. **Social Interaction:** Scheduled social engagements can act as cues. 4. **Temperature:** Environmental temperature cycles influence body temperature rhythms. Consistent timing of these cues reinforces a stable circadian rhythm, complementing the effect of the light-dark cycle."},
    {"instruction": "I work night shifts sometimes. How can I minimize the disruption to my sleep schedule?", "response": "Working night shifts is inherently disruptive to the natural circadian rhythm. To minimize harm: 1. **Maximize Darkness during Sleep:** Make your bedroom extremely dark (blackout curtains, eye mask) for daytime sleep. 2. **Control Light Exposure:** Wear dark sunglasses on the way home from a night shift to avoid morning light exposure. Use bright light therapy upon waking before your night shift starts. 3. **Maintain Consistency:** On workdays, stick to your daytime sleep schedule as consistently as possible. On days off, try to maintain a schedule closer to your night shift pattern or shift gradually, avoiding abrupt flip-flopping if possible. 4. **Optimize Environment:** Keep the sleep environment cool and quiet. Minimize caffeine late in your 'work night'. 5. **Strategic Napping:** A nap before the night shift can help improve alertness."},
    {"instruction": "What happens biologically if I consistently only get 6 hours of sleep when I need 8?", "response": "Consistently getting 6 hours when you need 8 results in chronic partial sleep deprivation, leading to cumulative 'sleep debt'. Biologically, this has several consequences: cognitive functions decline (attention, memory, decision-making), emotional regulation suffers (increased irritability, stress sensitivity), immune function weakens (making you more susceptible to illness), metabolic health is impacted (increased risk of weight gain, insulin resistance), stress hormone levels (cortisol) can become dysregulated, and your perception of performance often decreases less than your actual performance impairment (you don't realize how impaired you are)."},
    {"instruction": "Combined Data: Diary: Ate pizza and drank 2 glasses of wine at 9 PM. Bed 11 PM, tossed/turned until 12:30 AM. Woke 3 times feeling hot/uncomfortable. Alarm 7 AM, feel sluggish. Watch Data: TST 5h 45m, Eff 72%, Deep 6%, REM 14%, RHR 68, HRV 35. Analysis?", "response": "The combination of your diary and watch data clearly points to poor sleep quality driven by pre-bed choices. Eating a heavy meal (pizza) and drinking alcohol late significantly disrupted your sleep. This likely caused indigestion and elevated body temperature (feeling hot), contributing to the difficulty falling asleep (1.5h latency) and multiple awakenings. The result is very low total sleep (5h 45m), poor efficiency (72%), severely suppressed Deep (6%) and REM (14%) sleep, and physiological stress indicators (high RHR 68, very low HRV 35). Avoiding large meals and alcohol within 2-3 hours of bedtime is crucial for better sleep."},
    {"instruction": "My usual RHR is 55, HRV 60. Last night after a very stressful presentation: RHR 58, HRV 55. I slept 7.5 hours. Why aren't the metrics worse?", "response": "It's interesting that your sleep metrics weren't more significantly impacted after a very stressful day. While RHR is slightly up and HRV slightly down, the changes aren't dramatic. This could mean a few things: 1. Your stress response might not have lingered strongly into the night. 2. You might have good physiological resilience to stress. 3. The 7.5 hours of sleep, even if slightly less restorative than your absolute best, was sufficient to prevent a major deviation in the metrics measured by your device. It shows that sometimes the *perception* of stress doesn't perfectly correlate one-to-one with overnight physiological metrics, though often it does."},
    {"instruction": "If sleep efficiency is king, should I just restrict my time in bed to exactly match my average sleep time? Say, 6.5 hours in bed if I average 6.5 hours sleep?", "response": "That technique, called Sleep Restriction Therapy, is a core component of Cognitive Behavioral Therapy for Insomnia (CBT-I), but it should ideally be done under professional guidance. The goal *is* to increase sleep efficiency by reducing time spent awake in bed. However, initially restricting time in bed so drastically can lead to significant daytime sleepiness. It's usually implemented by slightly reducing time in bed (e.g., ensuring TIB isn't much longer than current TST + 30 mins) and then *gradually* increasing TIB as efficiency improves above 85-90%, without sacrificing efficiency. It's a specific therapeutic technique, not just a simple self-help trick."},
    {"instruction": "Can specific foods promote sleep? Like turkey or warm milk?", "response": "The idea that specific foods strongly promote sleep is mostly based on weak evidence or myth. Turkey contains tryptophan, an amino acid precursor to serotonin and melatonin, but likely not in high enough amounts relative to other amino acids to have a significant sedative effect. Warm milk's effect is often attributed more to the comforting ritual and warmth than a specific chemical property. While a balanced diet supports overall health, including sleep, relying on specific 'magic' foods isn't a primary strategy. Avoiding caffeine, alcohol, and heavy meals close to bed is generally more impactful."},
    {"instruction": "Explain sleep cycles and how they change through the night.", "response": "We cycle through different stages of sleep roughly every 90-110 minutes. A typical cycle starts with light NREM sleep (N1, then N2), progresses into deep NREM sleep (N3), and then transitions into REM sleep. After the REM stage, the cycle often repeats, possibly with a brief awakening. Importantly, the *proportion* of stages within each cycle changes as the night progresses. The cycles early in the night (first third) are typically dominated by **Deep Sleep (N3)**. As the night goes on, Deep Sleep decreases, and the duration of **REM sleep** periods increases, becoming longest in the cycles closer to morning."},
    {"instruction": "Is it true that 'social jetlag' from different weekend schedules is bad?", "response": "Yes, 'social jetlag' refers to the mismatch between your sleep schedule on work/school days versus free days (like weekends). Staying up much later and sleeping in significantly on weekends creates a discrepancy similar to traveling across time zones. This disrupts your body's internal clock (circadian rhythm), which can lead to difficulty falling asleep on Sunday night, trouble waking up on Monday morning, daytime sleepiness, reduced performance, and potentially contribute to metabolic health issues long-term. Maintaining a relatively consistent sleep schedule (+/- 1 hour) even on weekends is generally recommended to minimize social jetlag."},
    {"instruction": "Wearable says my SpO2 (oxygen saturation) dropped to 88% briefly last night. Is that okay?", "response": "Brief, occasional dips in oxygen saturation (SpO2) can sometimes occur during sleep, but dropping to 88% is generally considered quite low (normal is typically 95% or higher). While wearables aren't medical-grade devices and accuracy can vary, readings this low, even if brief, warrant attention. Frequent or significant drops could indicate potential breathing issues during sleep. For medical concerns or diagnoses, especially regarding oxygen levels, it's essential to consult with a qualified healthcare professional. They might recommend further testing like a formal sleep study (polysomnography)."},
    {"instruction": "How does Growth Hormone relate to sleep?", "response": "Human Growth Hormone (HGH) plays a vital role in growth, cell reproduction, and regeneration. A significant portion of the daily release of HGH occurs during deep sleep (Stage N3 or slow-wave sleep), particularly earlier in the night. This highlights the importance of sufficient deep sleep for physical repair, muscle growth, bone health, and overall recovery. Consistently getting inadequate deep sleep can potentially impair these restorative processes linked to HGH release."},
    {"instruction": "I took melatonin 1mg last night and still couldn't sleep. Should I take more?", "response": "Melatonin dosage and effectiveness can be complex. While 1mg is a relatively low dose, simply increasing the dose doesn't always lead to better results for general insomnia and can sometimes increase side effects like grogginess. Melatonin works best for shifting sleep timing (like jet lag) rather than acting as a strong sedative. If 1mg didn't help, it might be that melatonin isn't the right approach for your specific sleep difficulty, or the timing was off. It's best to discuss persistent sleep issues and supplement strategies with a healthcare professional rather than self-adjusting dosages significantly."},
    {"instruction": "Is morning sunlight really that important for setting my body clock?", "response": "Yes, exposure to bright light, particularly natural sunlight, shortly after waking up in the morning is arguably the most powerful signal for synchronizing your body's internal clock (circadian rhythm). This morning light exposure helps suppress melatonin production, reinforces the start of your 'daytime' phase, boosts alertness, and helps ensure you'll feel appropriately sleepy later that evening. Even 10-20 minutes of outdoor morning light can make a significant difference in regulating your sleep-wake cycle."},
    {"instruction": "Can screen 'Night Shift' or 'Blue Light Filter' modes really help sleep?", "response": "Features like 'Night Shift' or blue light filters work by reducing the amount of blue-wavelength light emitted by screens, making the light warmer (more yellow/orange). Since blue light is the most potent suppressor of melatonin, reducing exposure in the evening *can* be helpful and is certainly better than using a screen at full brightness/color temperature. However, these filters don't eliminate all alerting effects of light, nor do they address the mental stimulation from screen content itself. They are a helpful tool, but best used as *part* of a broader strategy that includes dimming overall lights and stopping screen use altogether 30-60 minutes before bed."},
    {"instruction": "Combined Data: Watch: TST 6h 30m, Eff 80%, Deep 10%, REM 16%. Diary: Woke up 3 times to use the bathroom, felt thirsty. Drank lots of water right before bed. Connection?", "response": "There's a very strong connection here. Drinking large amounts of fluids right before bed often leads to needing to urinate during the night, causing awakenings like the three times you noted. These awakenings severely fragment sleep, explaining the low efficiency (80%) and likely contributing to the low Deep (10%) and REM (16%) percentages, resulting in shorter total sleep time (6h 30m). While staying hydrated is important, try to drink most of your fluids earlier in the day and reduce intake significantly in the 1-2 hours before bedtime to minimize nighttime bathroom trips."},
    {"instruction": "My HRV is consistently high (avg 90ms), but my sleep duration is often only 6 hours due to busy schedule. Is high HRV enough?", "response": "Having a consistently high HRV (like 90ms) is generally a great sign of good cardiovascular health and nervous system balance (strong parasympathetic tone). However, it doesn't fully compensate for chronic lack of sleep duration. While your *recovery capacity* might be excellent (reflected in the HRV), consistently getting only 6 hours when you likely need more still leads to accumulating sleep debt. This impacts cognitive function, immune health, and long-term well-being, even if your HRV looks good. High HRV is positive, but sufficient sleep *duration* is also essential and non-negotiable for optimal health."},
    {"instruction": "What's the difference between Cognitive Behavioral Therapy for Insomnia (CBT-I) and just practicing good sleep hygiene?", "response": "Sleep hygiene provides general guidelines for healthy sleep habits (consistent schedule, cool dark room, avoiding stimulants etc.). CBT-I is a structured, evidence-based *therapy* program delivered by a trained professional. It includes sleep hygiene education but goes much further, incorporating cognitive techniques (addressing unhelpful thoughts/beliefs about sleep), behavioral strategies (like stimulus control and sleep restriction tailored to the individual), and relaxation training. CBT-I is considered the first-line treatment for chronic insomnia because it addresses the underlying psychological and behavioral factors maintaining the sleep problem, whereas hygiene alone is often insufficient for established insomnia."},
    {"instruction": "I often feel a 'second wind' of energy around 10 PM, even if I was tired earlier. Why?", "response": "That 'second wind' phenomenon is often related to your circadian rhythm and cortisol levels. Your body's internal clock naturally promotes alertness at certain times. There's often a dip in alertness in the evening as melatonin starts to rise, but sometimes, especially if you push through that initial sleepiness (perhaps due to evening activities or light exposure), your alerting system (potentially involving a small cortisol bump or other wakefulness signals) can kick back in for a period before the sleep drive becomes overwhelming again later. It can sometimes indicate you missed your optimal 'sleep window'."},
    {"instruction": "Can magnesium supplements help with sleep?", "response": "Magnesium plays a role in regulating neurotransmitters involved in sleep and relaxation, like GABA. Some research suggests magnesium supplementation *might* help improve sleep quality, particularly in individuals who are deficient or have specific conditions like Restless Legs Syndrome. However, the evidence for its effectiveness for general insomnia in people with adequate magnesium levels is mixed and not as strong as for other interventions. As with any supplement, it's best to discuss it with a healthcare professional first, as excessive intake can have side effects, and they can assess if it's appropriate for you."},
    {"instruction": "My Whoop showed a skin temp deviation of +1.2°F last night. I also feel a bit achy. Should I take an ibuprofen and go for my run?", "response": "A skin temperature deviation of +1.2°F along with feeling achy can sometimes be an early indicator that your body is fighting something off, like an illness. While I can't give medical advice or suggest medications like ibuprofen, it's generally wise to listen to your body. Pushing through with a run when you're potentially getting sick might hinder your recovery. It might be more beneficial to prioritize rest today and see how you feel. If you continue to feel unwell or have concerns about your symptoms, consulting a healthcare professional is the best course of action."},
    {"instruction": "Fitbit: TST 5h 30m, Efficiency 78%. Deep 8%, REM 12%. RHR 70bpm. Diary: Extremely stressful day, argued with boss, ate takeout late. Can you analyze my stress impact?", "response": "It sounds like a very tough day, and your sleep metrics clearly reflect significant physiological stress. The short total sleep (5h 30m), low efficiency (78%), severely suppressed Deep (8%) and REM (12%) sleep, and elevated RHR (70bpm) are all classic signs of how acute stress, compounded by late eating, can disrupt sleep architecture and recovery. The argument and overall stress likely kept your sympathetic nervous system highly activated, making it difficult to wind down and achieve restorative sleep. Managing daytime stress and avoiding heavy meals close to bedtime are key areas to focus on for improvement."},
    {"instruction": "Apple Watch: Average SpO2 96%, lowest 91%. Respiratory Rate avg 15. Is a brief dip to 91% SpO2 during sleep a major problem?", "response": "An average SpO2 of 96% during sleep is good. Brief, isolated dips in oxygen saturation can occur for various reasons, and a single dip to 91% isn't necessarily a major problem if it was short-lived and your average remains high. However, if such dips are frequent, prolonged, or go lower, or if you have symptoms like loud snoring, gasping, or excessive daytime sleepiness, it would be important to discuss this with a healthcare professional. They can help determine if further investigation is needed, as I cannot provide medical diagnoses."},
    {"instruction": "Garmin Body Battery recharged from 10 to only 40 despite 6h TST. Yesterday was a very light activity day. Why so little recharge if I wasn't physically tired?", "response": "Even on a light activity day, Body Battery recharge can be poor if your sleep quality was compromised or if mental/emotional stress was high. Six hours of Total Sleep Time is also on the shorter side for full recovery for most adults. Factors that could lead to poor recharge despite low physical exertion include: high mental stress (even if you didn't 'feel' physically active, your brain was working hard), poor sleep architecture (not enough deep or REM sleep within those 6 hours), consuming alcohol, or even the early stages of an illness. Check your detailed sleep metrics (HRV, RHR during sleep, stress graph) in the Garmin app for more clues."},
    {"instruction": "Oura Ring Readiness 55. HRV Balance 'Pay Attention', RHR 'Pay Attention'. Sleep Score 68 ('Fair'). What's the interplay here?", "response": "This combination suggests that while your sleep duration or some aspects might have been okay (leading to a 'Fair' Sleep Score of 68), key physiological recovery markers are off, significantly impacting your overall Readiness (55). 'Pay Attention' for HRV Balance likely means your HRV was lower than your baseline, and for RHR, it was likely higher than baseline. These are strong indicators that your body didn't recover optimally overnight, possibly due to stress, illness, poor sleep quality (despite fair duration), or recent strain. The low Readiness score is a direct consequence of these physiological stress signals."},
    {"instruction": "Diary: Bed 12am, lights out 12:15am. Took 2mg melatonin. Woke 8am feeling groggy. Watch: TST 7h, Eff 88%, Deep 15%, REM 20%. Why the grogginess if melatonin is supposed to help?", "response": "While your sleep duration (7h) and efficiency (88%) look reasonable, and your Deep/REM percentages are okay, morning grogginess after taking melatonin can sometimes occur. This 'melatonin hangover' can happen if the dose was a bit too high for you, if it was taken too late, or if your body is sensitive to it. Melatonin primarily helps regulate the timing of sleep; it's not a strong sedative for everyone. The grogginess might also be simple sleep inertia. If you regularly experience this with melatonin, you might consider if the timing is optimal or if a lower dose (or none) might be better, but it's always good to discuss supplement use with a healthcare professional."},
    {"instruction": "I had a dream I was flying, then woke up. Is that REM sleep?", "response": "It's highly likely! Vivid, narrative, and often bizarre dreams like flying are most characteristic of REM (Rapid Eye Movement) sleep. Waking up directly from such a dream often means you were in an REM stage. REM sleep is when brain activity is quite high, similar to wakefulness in some ways, which contributes to these intense dream experiences."},
    {"instruction": "Explain the concept of 'sleep debt' and how long it takes to 'repay' it.", "response": "Sleep debt, or sleep deficit, is the cumulative effect of not getting enough sleep over days, weeks, or longer. If you need 8 hours but consistently get 6, you accumulate 2 hours of sleep debt each night. This debt impairs cognitive function, mood, and physical health. While you can 'repay' some acute sleep debt by sleeping longer for a few nights (e.g., on a weekend), it's thought that fully recovering from chronic, long-term sleep debt can take much longer, potentially days or even weeks of consistently adequate sleep. Moreover, some research suggests that some cognitive impacts of severe chronic sleep loss may not be fully reversible quickly. Preventing sleep debt by prioritizing consistent, sufficient sleep is the best strategy."},
    {"instruction": "What are 'hypnagogic jerks' and are they normal?", "response": "Hypnagogic jerks (or hypnic jerks) are those sudden, involuntary muscle twitches or jolts you might experience just as you're falling asleep (in the N1 stage). They can sometimes be accompanied by a sensation of falling or a brief visual flash. Yes, they are very common and generally considered a normal physiological phenomenon. Their exact cause isn't fully understood but might be related to the nervous system transitioning into sleep mode. They are usually harmless, though they can be startling."},
    {"instruction": "My child grinds their teeth loudly in their sleep. Is this something I should worry about?", "response": "Teeth grinding during sleep, also known as bruxism, is quite common in children and often resolves on its own. However, if it's very frequent, loud, or if your child complains of jaw pain, headaches, or you notice tooth wear, it's a good idea to mention it to their dentist or pediatrician. They can assess the situation and determine if any intervention, like a night guard, is needed. As a sleep coach, I can provide general sleep information, but specific concerns about medical or dental conditions like bruxism should be addressed by a healthcare professional."},
    {"instruction": "I want to know my exact chronotype. Can you tell me?", "response": "While I can discuss general concepts of chronotypes like 'morning larks' or 'night owls,' determining your exact, individual chronotype usually involves specific questionnaires (like the Morningness-Eveningness Questionnaire - MEQ) or sometimes even physiological measurements under controlled conditions. As an AI sleep coach, I don't have the tools to administer those tests or provide a formal chronotype assessment. You can find such questionnaires online or discuss it with a sleep specialist if you're looking for a detailed evaluation."},
    {"instruction": "What's the best way to calculate my total daily sleep need?", "response": "Determining your precise individual sleep need can be a bit tricky as it varies. A good practical method is to, when you have a period with a flexible schedule (like a vacation), go to bed when you feel tired and wake up naturally without an alarm for several consecutive days. The average amount of sleep you get during that period, when you feel well-rested, is likely close to your individual need. For most adults, this falls within the 7-9 hour range, but some individuals might naturally need slightly more or less. Pay attention to how you feel during the day on different amounts of sleep."},
    {"instruction": "I'm trying to improve my sleep. Should I focus on getting more Deep sleep or more REM sleep?", "response": "Both Deep sleep and REM sleep are crucial for different aspects of restoration, and you ideally want healthy amounts of both. Deep sleep is primarily for physical repair and feeling physically refreshed, while REM sleep is more for cognitive functions, memory consolidation, and emotional regulation. Rather than trying to target one specifically (which is hard to do directly), focus on overall good sleep hygiene and getting sufficient total sleep time. When you create the right conditions for healthy sleep overall, your body typically self-regulates the proportions of Deep and REM sleep according to its needs. If your tracker consistently shows very low levels of one or both despite good overall habits, then it might be worth investigating further."},
    {"instruction": "If I feel sleepy while driving, is it okay to just open the window or turn up the music?", "response": "Opening the window or turning up the music are temporary measures that might provide a very brief increase in alertness, but they **do not solve the underlying problem of driver fatigue** and can give a false sense of security. If you are feeling sleepy while driving, the safest and only truly effective solutions are to pull over in a safe place and take a short nap (15-20 minutes) or stop driving altogether until you are properly rested. Drowsy driving is extremely dangerous and similar in impairment to drunk driving."},
    {"instruction": "I keep hitting the snooze button 5 times every morning. Is this habit bad for me?", "response": "Yes, consistently hitting the snooze button can be detrimental. That fragmented, light sleep you get between alarms isn't very restorative and can actually contribute to 'sleep inertia,' making you feel groggier for longer when you finally get out of bed. It also disrupts your body's natural waking process. It's generally better to set your alarm for the latest possible time you actually need to wake up and then get out of bed when it first goes off. If you struggle with this, it might indicate you're not getting enough total sleep, or your sleep schedule needs adjustment."},
    {"instruction": "Tell me about the Glymphatic system and its role during sleep.", "response": "The glymphatic system is a fascinating 'waste clearance' system in the brain that is significantly more active during sleep, particularly deep sleep. Think of it like the brain's internal plumbing that flushes out metabolic byproducts and toxins that accumulate during wakefulness, including proteins like amyloid-beta, which is associated with Alzheimer's disease. During sleep, brain cells appear to shrink slightly, increasing the space between them, allowing cerebrospinal fluid to flow more effectively and clear out these waste products. This highlights another critical reason why sufficient, high-quality sleep is essential for brain health and cognitive function."},
    {"instruction": "I'm a new parent and my sleep is totally wrecked by my baby waking up. Any survival tips?", "response": "Being a new parent is incredibly demanding on sleep, and it's a phase where 'perfect' sleep hygiene often goes out the window. Focus on survival and maximizing rest when you can: 1. **Sleep when the baby sleeps:** This is cliché but true. Even short naps can help. 2. **Share night duties:** If you have a partner, take turns with night wakings if possible. 3. **Simplify everything else:** Lower your expectations for housework, social commitments, etc., to conserve energy. 4. **Accept help:** If friends or family offer to watch the baby so you can nap, take them up on it. 5. **Prioritize your own basic needs:** Stay hydrated and try to eat nutritious food. It's about getting through this period. Things will eventually get better as the baby's sleep matures."},
    {"instruction": "What's the deal with 'polyphasic sleep' schedules like Uberman? Are they effective?", "response": "Polyphasic sleep schedules, like the Uberman schedule (which involves multiple short naps throughout the day with very little or no core night sleep), are highly controversial and generally not recommended by sleep science. While some individuals claim to adapt, the vast majority of people cannot sustain these schedules long-term without significant performance decrements, cognitive impairment, and accumulating severe sleep debt. Our biology is strongly wired for a monophasic or biphasic (one long night sleep + optional short nap) pattern, anchored by a consolidated nighttime sleep period. Extreme polyphasic schedules often lead to chronic sleep deprivation."},
    {"instruction": "Combined: Diary: Bed 10:30pm, asleep by 11. Woke 7:30am. Feeling good. Watch: TST 8h, Eff 94%, Deep 2h (25%), REM 2h (25%). Last night RHR 50, HRV 70. Tonight: RHR 58, HRV 50. Had two glasses of wine with dinner tonight around 7pm. Could that explain the HRV/RHR change despite good sleep duration?", "response": "Yes, that's a very likely explanation. Even though your sleep duration and perceived quality were good both nights, and your sleep stages were excellent, alcohol consumption can significantly impact overnight recovery metrics. The two glasses of wine, even a few hours before bed, likely caused your sleeping Heart Rate (RHR) to be higher (58 vs 50) and your Heart Rate Variability (HRV) to be lower (50 vs 70) compared to the night without alcohol. Alcohol is a stressor on the body, and these physiological metrics reflect that, even if you still managed to get a good amount of sleep. This highlights how Whoop/Garmin etc. can pick up on subtle physiological stress."},
    {"instruction": "User asking about 'how to lucid dream'.", "response": "Lucid dreaming, where you become aware that you're dreaming while still in the dream, is a fascinating topic. Techniques often discussed include reality testing during the day (regularly asking yourself if you're dreaming), keeping a dream journal to improve dream recall, and specific methods like Mnemonic Induction of Lucid Dreams (MILD) or Wake-Back-to-Bed (WBTB). There's a lot of information available online from lucid dreaming communities and researchers if you'd like to explore those techniques further. As a sleep coach, I can point you to the concept, but I don't provide specific training protocols for it."},
    {"instruction": "I want to optimize my bedroom for sleep. Give me a quick checklist.", "response": "Great idea! For an optimal sleep bedroom: 1. **Darkness:** As dark as possible (blackout curtains, cover LEDs). 2. **Quiet:** Minimize noise (earplugs, white noise machine if needed). 3. **Cool Temperature:** Aim for 60-67°F (15-19°C). 4. **Comfortable Bed:** Supportive mattress, comfortable pillows and bedding. 5. **No Clutter:** A tidy room can be more calming. 6. **No Screens:** Remove TVs, computers if possible, and avoid using phones in bed. 7. **Good Air Quality:** Ensure decent ventilation."},
    {"instruction": "My watch sometimes says I have '0 minutes Deep Sleep'. Is that possible or a sensor error?", "response": "While it's *highly unlikely* to have absolutely zero minutes of Deep Sleep if you slept for any significant duration, consumer wearables can sometimes misclassify sleep stages, especially if sleep is very fragmented, movement artifacts are high, or sensor contact is poor. If it's an isolated incident and other metrics look okay and you feel rested, it might be a sensor or algorithm quirk for that night. However, if your watch *consistently* reports very little or no Deep Sleep, and you also feel unrefreshed, it would be worth looking into improving sleep hygiene. If concerns persist, discussing it with a healthcare professional who might suggest a clinical sleep study for accurate staging could be considered, as I cannot diagnose sensor issues or medical conditions."},
    {"instruction": "I'm worried I have Restless Legs Syndrome. My legs feel creepy-crawly at night when I try to relax in bed.", "response": "The symptoms you're describing—an uncomfortable, creepy-crawly sensation in the legs at rest, especially in the evening or at night, with an urge to move them—are characteristic of Restless Legs Syndrome (RLS). RLS is a neurological condition that can significantly disrupt sleep. For a proper diagnosis and to discuss potential management strategies, it's very important to consult with a healthcare professional. They can evaluate your symptoms thoroughly."},
    {"instruction": "What is the best sleeping position for health?", "response": "There isn't one single 'best' sleeping position that suits everyone, as it can depend on individual health conditions. However, generally, sleeping on your **back** (supine) is often considered good for spinal alignment and can help with acid reflux if your head is slightly elevated. Sleeping on your **side** (especially the left side) is also often recommended, particularly for pregnant women, people with acid reflux, or those who snore or have mild sleep apnea (as it helps keep airways open). Sleeping on your **stomach** (prone) is generally least recommended as it can strain the neck and spine. The most important thing is to find a position that is comfortable for you and allows for restful sleep."},
    {"instruction": "Can you give me some specific examples of a 'relaxing wind-down routine'?", "response": "Certainly! A relaxing wind-down routine helps signal to your body and mind that it's time to prepare for sleep. Aim for 30-60 minutes of calm activities. Examples include: 1. Taking a warm (not hot) bath or shower. 2. Reading a physical book under dim, warm light (avoid exciting thrillers!). 3. Listening to calm, relaxing music or a soothing podcast/audiobook. 4. Gentle stretching or yoga. 5. Practicing mindfulness meditation or deep breathing exercises. 6. Journaling – writing down worries or thoughts from the day to get them out of your head. The key is to choose activities you find personally calming and to do them consistently each night."}
]

# --- Save Data to JSON Lines file ---
dataset_filename = "sleep_coach_finetuning_data.jsonl" # Keep standard filename
print(f"\n💾 Saving {len(fine_tuning_examples)} examples to '{dataset_filename}'...")
try:
    # Reasoning: Serialize every example BEFORE opening the file. Opening in 'w' mode truncates
    # the file immediately, so if an entry turned out not to be JSON-serializable midway through,
    # a partial file would be left on disk and the loading step below (which only checks that
    # the file exists) would load it as if it were complete. Building the lines first means a
    # serialization error is raised while any existing file is still untouched.
    jsonl_lines = [json.dumps(entry) + '\n' for entry in fine_tuning_examples]
    # Reasoning: JSON Lines format = one JSON object per line, each terminated by '\n'.
    # The explicit encoding keeps the write independent of the platform's default locale.
    with open(dataset_filename, 'w', encoding='utf-8') as f:
        f.writelines(jsonl_lines)
    print("✅ Data saved successfully.")
except Exception as e:
    # Best-effort cell: report the problem and let the notebook continue.
    print(f"❌ Error saving data to {dataset_filename}: {e}")

# --- Load Data using Hugging Face Datasets ---
# Reasoning: 'sleep_dataset' is pre-set to None so it is always defined; it remains None
# when the file is missing or load_dataset raises.
sleep_dataset = None
# Reasoning: Spell out the expected schema (two string columns) and hand it to the loader,
# for clarity and basic validation during loading.
expected_features = Features({
    'instruction': Value('string'),
    'response': Value('string'),
})
if not os.path.exists(dataset_filename):
    print(f"❌ Error: Dataset file '{dataset_filename}' not found. Cannot load.")
else:
    print(f"\n⏳ Loading dataset from '{dataset_filename}'...")
    try:
        # Reasoning: Read the JSON Lines file written above with the 'json' builder and
        # request the 'train' split.
        sleep_dataset = load_dataset(
            'json',
            data_files=dataset_filename,
            features=expected_features,
            split='train',
        )
        print("✅ Dataset loaded successfully:")
        print(f"   Number of examples: {len(sleep_dataset)}")
        print("\n   First example in loaded dataset:")
        # Reasoning: Printing one record gives a quick visual sanity check of the loaded data.
        print(sleep_dataset[0])
    except Exception as load_error:
        print(f"❌ Error loading dataset from file: {load_error}")
        print("   Ensure the file was saved correctly and is valid JSON Lines format.")

# @title Step 9: Configure and Run Fine-Tuning (PEFT/LoRA)
# Reasoning: This cell adapts the pre-trained LLM to the specific task of sleep coaching
# using the custom dataset prepared in the previous step. It uses Parameter-Efficient Fine-Tuning (PEFT)
# with LoRA (Low-Rank Adaptation) to make this feasible on Colab's limited resources.

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling
import time
import datetime
import torch
import os

# --- Prerequisites Check ---
# Fine-tuning is expensive, so verify that the base model, tokenizer and dataset produced by
# the earlier steps exist (and are not None) before configuring anything.
print("--- Checking Prerequisites for Fine-Tuning ---")
ready_to_train = True
if globals().get('llm_model') is None:
    print("❌ Prerequisite Failed: Base LLM model ('llm_model') not loaded. Run Step 4.")
    ready_to_train = False
if globals().get('llm_tokenizer') is None:
    print("❌ Prerequisite Failed: LLM Tokenizer ('llm_tokenizer') not loaded. Run Step 4.")
    ready_to_train = False
if globals().get('sleep_dataset') is None:
    print("❌ Prerequisite Failed: Fine-tuning dataset ('sleep_dataset') not loaded. Run Step 8.")
    ready_to_train = False

if not ready_to_train:
    print("\n🛑 Cannot proceed with fine-tuning due to missing prerequisites.")
else:
    print("✅ Prerequisites met. Configuring fine-tuning...\n")

    # --- Fine-Tuning Configuration ---
    output_dir = "./sleep-coach-lora-adapter-mistral7b"  # Checkpoints and the final adapter are written here.
    num_train_epochs = 3             # Passes over the dataset; more epochs risk overfitting on a small dataset.
    per_device_train_batch_size = 2  # Examples per GPU step; limited by VRAM.
    gradient_accumulation_steps = 8  # Effective batch size = 2 * 8 = 16.
    learning_rate = 2e-4             # 1e-4 to 5e-4 is a common range for LoRA.
    max_seq_length = 512             # Longer examples are truncated to this many tokens.
    logging_steps = 5                # Log the training loss every N steps.
    save_steps = 25                  # Save an adapter checkpoint every N steps.
    lora_r = 16                      # LoRA rank (size of the low-rank matrices).
    lora_alpha = 32                  # LoRA scaling factor, here 2 * lora_r.
    lora_dropout = 0.05              # Dropout applied to the LoRA layers.

    # --- Prepare Model for PEFT/Quantized Training ---
    print("⏳ Preparing base model for PEFT...")
    # Gradient checkpointing recomputes activations in the backward pass instead of storing
    # them, trading compute for memory.
    if hasattr(llm_model, 'gradient_checkpointing_enable'):
        llm_model.gradient_checkpointing_enable()
    llm_model_prepared = prepare_model_for_kbit_training(llm_model)
    print("✅ Base model prepared.")

    # --- Define LoRA Configuration ---
    lora_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        # Adapt only the attention query/value projections.
        target_modules=["q_proj", "v_proj"],
        lora_dropout=lora_dropout,
        bias="none",                  # Biases of the adapted layers are not trained.
        task_type=TaskType.CAUSAL_LM,
    )
    print(f"✅ LoRA configured (r={lora_r}, alpha={lora_alpha}).")

    # --- Create PEFT Model ---
    # NOTE(review): get_peft_model injects the LoRA layers into the base model object in place,
    # so `llm_model` may still carry this adapter after the cleanup at the end of this cell —
    # confirm before applying the saved adapter onto the same object again in Step 10.
    peft_model = get_peft_model(llm_model_prepared, lora_config)
    print("✅ PEFT model created by adding LoRA adapter.")
    peft_model.print_trainable_parameters()

    # --- Preprocess & Tokenize Dataset for Training ---
    def preprocess_finetune_data(examples):
        """Format and tokenize one batch of instruction/response pairs.

        Args:
            examples: Batch mapping with parallel 'instruction' and 'response'
                lists, as passed by Dataset.map(batched=True).

        Returns:
            The tokenizer output for the batch: one unpadded sequence per
            example, truncated to max_seq_length tokens.
        """
        # Format: <s>[INST] Instruction [/INST] Response</s>
        texts = [f"<s>[INST] {instr} [/INST] {resp}{llm_tokenizer.eos_token}"
                 for instr, resp in zip(examples['instruction'], examples['response'])]
        # add_special_tokens=False: the text already spells out <s> and the EOS token, so the
        # tokenizer must not prepend a second BOS on top of the literal one.
        # No padding here: the data collator pads each batch to its own longest sequence,
        # which is cheaper than padding every example to max_seq_length.
        return llm_tokenizer(texts, add_special_tokens=False, truncation=True, max_length=max_seq_length)

    print(f"\n⏳ Preprocessing and tokenizing dataset (max_seq_length={max_seq_length})...")
    # remove_columns drops the raw text columns so only the tokenizer outputs remain.
    tokenized_dataset = sleep_dataset.map(
        preprocess_finetune_data,
        batched=True,
        remove_columns=sleep_dataset.column_names,
    )
    print(f"✅ Dataset ready for training ({len(tokenized_dataset)} examples).")

    # --- Configure Training Arguments ---
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=learning_rate,
        logging_steps=logging_steps,
        save_steps=save_steps,
        fp16=True,                     # Mixed-precision (float16) training.
        gradient_checkpointing=True,   # Keep gradient checkpointing on inside the Trainer too.
        optim="paged_adamw_8bit",      # Memory-efficient AdamW variant from bitsandbytes.
        save_total_limit=3,            # Keep only the latest 3 checkpoints.
        dataloader_num_workers=2,
        report_to="none",              # No external experiment tracking.
        # Optional extras left disabled: lr_scheduler_type="cosine", warmup_steps=...,
        # and step-based evaluation (requires an eval_dataset).
    )

    # --- Initialize Trainer ---
    # The tokenizer is handed to the collator only. Trainer's own `tokenizer=` keyword is
    # deprecated in favour of `processing_class` in recent transformers releases, and the
    # tokenizer is saved explicitly next to the final adapter below.
    # NOTE(review): DataCollatorForLanguageModeling masks labels equal to the pad token id; if
    # pad_token is set to eos_token (as the Step 10 reload helper does) the real EOS label is
    # masked as well — confirm against the tokenizer setup in Step 4.
    trainer = Trainer(
        model=peft_model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=DataCollatorForLanguageModeling(tokenizer=llm_tokenizer, mlm=False),
    )

    # --- Start Fine-Tuning ---
    print("\n--- 🚀 Initiating Fine-Tuning ---")
    start_time = time.time()
    train_result = trainer.train()
    end_time = time.time()
    print("--- ✅ Fine-Tuning Completed ---")

    # --- Save Final LoRA Adapter ---
    # Save the adapter weights/config and the tokenizer side by side so Step 10 can load both
    # from one directory.
    final_adapter_path = os.path.join(output_dir, "final_adapter")
    print(f"\n💾 Saving final trained LoRA adapter to: {final_adapter_path}")
    peft_model.save_pretrained(final_adapter_path)
    llm_tokenizer.save_pretrained(final_adapter_path)
    print("✅ Final adapter and tokenizer saved.")

    # --- Log Metrics and Duration ---
    training_duration_seconds = end_time - start_time
    training_duration_formatted = str(datetime.timedelta(seconds=training_duration_seconds))
    print(f"\n⏱️ Total Fine-tuning Duration: {training_duration_formatted}")
    # Optional: trainer.log_metrics / trainer.save_metrics / trainer.save_state can be called
    # with train_result.metrics here (before the trainer is deleted) if a record is needed.

    # --- Clean Up GPU Memory ---
    # Drop the references to the training objects and release PyTorch's cached GPU memory.
    # llm_model (the base model object) is intentionally kept for the following steps.
    print("\n🧹 Cleaning up GPU memory...")
    del peft_model
    del llm_model_prepared
    del trainer
    torch.cuda.empty_cache()
    print("✅ Memory cleanup attempted.")

# @title Step 10: Load Fine-Tuned Model (Apply LoRA Adapter to Base)
# Reasoning: Load the original base LLM again (if it was unloaded or runtime restarted)
# and then apply the trained LoRA adapter weights onto it to create the final, fine-tuned model for inference.

from peft import PeftModel # Class for loading PEFT adapters.
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig # Ensure imports are available

# --- Configuration ---
# Directory holding the final LoRA adapter and tokenizer files. Step 9 writes them with
# save_pretrained() into "<output_dir>/final_adapter" once training has finished.
final_adapter_path = "./sleep-coach-lora-adapter-mistral7b/final_adapter" # Must match the save path used in Step 9.

# --- Function to Reload Base Model if Necessary ---
# Reasoning: Encapsulate the base model loading logic to avoid repetition.
def reload_base_model_and_tokenizer(model_id, quant_config):
    """Load the quantized base model and its tokenizer, reporting failures.

    Args:
        model_id: Model identifier passed to the from_pretrained loaders.
        quant_config: Quantization config passed as quantization_config.

    Returns:
        A (model, tokenizer) tuple. Either element is None if loading failed
        before it was created; errors are printed rather than raised.
    """
    print(f"⏳ Reloading base model '{model_id}' with quantization...")
    base, tok = None, None
    model_kwargs = dict(
        quantization_config=quant_config,
        device_map="auto",
        trust_remote_code=True,
    )
    try:
        base = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
        print("✅ Base model reloaded.")
        tok = AutoTokenizer.from_pretrained(model_id)
        # Reuse EOS as the padding token and pad on the right.
        tok.pad_token, tok.padding_side = tok.eos_token, "right"
        print("✅ Tokenizer reloaded.")
    except Exception as err:
        print(f"❌ Failed to reload base model/tokenizer: {err}")
    return base, tok

# --- Load Base Model (Only if it doesn't exist) ---
# If 'llm_model' is missing or None (e.g. after cleanup or a runtime restart), reload both the
# model and the tokenizer, recreating any configuration that was lost along the way.
if globals().get('llm_model') is None:
    print("Base model not found in memory. Attempting reload...")
    if 'base_model_id' not in globals():
        base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
    if 'bnb_config' not in globals():
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )
    llm_model, llm_tokenizer = reload_base_model_and_tokenizer(base_model_id, bnb_config)
elif globals().get('llm_tokenizer') is None:
    # Only the tokenizer is missing: reload it with the same padding setup as the helper above.
    print("Base model found, but tokenizer missing. Reloading tokenizer...")
    if 'base_model_id' not in globals():
        base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
    llm_tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    llm_tokenizer.pad_token = llm_tokenizer.eos_token
    llm_tokenizer.padding_side = "right"
    print("✅ Tokenizer reloaded.")


# --- Load and Apply LoRA Adapter ---
model_finetuned = None  # Stays None unless the adapter loads successfully.
if llm_model is not None and llm_tokenizer is not None and os.path.exists(final_adapter_path):
    print(f"\n⏳ Loading fine-tuned LoRA adapter from: '{final_adapter_path}'...")
    try:
        # Apply the saved adapter weights on top of the existing base model object.
        model_finetuned = PeftModel.from_pretrained(llm_model, final_adapter_path)
        # Optional: model_finetuned = model_finetuned.merge_and_unload() would fold the adapter
        # into the base weights; it is not needed just for inference.
        # Evaluation mode disables training-only layers such as dropout.
        model_finetuned.eval()
    except Exception as e:
        print(f"❌ Error loading LoRA adapter: {e}")
        print("   Ensure the adapter path is correct and points to the directory containing 'adapter_config.json', etc.")
        model_finetuned = None
    else:
        # Report success outside the try block, using a safe lookup: when the base model was
        # already in memory 'base_model_id' may never have been defined, and a NameError raised
        # by the message must not discard an adapter that loaded correctly.
        base_model_label = globals().get('base_model_id', 'unknown (base model was already in memory)')
        print(f"✅ Fine-tuned PEFT model ready. Using base model '{base_model_label}' + adapter '{final_adapter_path}'.")
elif not os.path.exists(final_adapter_path):
    print(f"❌ Error: Fine-tuned adapter path not found at '{final_adapter_path}'. Did Step 9 complete and save correctly?")
else:
    print("❌ Error: Cannot load fine-tuned adapter because the base model or tokenizer failed to load.")

import re

# @title Step 11: Interact with the FINE-TUNED Sleep Coach Agent
# Reasoning: This cell runs the main interaction loop, allowing you to chat (via text or uploaded audio)
# with the sleep coach agent that is now using the fine-tuned LLM.

import sys # For checking if running in Colab

# --- TTS Text Normalisation ---
# Matches a numeric range written with an ASCII hyphen or an en dash, e.g. "7-9" or "7–9".
_TTS_RANGE_PATTERN = re.compile(r'(\d+)[-\u2013](\d+)')


def normalize_ranges_for_tts(text):
    """Rewrite numeric ranges so they are spoken as "N to M".

    Both the ASCII hyphen ("7-9") and the en dash ("7–9") are treated as range
    separators; text without such ranges is returned unchanged.

    Args:
        text: Assistant reply that is about to be synthesized.

    Returns:
        The reply with every "<digits><dash><digits>" occurrence replaced by
        "<digits> to <digits>".
    """
    return _TTS_RANGE_PATTERN.sub(r'\1 to \2', text)


# --- Prerequisite Check ---
# Each entry is (global name that must exist and be non-None, message printed when it is missing).
print("--- Checking Prerequisites for Interaction ---")
ready_to_interact = True
for _required_name, _failure_message in (
    ('model_finetuned', "❌ Prerequisite Failed: Fine-tuned model ('model_finetuned') not loaded. Run Step 10."),
    ('llm_tokenizer', "❌ Prerequisite Failed: LLM Tokenizer ('llm_tokenizer') not loaded. Ensure Step 4 or 10 succeeded."),
    ('stt_model', "❌ Prerequisite Failed: STT model ('stt_model') not loaded. Run Step 2."),
    ('tts_voice', "❌ Prerequisite Failed: TTS voice ('tts_voice') not loaded. Run Step 3."),
):
    if globals().get(_required_name) is None:
        print(_failure_message)
        ready_to_interact = False

if not ready_to_interact:
    print("\n🛑 Cannot start interaction loop due to missing prerequisites.")
else:
    print("✅ Prerequisites met. Starting interaction loop...\n")

    # --- Interaction Loop ---
    # Start from an empty history dedicated to the fine-tuned model.
    conversation_history_finetuned = []
    print("="*60)
    print("🗣️ 💬 INTERACTING WITH FINE-TUNED NAPCOACH 💤")
    print("="*60)
    print("Ask sleep-related questions. Try queries related to your fine-tuning data!")
    print("Commands: type 'quit' or 'exit' to end, type 'upload' for audio input.")
    print("-" * 60)

    while True:
        try:
            user_input_text = input("You: ")
        except EOFError:
            print("\nInput stream ended. Exiting interaction.")
            break
        except KeyboardInterrupt:
            # Stopping the cell while waiting for input should end the session cleanly.
            print("\nInterrupted. Exiting interaction.")
            break

        command = user_input_text.lower().strip()

        # --- Handle Exit Command ---
        if command in ('quit', 'exit'):
            farewell_message = "Okay, ending our session. Wishing you restful sleep!"
            print(f"\nNapCoach: {farewell_message}")
            speak_text_and_display(farewell_message)
            break

        # --- Resolve the query text (uploaded audio or typed text) ---
        if command == 'upload':
            transcribed_text = transcribe_uploaded_audio()
            if not transcribed_text:
                print("   Audio transcription failed or was cancelled. Please type your query instead.")
                continue  # Nothing to send to the LLM this turn.
            print(f"\n    Mapped audio to text: '{transcribed_text}'")
            processed_input_for_llm = transcribed_text
        else:
            processed_input_for_llm = user_input_text

        # Ignore empty / whitespace-only queries.
        if not (processed_input_for_llm and processed_input_for_llm.strip()):
            print("   Please provide a valid query, type 'upload', or 'quit'.")
            continue

        # --- Generate and Speak Response ---
        # The fine-tuned model and its dedicated history list are passed explicitly.
        assistant_response_text = get_llm_response(
            user_message=processed_input_for_llm,
            model_to_use=model_finetuned,
            tokenizer_to_use=llm_tokenizer,
            history=conversation_history_finetuned,
            is_finetuned_model=True
        )
        if not assistant_response_text:
            continue

        # Spell out ranges ("60-67 degrees" -> "60 to 67 degrees") before synthesis.
        tts_processed_text = normalize_ranges_for_tts(assistant_response_text)
        print(f"   TTS Input (Original): '{assistant_response_text}'") # For debugging
        print(f"   TTS Input (Processed with Regex): '{tts_processed_text}'") # For debugging
        speak_text_and_display(tts_processed_text)

    print("\n--- Interaction loop finished. ---")

# @title Gradio Application Cell (RUN THIS AS THE VERY LAST CELL)

import gradio as gr
import os
from pathlib import Path
import time
import shutil # For potential cleanup later, though Gradio manages its own temp for inputs

# --- Ensure global variables from your notebook are accessible by this point ---
# (Assuming all your necessary global variables and functions STT, LLM, TTS models & functions are loaded)
# stt_model, model_finetuned, llm_tokenizer, tts_voice, tts_sample_rate,
# conversation_history_finetuned, SYSTEM_PROMPT,
# transcribe_audio, get_llm_response, synthesize_for_gradio_and_autoplay,
# GRADIO_TTS_OUTPUT_DIR


# --- Gradio Processing Function (napcoach_interface_handler) ---
# Generator callback that the Gradio text box and send button are wired to further down.
# It relies on transcribe_audio, get_llm_response, synthesize_for_gradio_and_autoplay,
# model_finetuned, llm_tokenizer and conversation_history_finetuned being defined by earlier cells.
def napcoach_interface_handler(user_audio_filepath, user_text_query, chat_history_for_display):
    """
    Generator driving one NapCoach turn in the Gradio app.

    An existing audio file takes precedence over typed text. Each yield is a
    5-tuple (chat history, LLM text, TTS audio path, audio input value,
    text input value) matching the output components it is wired to:
      1. after the user's message has been appended to the chat history,
      2. after the LLM reply has been filled in,
      3. after speech synthesis, carrying the audio file path (or None).
    With no usable input, a single tuple carrying the unchanged history is
    yielded and the generator ends.
    """
    transcription_failed_marker = "(Audio transcription failed or was empty)"
    print("\n--- Gradio: New Interaction ---")

    # Work out what the user asked (audio first, then text).
    if user_audio_filepath and os.path.exists(user_audio_filepath):
        print(f"🎤 Gradio: Processing audio input from: {user_audio_filepath}")
        user_query = transcribe_audio(user_audio_filepath) or transcription_failed_marker
    elif user_text_query and user_text_query.strip():
        print(f"📝 Gradio: Processing text input: '{user_text_query}'")
        user_query = user_text_query
    else:
        print("🤷 Gradio: No valid input provided by user.")
        yield chat_history_for_display, None, None, None, ""
        return

    # Show the user's message straight away, with the reply slot still empty.
    display_history = [] if chat_history_for_display is None else chat_history_for_display
    display_history.append([user_query, None])
    yield display_history, None, None, None, ""

    # Ask the LLM, unless the audio could not be transcribed.
    print(f"🧠 Gradio: Sending to LLM: '{user_query}'")
    if not user_query or user_query == transcription_failed_marker:
        reply_text = "I couldn't understand the audio. Could you please type your query?"
    else:
        reply_text = get_llm_response(
            user_message=user_query,
            model_to_use=model_finetuned,
            tokenizer_to_use=llm_tokenizer,
            history=conversation_history_finetuned,
            is_finetuned_model=True
        ) or "I'm sorry, I couldn't generate a response to that query."

    print(f"🤖 Gradio: LLM Response: '{reply_text}'")
    display_history[-1][1] = reply_text
    yield display_history, reply_text, None, None, ""

    # Turn the reply into speech.
    reply_audio_path = None
    if reply_text:
        print(f"🔊 Gradio: Synthesizing TTS for: '{reply_text}'")
        reply_audio_path = synthesize_for_gradio_and_autoplay(reply_text)
        if not reply_audio_path:
            print("⚠️ Gradio: TTS synthesis failed or returned no audio path.")
        else:
            print(f"✅ Gradio: TTS audio ready at: {reply_audio_path}")

    # Final update: everything from the previous yield plus the audio path.
    yield display_history, reply_text, reply_audio_path, None, ""


# --- Gradio Interface Definition using Blocks ---
with gr.Blocks(theme=gr.themes.Glass(), title="NapCoach AI", css="footer {display: none !important}") as demo: # Added CSS to hide footer

    # --- MODIFICATION 1: Simplified Header ---
    gr.Markdown(
        """
        <div style="text-align: center; padding-top: 10px; padding-bottom: 0px;">
            <h1 style="margin-bottom: 0px;">NapCoach AI 💤</h1>
            <p style="margin-top: 5px; font-size: 1.1em;">Your personal sleep coaching assistant</p>
        </div>
        """
    )

    chatbot_ui = gr.Chatbot(
        label="Conversation with NapCoach",
        bubble_full_width=False,
        height=500,
        # User avatar (None uses default). Bot avatar from your original code.
        # For a user icon similar to image, find an image URL: e.g., "https://img.icons8.com/ios-glyphs/90/663399/user-male-circle.png"
        avatar_images=(None, "https://img.icons8.com/color/96/000000/bot.png")
    )

    # This hidden textbox is a target for the LLM's text response.
    llm_text_for_display = gr.Textbox(label="Coach's Text Reply (hidden)", interactive=False, visible=False)

    # --- MODIFICATION 2: Input Area Reorganized ---
    with gr.Column(scale=1): # Main container for inputs at the bottom
        with gr.Accordion("🎙️ Use Voice (Tap to expand)", open=False):
            audio_in_ui = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Record or Upload Audio Query",
                # format="wav" # Optional
            )

        with gr.Row(variant="compact", elem_id="input_row"): # Compact row for text input and send
            text_in_ui = gr.Textbox(
                placeholder="Type your message here...",
                show_label=False,
                lines=1,        # Single line for chat feel
                max_lines=5,    # Allow some expansion for longer messages
                container=False, # To make it fit better in the row potentially
                scale=7         # Textbox takes more width
            )
            submit_btn_ui = gr.Button(
                "➤",            # --- MODIFICATION 3: Icon-like send button ---
                variant="primary",
                scale=1,         # Button takes less width
                min_width=60     # Ensure button looks like a button
            )

    gr.Markdown("---") # Using a thematic break (horizontal rule) for visual separation instead of div spacer
    tts_audio_out_ui = gr.Audio(
        label="🎧 NapCoach Voice Reply", # Added emoji
        type="filepath",
        autoplay=True,
        interactive=False
    )
    gr.Markdown("---")

    # --- MODIFICATION 4: Utility Buttons and Status (Retaining cleaner layout from previous suggestion) ---
    def clear_internal_llm_history_action_ui_v2():
        global conversation_history_finetuned
        global SYSTEM_PROMPT # Make sure SYSTEM_PROMPT is accessible
        conversation_history_finetuned = []
        if 'SYSTEM_PROMPT' in globals() and SYSTEM_PROMPT:
            conversation_history_finetuned.append({"role": "system", "content": SYSTEM_PROMPT})
        print("🧹 Internal LLM conversation history cleared by UI button.")
        return "NapCoach's internal memory has been reset. You can start a new conversation."

    # Row holding the two utility buttons.
    with gr.Row(equal_height=False):
        # Clears only the listed UI components. status_text_ui is created
        # after this button, so it is not in the list. This button does not
        # touch conversation_history_finetuned; the button next to it does.
        clear_btn_ui = gr.ClearButton(
            components=[audio_in_ui, text_in_ui, chatbot_ui, llm_text_for_display, tts_audio_out_ui],
            value="🧹 Clear Chat & Inputs"
        )
        clear_llm_mem_btn_ui = gr.Button(
            "🧠 Reset Coach Memory"
        )

    # Read-only, single-line status box rendered below the button row.
    status_text_ui = gr.Textbox(
        placeholder="Status messages appear here.",
        interactive=False,
        lines=1,
        max_lines=1,
        show_label=False
    )
    # Reset the internal LLM history on click; the callback takes no inputs
    # and its returned message is written into the status box.
    clear_llm_mem_btn_ui.click(
        fn=clear_internal_llm_history_action_ui_v2,
        inputs=None,
        outputs=[status_text_ui]
    )

    # --- Event handling ---
    # The handler receives the audio input, the text input, and the current
    # chatbot value, in that order.
    gradio_inputs = [audio_in_ui, text_in_ui, chatbot_ui]
    # The handler must supply five values, one per component listed here.
    gradio_outputs = [chatbot_ui, llm_text_for_display, tts_audio_out_ui,
                      audio_in_ui, text_in_ui] # Last two are for clearing inputs

    # Both triggers (Enter in the textbox, click on the send button) run the
    # same handler with the same inputs and outputs. No audio-specific trigger
    # is registered in this section, so recorded audio is picked up only when
    # one of these two events fires.
    # NOTE(review): napcoach_interface_handler is described as a generator;
    # it is defined elsewhere in the file, so confirm there.
    actions = [text_in_ui.submit, submit_btn_ui.click]
    for action in actions:
        action(
            fn=napcoach_interface_handler,
            inputs=gradio_inputs,
            outputs=gradio_outputs,
            show_progress="full" # Show the full progress indicator while the handler runs
        )

# --- Launch the Gradio App ---
print("\n🚀 Launching Gradio Interface in Colab...")
# debug=True keeps the Colab cell running so errors and logs stay visible;
# interrupt the cell to stop the server (or set debug=False).
# share=True requests a temporary public *.gradio.live URL.
# NOTE(review): inbrowser=True asks Gradio to open a browser tab; this
# presumably has no effect on a hosted Colab runtime. Confirm, then drop it.
demo.launch(debug=True, share=True, inbrowser=True)

⏳ Installing STT, TTS, and Audio Handling libraries...
⏳ Installing Core ML, LLM, and Fine-Tuning libraries...
   Attempting to upgrade 'datasets' library...
⏳ Installing TTS library...

--- Main Installations Attempted ---

⏳ Forcing fsspec version compatible with 'datasets'...
⏳ Installing Gradio

✅ All dependency installations and conflict resolutions attempted.
   We will proceed and see if this causes issues during model/data loading or training.

--- Verifying GPU Access ---
   >>> Action Required: Go to 'Runtime' -> 'Change runtime type', select 'T4 GPU', and click 'Save'. <<<
   Then, RESTART the runtime ('Runtime' -> 'Restart runtime') and run this cell again.
   Proceeding without a GPU will make LLM operations extremely slow.
--- GPU Verification Complete ---
⏳ Loading Whisper STT model 'base.en' onto cpu (compute: int8)...
✅ Whisper STT model ('base.en') loaded successfully.
Done
--- TTS Configuration ---
Repo ID: rhasspy/piper-voices
Voice Path Prefix: en/en_US/lessac/medi

For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


   Copied ONNX file to expected path: piper_models/en_US-lessac-medium.onnx
   Copied JSON file to expected path: piper_models/en_US-lessac-medium.onnx.json

 Verifying local files again before loading...

⏳ Loading TTS voice model into memory...
✅ TTS voice model 'en_US-lessac-medium' loaded successfully.
   Retrieved Sample Rate: 22050 Hz


CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


Successfully logged into Hugging Face Hub
⏳ Loading base LLM: 'mistralai/Mistral-7B-Instruct-v0.2' with 4-bit quantization...
   This requires downloading several GB and can take 5-15 minutes...
❌ Error loading base LLM model: CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend
   >>> COMMON FIXES: <<<
   1. OUT OF MEMORY: Go to 'Runtime' -> 'Disconnect and delete runtime', then reconnect and run cells again.
   2. Ensure T4 GPU is selected in Runtime settings.
   3. Close other Colab notebooks or browser tabs consuming memory.
   4. If errors persist, Colab might be temporarily overloaded; try again later.

Skipping tokenizer loading because the base LLM failed to load.

✅ 'Listen' functions defined (transcribe_audio, 

Generating train split: 0 examples [00:00, ? examples/s]

CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


✅ Dataset loaded successfully:
   Number of examples: 90

   First example in loaded dataset:
{'instruction': 'User data: Slept 5 hours 10 minutes, 45 mins deep sleep, 1 hour REM. Felt very tired waking up. Question: What should I focus on?', 'response': "Getting only around 5 hours isn't ideal for most adults, which likely explains the tiredness. Focus on extending your total sleep time towards the 7-9 hour range. A consistent wind-down routine before bed might help you fall asleep faster and get more restorative deep and REM sleep."}
--- Checking Prerequisites for Fine-Tuning ---
❌ Prerequisite Failed: Base LLM model ('llm_model') not loaded. Run Step 4.
❌ Prerequisite Failed: LLM Tokenizer ('llm_tokenizer') not loaded. Run Step 4.

🛑 Cannot proceed with fine-tuning due to missing prerequisites.
Base model not found in memory. Attempting reload...
⏳ Reloading base model 'mistralai/Mistral-7B-Instruct-v0.2' with quantization...
❌ Failed to reload base model/tokenizer: CUDA is required

  chatbot_ui = gr.Chatbot(
  chatbot_ui = gr.Chatbot(



🚀 Launching Gradio Interface in Colab...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://07dd1f7999b8ba07e0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://07dd1f7999b8ba07e0.gradio.live


