In [None]:
# Log in to the Hugging Face Hub via its CLI before any model/dataset download.
# NOTE(review): presumably this prompts interactively for an access token -- confirm it works in the target runtime.
!huggingface-cli login

# Required libraries

In [None]:
!pip install -q transformers accelerate bitsandbytes gradio

In [None]:
!pip install peft

In [None]:
!pip install langchain faiss-cpu sentence-transformers

In [None]:
!pip install -U langchain-community

# RAG Database

In [None]:
# The FAISS store and the HuggingFace embeddings live in `langchain-community`
# (installed above); the `langchain.*` paths are kept only as a fallback for
# older installations that still expose them.
try:
    from langchain_community.vectorstores import FAISS
    from langchain_community.embeddings import HuggingFaceEmbeddings
except ImportError:
    from langchain.vectorstores import FAISS
    from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model used to query the index.
# NOTE(review): it must be the same model the index was built with -- confirm.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# SECURITY: FAISS.load_local unpickles the stored docstore;
# allow_dangerous_deserialization=True is only acceptable for an index file
# that you created yourself and fully trust.
vectorstore = FAISS.load_local(
    "/content/drive/MyDrive/rag_index",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True
)


# Data structure and Memory System

In [None]:
import torch
import json
import os
from datetime import datetime # Import datetime
from typing import Dict, List
import gradio as gr

class SimpleDataManager:
    """Simple file-based storage for user data and recommendations.

    Everything is stored as JSON under ``data_dir``:

    * ``users/<user_id>.json`` -- ``{"user_id": ..., "weeks": [...]}``
    * ``recommendations/<user_id>_<week>.json`` -- one recommendation per week

    All public methods are best-effort: problems are printed instead of
    raised. Readers fall back to an empty result; writers return ``False``.
    """

    def __init__(self, data_dir="wellbeing_data", max_weeks=8):
        """Create the storage directories.

        Args:
            data_dir: Root directory for all stored files.
            max_weeks: How many of the most recent weekly entries are kept
                per user (must be >= 1).

        Raises:
            ValueError: If ``max_weeks`` is smaller than 1.
        """
        if max_weeks < 1:
            raise ValueError("max_weeks must be at least 1")
        self.data_dir = data_dir
        self.max_weeks = max_weeks
        os.makedirs(data_dir, exist_ok=True)
        os.makedirs(f"{self.data_dir}/users", exist_ok=True)
        os.makedirs(f"{self.data_dir}/recommendations", exist_ok=True)

    @staticmethod
    def _checked_id(user_id: str) -> str:
        """Return ``user_id`` if it is safe to embed in a file name.

        Raises:
            ValueError: If the id is empty, not a string, or contains a path
                component (which could escape the data directory).
        """
        if (
            not isinstance(user_id, str)
            or user_id in ("", ".", "..")
            or os.path.basename(user_id) != user_id
        ):
            raise ValueError(f"Invalid user_id for file storage: {user_id!r}")
        return user_id

    def _user_file(self, user_id: str) -> str:
        """Path of the JSON file holding ``user_id``'s weekly history."""
        return f"{self.data_dir}/users/{self._checked_id(user_id)}.json"

    def save_user_week(self, user_id: str, week_data: Dict):
        """Save weekly data for a user.

        An existing entry with the same ``week_start`` is replaced in place, so
        re-submitting a week does not create duplicates. Only the most recent
        ``max_weeks`` entries are kept.

        Returns:
            True when the data was written, False when an error was printed.
        """
        try:
            user_file = self._user_file(user_id)

            # Load existing data (read-only: the file is rewritten below)
            if os.path.exists(user_file):
                with open(user_file, 'r') as f:
                    user_data = json.load(f)
            else:
                user_data = {"user_id": user_id, "weeks": []}

            # Tolerate files that lack the "weeks" key, like get_user_history does
            weeks = user_data.setdefault("weeks", [])

            week_start = week_data.get("week_start") if isinstance(week_data, dict) else None
            existing_index = next(
                (
                    index for index, week in enumerate(weeks)
                    if week_start is not None
                    and isinstance(week, dict)
                    and week.get("week_start") == week_start
                ),
                None,
            )
            if existing_index is None:
                weeks.append(week_data)
            else:
                weeks[existing_index] = week_data

            # Keep only the most recent weeks
            user_data["weeks"] = weeks[-self.max_weeks:]

            # Save
            with open(user_file, 'w') as f:
                json.dump(user_data, f, indent=2)

            print(f"✅ Saved week data for user {user_id}")
            return True

        except Exception as e:
            print(f"❌ Error saving user data: {str(e)}")
            return False

    def get_user_history(self, user_id: str) -> List[Dict]:
        """Get user's weekly history (empty list if none exists or on error)."""
        try:
            user_file = self._user_file(user_id)
            if os.path.exists(user_file):
                with open(user_file, 'r') as f:
                    user_data = json.load(f)
                    history = user_data.get("weeks", [])
                    print(f"📊 Retrieved {len(history)} weeks of history for {user_id}")
                    return history
        except Exception as e:
            print(f"❌ Error loading user history: {str(e)}")

        return []

    def save_recommendation(self, user_id: str, week_start: str, recommendation: str):
        """Save LLM recommendation for one user and week.

        A later call for the same user and week overwrites the earlier file.

        Returns:
            True when the file was written, False when an error was printed.
        """
        try:
            # Clean week_start for filename (remove invalid characters)
            clean_week = week_start.replace("/", "-").replace(":", "-")
            rec_file = f"{self.data_dir}/recommendations/{self._checked_id(user_id)}_{clean_week}.json"

            rec_data = {
                "user_id": user_id,
                "week_start": week_start,
                "recommendation": recommendation,
                "timestamp": datetime.now().isoformat()
            }

            with open(rec_file, 'w') as f:
                json.dump(rec_data, f, indent=2)

            print(f"✅ Saved recommendation for user {user_id}, week {week_start}")
            return True

        except Exception as e:
            print(f"❌ Error saving recommendation: {str(e)}")
            return False

    def get_last_recommendation(self, user_id: str) -> str:
        """Get user's last recommendation ("" if there is none).

        Candidate files are ordered by file name, i.e. by the week string that
        follows the user id; this is chronological for YYYY-MM-DD weeks.
        """
        rec_dir = f"{self.data_dir}/recommendations"

        try:
            # Check if recommendations directory exists
            if not os.path.exists(rec_dir):
                return ""

            prefix = f"{user_id}_"
            candidates = sorted(
                (
                    name for name in os.listdir(rec_dir)
                    if name.startswith(prefix) and name.endswith('.json')
                ),
                reverse=True,
            )

            for name in candidates:
                try:
                    with open(f"{rec_dir}/{name}", 'r') as f:
                        rec_data = json.load(f)
                except (OSError, ValueError) as file_error:
                    print(f"⚠️ Skipping unreadable recommendation file {name}: {file_error}")
                    continue

                # The prefix alone is ambiguous: "a_" also matches files that
                # belong to user "a_b". Trust the id stored inside the file.
                if not isinstance(rec_data, dict) or rec_data.get("user_id") != user_id:
                    continue

                recommendation = rec_data.get("recommendation", "")
                print(f"📝 Retrieved last recommendation for {user_id} ({len(recommendation)} chars)")
                return recommendation

        except Exception as e:
            print(f"❌ Error loading last recommendation: {str(e)}")

        return ""

    def get_user_stats(self, user_id: str) -> Dict:
        """Get basic stats about user's data (bonus method).

        Returns ``{"total_weeks": 0}`` when the user has no history; otherwise
        the week count, first/latest ``week_start`` and the average steps
        (integer division) and sleep over the stored weeks.
        """
        history = self.get_user_history(user_id)

        if not history:
            return {"total_weeks": 0}

        week_count = len(history)
        return {
            "total_weeks": week_count,
            "first_week": history[0].get("week_start", "Unknown"),
            "latest_week": history[-1].get("week_start", "Unknown"),
            # "or 0" keeps a stored null from breaking the sum
            "avg_steps": sum((week.get("total_steps", 0) or 0) for week in history) // week_count,
            "avg_sleep": sum((week.get("avg_sleep", 0) or 0) for week in history) / week_count
        }

# Wellbeing LLM

In [None]:
import torch
import re
from typing import Dict, List
from transformers import AutoTokenizer, AutoModelForCausalLM

class WellbeingLLM:
    """LLM wrapper that turns weekly wellbeing data into structured recommendations.

    NOTE(review): the FAISS vectorstore is loaded but never queried by any
    method of this class, so no retrieved context reaches the prompt yet.
    """

    # Checkpoint used when no model id is supplied to load_model().
    DEFAULT_MODEL_ID = "AnjaliNV/Merged_WellBeing_LLM_FP16"

    def __init__(self, base_model_id: str, vectorstore_path: str = None):
        """Load the optional vectorstore, then the model and tokenizer.

        Args:
            base_model_id: Hugging Face id or local path of the model to load.
            vectorstore_path: Optional directory of a saved FAISS index.
        """
        self.model = None
        self.tokenizer = None
        self.device = None
        self.vectorstore = None

        # Load vectorstore if path provided
        if vectorstore_path:
            self.load_vectorstore(vectorstore_path)

        self.load_model(base_model_id)

    def load_vectorstore(self, vectorstore_path: str):
        """Load FAISS vectorstore for RAG; on any failure leave it as None."""
        try:
            # Prefer the langchain-community locations; fall back to the
            # legacy langchain paths for older installations.
            try:
                from langchain_community.vectorstores import FAISS
                from langchain_community.embeddings import HuggingFaceEmbeddings
            except ImportError:
                from langchain.vectorstores import FAISS
                from langchain.embeddings import HuggingFaceEmbeddings

            print("📚 Loading knowledge base...")
            embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

            # SECURITY: load_local unpickles data; only use trusted index files.
            self.vectorstore = FAISS.load_local(
                vectorstore_path,
                embeddings=embedding_model,
                allow_dangerous_deserialization=True
            )
            print("✅ Knowledge base loaded")

        except Exception as e:
            print(f"⚠️ Could not load vectorstore: {e}")
            self.vectorstore = None

    def load_model(self, model_path):
        """Load tokenizer and FP16 model from ``model_path``.

        The argument is honoured (it used to be overwritten by a hard-coded
        id); an empty value falls back to ``DEFAULT_MODEL_ID``.
        """
        model_path = model_path or self.DEFAULT_MODEL_ID
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        self.model.eval()
        self.device = self.model.device  # Set device after loading model
        print(f"✅ LLM loaded successfully on {self.device}")

    def build_template(self, current_week: dict, user_history: list = None) -> str:
        """Master template builder with history.

        Renders demographics, weekly stats, diet and food intake of
        ``current_week`` plus up to the last three entries of ``user_history``
        into a prompt string. Past weeks are labelled relative to the slice
        that is shown (ending at "Week 0"), the same labelling that
        generate_recommendation uses.
        """
        demographics = current_week.get('demographics', {})
        prefs = current_week.get('preferences', {})
        food_data = current_week.get('food_data', {})

        history_str = ""
        if user_history:
            recent_weeks = user_history[-3:]  # limit to last 3 weeks
            history_lines = [
                f"Week {position - len(recent_weeks)}: {json.dumps(past)}"
                for position, past in enumerate(recent_weeks, 1)
            ]
            history_str = "\n\nPrevious weeks data and recommendations:\n" + "\n".join(history_lines)

        prompt = f"""
    A user is a {demographics.get('sex', 'unknown')} aged {demographics.get('age', 'unknown')} years,
    height {demographics.get('height', 'unknown')} cm, weight {demographics.get('weight', 'unknown')} kg.
    Weekly stats: {current_week.get('total_steps',0):,} steps, {current_week.get('zone_minutes',0)} zone minutes,
    {current_week.get('exercise_sessions',0)} exercise sessions, avg sleep {current_week.get('avg_sleep',0):.1f}h.
    Diet: {prefs.get('diet_type','No Preference')}, Allergies: {', '.join(prefs.get('allergies',[])) if prefs.get('allergies') else 'none'}
    Food consumption: Dairy {food_data.get('dairy_liters',0)}L, Legumes {food_data.get('legumes_grams',0)}g, Meat {food_data.get('meat_grams',0)}g,
    Fruits {food_data.get('fruits_grams',0)}g, Vegetables {food_data.get('vegetables_grams',0)}g, Grains {food_data.get('grains_grams',0)}g,
    Nuts {food_data.get('nuts_seeds_grams',0)}g, Water {food_data.get('water_liters',0)}L.
    {history_str}

    ---
    **Provide structured health recommendations in this format**:
    1) Food Recommendation
    2) Physical Activity
    3) Sleep & Well-being
    4) Weekly Summary
    """
        return prompt

    def generate_recommendation(self, current_week: Dict, user_history: List[Dict] = None, last_recommendation: str = None) -> str:
        """Generate context-aware weekly recommendations.

        Args:
            current_week: Processed data of the week to analyse.
            user_history: Earlier weeks; only the last three are included.
            last_recommendation: Text of the previous recommendation, if any.

        Returns:
            Only the newly generated text (the prompt is not echoed back).
        """
        # Build prompt with current week + history + last recommendation
        history_str = ""
        if user_history:
            last_weeks = user_history[-3:]  # last 3 weeks
            for i, past_week in enumerate(last_weeks, 1):
                history_str += f"Week {-len(last_weeks)+i}: {past_week}\n"

        last_rec_str = f"Last recommendation:\n{last_recommendation}\n" if last_recommendation else ""

        prompt = f"""
  User weekly data:
  {current_week}

  {history_str}
  {last_rec_str}

  Please provide structured health recommendations in this format:
  1) Food Recommendation
  2) Physical Activity
  3) Sleep & Well-being
  4) Weekly Summary
  """

        inputs = self.tokenizer(f"<|user|>\n{prompt}\n<|assistant|>\n", return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            output_ids = self.model.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.7,
                top_p=0.9
            )

        # A causal LM returns prompt + continuation. Drop the prompt by token
        # count: removing it by string replacement fails whenever decoding
        # does not reproduce the prompt text exactly, and it leaves the
        # <|user|>/<|assistant|> markers in the answer.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_ids = output_ids[0][prompt_length:]

        response = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
        return response.strip()

# Wellbeing System

In [None]:
from typing import Dict, List
import torch
import json
import os
from datetime import datetime # Import datetime
import gradio as gr

class WellbeingSystem:
    """Main system that combines data management and LLM"""

    def __init__(self, base_model_id: str, vectorstore_path: str = None):
        """Create the data manager and load the LLM.

        Args:
            base_model_id: Model id/path forwarded to WellbeingLLM.
            vectorstore_path: Optional FAISS index path forwarded to WellbeingLLM.
        """
        self.data_manager = SimpleDataManager()
        self.llm = WellbeingLLM(base_model_id, vectorstore_path)  # Pass vectorstore_path to LLM
        print("🎯 WellbeingSystem initialized")

    def analyze_and_recommend(self, user_id: str, week_data: Dict) -> str:
        """Main function: analyze weekly data and generate recommendations.

        Loads the user's history and last recommendation, standardises
        ``week_data``, asks the LLM for a recommendation and then stores both
        the week and the recommendation. Never raises: on failure an
        "Analysis failed: ..." message is returned instead.
        """
        print(f"📊 Analyzing data for user: {user_id}")

        try:
            # Get user history and last recommendation
            user_history = self.data_manager.get_user_history(user_id)
            last_recommendation = self.data_manager.get_last_recommendation(user_id)

            print(f"📈 Found {len(user_history)} weeks of history")
            if last_recommendation:
                print(f"📝 Previous recommendation found ({len(last_recommendation)} chars)")

            # Process current week data
            processed_week = self._process_week_data(week_data)

            # Show what data is being sent to LLM
            print(f"📊 Processed data: {processed_week['total_steps']:,} steps, {processed_week['zone_minutes']} zone mins, {processed_week['avg_sleep']:.1f}h sleep")

            # Generate recommendation using LLM
            recommendation = self.llm.generate_recommendation(
                processed_week, user_history, last_recommendation
            )

            # Save data. A save may signal failure either by raising or by
            # returning False; any other return value (including None) counts
            # as success. The recommendation is returned regardless.
            try:
                week_saved = self.data_manager.save_user_week(user_id, processed_week)
                rec_saved = self.data_manager.save_recommendation(user_id, processed_week['week_start'], recommendation)
                if week_saved is False or rec_saved is False:
                    print("⚠️ Warning: Could not save all data")
                else:
                    print(f"💾 Data saved successfully")
            except Exception as save_error:
                print(f"⚠️ Warning: Could not save data: {save_error}")

            print(f"✅ Generated recommendation for {user_id} ({len(recommendation)} chars)")
            return recommendation

        except Exception as e:
            print(f"❌ Error in analyze_and_recommend: {str(e)}")
            import traceback
            traceback.print_exc()
            return f"Analysis failed: {str(e)}. Please check your input data."

    @staticmethod
    def _to_nonneg_int(value) -> int:
        """Coerce ``value`` to a non-negative int; anything unusable becomes 0."""
        try:
            return max(0, int(value))
        except (TypeError, ValueError, OverflowError):
            pass
        try:
            # Accept float-like strings such as "65000.0"
            return max(0, int(float(value)))
        except (TypeError, ValueError, OverflowError):
            return 0

    @staticmethod
    def _parse_series(raw, label: str) -> List[float]:
        """Turn a comma-separated string or an iterable into a list of floats.

        If any element is not numeric the whole series is discarded and a
        warning naming ``label`` is printed.
        """
        if raw is None:
            return []
        if isinstance(raw, str):
            tokens = [part.strip() for part in raw.split(',') if part.strip()]
        else:
            try:
                tokens = list(raw)
            except TypeError:
                tokens = [raw]  # a single bare number
        try:
            return [float(token) for token in tokens]
        except (TypeError, ValueError):
            print(f"⚠️ Invalid {label} format, using default")
            return []

    def _process_week_data(self, week_data: Dict) -> Dict:
        """Process and standardize weekly data with better validation.

        Each numeric field is coerced on its own, so one malformed value
        becomes 0 without discarding the rest of the week.

        Returns:
            Dict with ``week_start``, ``total_steps``, ``zone_minutes``,
            ``exercise_sessions``, ``avg_sleep``, ``avg_mood``, ``food_data``,
            ``timestamp``, ``demographics`` and, when supplied, ``preferences``.
            If processing fails entirely, a zeroed dict with a
            ``processing_error`` entry is returned instead.
        """
        try:
            sleep_hours = self._parse_series(week_data.get('sleep_hours', []), "sleep hours")
            mood_scores = self._parse_series(week_data.get('mood_scores', []), "mood scores")

            # Calculate averages with validation
            avg_sleep = sum(sleep_hours) / len(sleep_hours) if sleep_hours else 0
            avg_mood = sum(mood_scores) / len(mood_scores) if mood_scores else 0

            # Enhanced food data processing
            food_data = self._process_food_data(week_data)

            processed_data = {
                'week_start': week_data.get('week_start', datetime.now().strftime('%Y-%m-%d')),
                'total_steps': self._to_nonneg_int(week_data.get('total_steps', 0)),
                'zone_minutes': self._to_nonneg_int(week_data.get('zone_minutes', 0)),
                'exercise_sessions': self._to_nonneg_int(week_data.get('exercise_sessions', 0)),
                'avg_sleep': round(avg_sleep, 1),
                'avg_mood': round(avg_mood, 1),
                'food_data': food_data,
                'timestamp': datetime.now().isoformat(),
                'demographics': week_data.get('demographics', {})
            }

            if 'preferences' in week_data:
                processed_data['preferences'] = week_data['preferences']

            return processed_data

        except Exception as e:
            print(f"❌ Error processing week data: {str(e)}")
            # Return empty or default data structure on error
            return {
                'week_start': week_data.get('week_start', datetime.now().strftime('%Y-%m-%d')),
                'total_steps': 0,
                'zone_minutes': 0,
                'exercise_sessions': 0,
                'avg_sleep': 0.0,
                'avg_mood': 0.0,
                'food_data': {},
                'timestamp': datetime.now().isoformat(),
                'processing_error': str(e)
            }

    def _process_food_data(self, week_data: Dict) -> Dict:
        """Process food data handling both old and new formats.

        New format: if any unit-suffixed key (``*_liters`` / ``*_grams``) is
        present, all eight unit fields are returned (missing or invalid -> 0).
        Old format: only the serving-count keys actually present are returned.
        Values are clamped to be non-negative.
        """
        food_data = {}

        # Check for new format first (with units)
        unit_fields = ['dairy_liters', 'water_liters', 'legumes_grams', 'meat_grams',
                      'fruits_grams', 'vegetables_grams', 'grains_grams', 'nuts_seeds_grams']

        has_unit_format = any(field in week_data for field in unit_fields)

        if has_unit_format:
            print("📊 Using new format (with units)")
            # New format with units
            for field in unit_fields:
                value = week_data.get(field, 0)
                try:
                    food_data[field] = max(0, float(value))  # Ensure non-negative
                except (ValueError, TypeError):
                    food_data[field] = 0
                    print(f"⚠️ Invalid {field} value, using 0")
        else:
            print("📊 Using old format (servings)")
            # Old format (for backward compatibility)
            old_fields = ['dairy', 'legumes', 'meat', 'fruits', 'vegetables', 'grains', 'nuts_seeds', 'water_glasses']
            for field in old_fields:
                if field in week_data:
                    try:
                        food_data[field] = max(0, float(week_data[field]))
                    except (ValueError, TypeError):
                        food_data[field] = 0

        return food_data

# Wellbeing App

In [None]:
import gradio as gr
import json
import os
from datetime import datetime
from typing import Dict, List

class WellbeingApp:
    """Complete Gradio interface with history and preferences (no targets)"""

    def __init__(self, base_model_id: str, vectorstore_path: str = "/content/drive/MyDrive/rag_index"):
        """Build the backing WellbeingSystem (this loads the LLM).

        Args:
            base_model_id: Model id/path forwarded to WellbeingSystem.
            vectorstore_path: Location of the saved FAISS index. Defaults to
                the Google Drive path this notebook has always used; pass
                another path (or None to skip the index) when running elsewhere.
        """
        print("🚀 Initializing Complete WellbeingApp...")
        self.wellbeing_system = WellbeingSystem(
            base_model_id,
            vectorstore_path=vectorstore_path
        )

    def create_wellbeing_app(self):
        """Assemble the tabbed Gradio Blocks UI and return it (not launched)."""

        # (tab label, element id, method that fills the tab), in display order
        tab_specs = (
            ("🤖 Get Recommendations", "main-tab", self._create_main_analysis_tab),
            ("📈 Progress History", "history-tab", self._create_history_tab),
        )

        with gr.Blocks(title="🏥 Complete Wellbeing LLM", theme=gr.themes.Soft()) as ui:
            gr.Markdown("# 🏥 Complete AI Wellbeing System")
            gr.Markdown("### Personalized health recommendations based on your current health data")

            with gr.Tabs():
                for tab_label, tab_elem_id, build_tab in tab_specs:
                    with gr.TabItem(tab_label, elem_id=tab_elem_id):
                        build_tab()

        return ui

    def _create_main_analysis_tab(self):
        """Create the main analysis tab.

        Must be called inside an open Gradio layout context: it builds a
        two-column row (input form on the left, Markdown result on the right),
        stores every component in ``self.main_components`` and wires the
        analyze button to ``analyze_wellbeing_with_preferences``.
        """

        with gr.Row():
            with gr.Column():

                # NOTE(review): this heading has no controls beneath it.
                gr.Markdown("### 🤖 AI Enhancement Options")
                # User Info Section
                gr.Markdown("### 👤 User Information")
                user_id = gr.Textbox(label="👤 User ID", value="user_001", info="Unique identifier for tracking your progress")
                week_start = gr.Textbox(label="📅 Week Start Date", value="2024-01-15", info="Format: YYYY-MM-DD")

                # Personal Details
                sex = gr.Dropdown(
                    label="⚧️ Sex",
                    choices=["Male", "Female"],
                    value="Male",
                    info="Biological sex"
                )

                age = gr.Number(
                    label="🎂 Age",
                    value=25,
                    minimum=13,
                    maximum=120,
                    step=1,
                    info="Age in years"
                )

                weight = gr.Number(
                    label="⚖️ Weight (kg)",
                    value=70.0,
                    minimum=30.0,
                    maximum=300.0,
                    step=0.1,
                    info="Current weight in kilograms"
                )

                height = gr.Number(
                    label="📏 Height (cm)",
                    value=175.0,
                    minimum=100.0,
                    maximum=250.0,
                    step=0.1,
                    info="Height in centimeters"
                )

                # Preferences Section
                gr.Markdown("### 🍽️ Meal Preferences & Dietary Restrictions")
                with gr.Row():
                    diet_type = gr.Dropdown(
                        label="🥗 Diet Type",
                        choices=[
                            "No Preference", "Vegetarian", "Vegan", "Pescatarian",
                            "Keto", "Paleo", "Mediterranean", "Low Carb", "Gluten Free", "Dairy Free"
                        ],
                        value="No Preference",
                        info="Select your primary dietary preference"
                    )

                allergies = gr.CheckboxGroup(
                    label="⚠️ Food Allergies & Intolerances",
                    choices=[
                        "Nuts (Tree nuts)", "Peanuts", "Shellfish", "Fish",
                        "Milk/Dairy", "Eggs", "Soy", "Wheat/Gluten",
                        "Sesame", "Lactose Intolerant", "Other"
                    ],
                    info="⚠️ IMPORTANT: Select all allergies and intolerances"
                )

                other_allergies = gr.Textbox(
                    label="🚨 Other Allergies/Restrictions",
                    placeholder="e.g., tomatoes, specific medications, religious restrictions...",
                    info="Specify any other dietary restrictions or allergies"
                )

                # Activity Section (weekly totals)
                gr.Markdown("### 🏃‍♂️ Physical Activity")
                total_steps = gr.Number(label="Total Steps (Week)", value=65000, info="Total steps for the week")
                zone_minutes = gr.Number(label="Zone Minutes (Week)", value=120, info="Active zone minutes for the week")
                exercise_sessions = gr.Number(label="Exercise Sessions", value=3, info="Number of exercise sessions this week")

                # Sleep & Mood Section (free-text, comma-separated daily values)
                gr.Markdown("### 😴 Sleep & Mood")
                sleep_hours = gr.Textbox(
                    label="Sleep Hours (7 days)",
                    value="7.5,6.8,8.2,7.0,6.5,8.5,7.8",
                    info="Enter daily sleep hours separated by commas"
                )
                mood_scores = gr.Textbox(
                    label="Mood Scores (7 days)",
                    value="4,3.5,5,3,4,2.5,5",
                    info="Rate your daily mood from 1-5, separated by commas"
                )

                # Food Section (weekly totals, in liters or grams)
                gr.Markdown("### 🍎 Weekly Nutrition Intake")
                gr.Markdown("*Enter your total consumption for the entire week*")

                with gr.Row():
                    dairy = gr.Number(label="🥛 Dairy (liters)", value=3.5, info="Milk, yogurt, cheese")
                    water_liters = gr.Number(label="💧 Water (liters)", value=48, info="Total water consumption for the week")

                with gr.Row():
                    fruits = gr.Number(label="🍎 Fruits (grams)", value=2100, info="Total fruit consumption for the week")
                    vegetables = gr.Number(label="🥬 Vegetables (grams)", value=3500, info="Total vegetable consumption for the week")

                with gr.Row():
                    legumes = gr.Number(label="🫘 Legumes (grams)", value=350, info="Beans, lentils, peas")
                    nuts_seeds = gr.Number(label="🥜 Nuts/Seeds (grams)", value=210, info="Almonds, walnuts, chia seeds")

                with gr.Row():
                    meat = gr.Number(label="🍗 Meat (grams)", value=700, info="Chicken, beef, fish")
                    grains = gr.Number(label="🌾 Grains (grams)", value=2800, info="Rice, bread, pasta, oats")

                # Action buttons
                with gr.Row():
                    analyze_btn = gr.Button("🤖 Get AI Recommendations", variant="primary", size="lg")

            # Right-hand column: placeholder text until the first analysis runs
            with gr.Column():
                main_output = gr.Markdown("""
    ### 🏥 Wellbeing Analysis Results
    Click "Get AI Recommendations" to receive personalized health advice based on your data.
                """)

        # Store components, keyed by the field names used in the week_data dict
        self.main_components = {
            'user_id': user_id,
            'week_start': week_start,
            'sex': sex,
            'age': age,
            'weight': weight,
            'height': height,
            'total_steps': total_steps,
            'zone_minutes': zone_minutes,
            'exercise_sessions': exercise_sessions,
            'sleep_hours': sleep_hours,
            'mood_scores': mood_scores,
            'dairy_liters': dairy,
            'legumes_grams': legumes,
            'meat_grams': meat,
            'fruits_grams': fruits,
            'vegetables_grams': vegetables,
            'grains_grams': grains,
            'nuts_seeds_grams': nuts_seeds,
            'water_liters': water_liters,
            'diet_type': diet_type,
            'allergies': allergies,
            'other_allergies': other_allergies,
            'output': main_output
        }

        # Connect button. The order of `inputs` must match the positional
        # parameters of analyze_wellbeing_with_preferences.
        analyze_btn.click(
            fn=self.analyze_wellbeing_with_preferences,
            inputs=[
                user_id, week_start, sex, age, weight, height,
                total_steps, zone_minutes, exercise_sessions,
                sleep_hours, mood_scores, dairy, legumes, meat,
                fruits, vegetables, grains, nuts_seeds, water_liters,
                diet_type, allergies, other_allergies
            ],
            outputs=[main_output]
        )

    def analyze_wellbeing_with_preferences(self, user_id, week_start, sex, age, weight, height,
                                     total_steps, zone_minutes, exercise_sessions,
                                     sleep_hours, mood_scores, dairy_liters, legumes_grams, meat_grams,
                                     fruits_grams, vegetables_grams, grains_grams, nuts_seeds_grams, water_liters,
                                     diet_type, allergies, other_allergies):
        """Enhanced analysis with user demographics and preferences"""

        try:
            # Input validation
            if not user_id or not user_id.strip():
                return "❌ Error: Please enter a valid User ID"

            if not week_start:
                return "❌ Error: Please enter a week start date"

            # Validate demographic data
            if age is not None and (age < 13 or age > 120):
                return "❌ Error: Please enter a valid age (13-120 years)"

            if weight is not None and (weight < 30 or weight > 300):
                return "❌ Error: Please enter a valid weight (30-300 kg)"

            if height is not None and (height < 100 or height > 250):
                return "❌ Error: Please enter a valid height (100-250 cm)"

            # Calculate BMI if weight and height are provided
            bmi = None
            if weight and height:
                height_m = height / 100  # Convert cm to meters
                bmi = round(weight / (height_m ** 2), 1)

            # Prepare enhanced data with demographics
            week_data = {
                'user_id': user_id.strip(),
                'week_start': week_start,
                'demographics': {
                    'sex': sex,
                    'age': int(age) if age else None,
                    'weight': float(weight) if weight else None,
                    'height': float(height) if height else None,
                    'bmi': bmi
                },
                'total_steps': max(0, int(total_steps)) if total_steps else 0,
                'zone_minutes': max(0, int(zone_minutes)) if zone_minutes else 0,
                'exercise_sessions': max(0, int(exercise_sessions)) if exercise_sessions else 0,
                'sleep_hours': sleep_hours,
                'mood_scores': mood_scores,
                'dairy_liters': max(0, float(dairy_liters)) if dairy_liters else 0,
                'legumes_grams': max(0, float(legumes_grams)) if legumes_grams else 0,
                'meat_grams': max(0, float(meat_grams)) if meat_grams else 0,
                'fruits_grams': max(0, float(fruits_grams)) if fruits_grams else 0,
                'vegetables_grams': max(0, float(vegetables_grams)) if vegetables_grams else 0,
                'grains_grams': max(0, float(grains_grams)) if grains_grams else 0,
                'nuts_seeds_grams': max(0, float(nuts_seeds_grams)) if nuts_seeds_grams else 0,
                'water_liters': max(0, float(water_liters)) if water_liters else 0,
                'preferences': {
                    'diet_type': diet_type,
                    'allergies': allergies if allergies else [],
                    'other_allergies': other_allergies.strip() if other_allergies else ""
                }
            }

            recommendation = self.wellbeing_system.analyze_and_recommend(user_id.strip(), week_data)

            # Format output without targets
            output = self._format_analysis_output(week_data, recommendation)

            return output

        except Exception as e:
            print(f"❌ Analysis error: {str(e)}")
            import traceback
            traceback.print_exc()
            return f"❌ Error: {str(e)}\n\nPlease check your input format and try again."

    def _create_history_tab(self):
        """Create the history review tab.

        Must be called inside an open Gradio layout context. Builds the user
        selector, the view-type radio, the week-count slider, three buttons
        and a Markdown output, and stores the main components in
        ``self.history_components``.

        NOTE(review): none of the three buttons has a click handler attached
        in this method, so the tab is display-only as written.
        """

        gr.Markdown("## 📈 Progress History & Analytics")
        gr.Markdown("### Track your health journey and see improvements over time")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### 👤 User Selection")
                history_user_id = gr.Textbox(label="👤 User ID", value="user_001", info="Enter user ID to view history")

                gr.Markdown("### 📊 Analysis Options")
                analysis_type = gr.Radio(
                    label="📈 View Type",
                    choices=[
                        "📊 Complete Progress Summary",
                        "📈 Weekly Trends Analysis",
                        "📋 Raw Data Export",
                        "🏆 Achievement Milestones"
                    ],
                    value="📊 Complete Progress Summary",
                    info="Choose what type of analysis you want to see"
                )

                # NOTE(review): the slider allows up to 12 weeks -- confirm
                # that the storage layer actually retains that many.
                weeks_to_show = gr.Slider(
                    label="📅 Weeks to Include",
                    minimum=1,
                    maximum=12,
                    value=4,
                    step=1,
                    info="How many recent weeks to analyze"
                )

                # Buttons are created but not connected to any callback here.
                with gr.Row():
                    view_history_btn = gr.Button("📊 View Progress", variant="primary", size="lg")
                    export_data_btn = gr.Button("💾 Export Data", variant="secondary")
                    delete_history_btn = gr.Button("🗑️ Clear History", variant="stop")

            # Right-hand column: placeholder text for the history view
            with gr.Column():
                history_output = gr.Markdown("""
    ### 📈 Progress History
    Select a user and analysis type to view historical data and trends.
                """)

        # Store history components (the three buttons are not stored)
        self.history_components = {
            'user_id': history_user_id,
            'analysis_type': analysis_type,
            'weeks_to_show': weeks_to_show,
            'output': history_output
        }

    def _format_preferences_display(self, preferences: dict) -> str:
        """Format preferences for nice display"""

        lines = ["### 🍽️ Your Dietary Profile:"]

        # Diet type
        if preferences.get('diet_type', 'No Preference') != "No Preference":
            lines.append(f"- **Diet:** {preferences['diet_type']}")

        # Allergies (most important!)
        if preferences.get('allergies'):
            allergy_list = ", ".join(preferences['allergies'])
            lines.append(f"- **⚠️ ALLERGIES:** {allergy_list}")

        if preferences.get('other_allergies'):
            lines.append(f"- **⚠️ OTHER RESTRICTIONS:** {preferences['other_allergies']}")

        if len(lines) == 1:  # Only the header
            lines.append("- No specific dietary preferences set")

        return "\n".join(lines)

    def _format_analysis_output(self, week_data, recommendation):
        """Format analysis output without targets"""

        daily_steps = week_data['total_steps'] / 7
        daily_water = week_data['water_liters'] / 7

        # Safely get avg_sleep and avg_mood with defaults
        avg_sleep = week_data.get('avg_sleep', 0)
        avg_mood = week_data.get('avg_mood', 0)

        preferences_display = self._format_preferences_display(week_data['preferences'])

        # Demographics display
        demographics_info = ""
        if week_data.get('demographics'):
            demo = week_data['demographics']
            if demo.get('bmi'):
                demographics_info = f"\n### 👤 Health Profile:\n- **BMI:** {demo['bmi']} (Height: {demo['height']}cm, Weight: {demo['weight']}kg)"

        return f"""# 🏥 Personalized Wellbeing Analysis & Recommendations

## 👤 User: {week_data['user_id']} | 📅 Week: {week_data['week_start']}

{preferences_display}

{demographics_info}

### 📊 Your Weekly Data Summary:
**🏃‍♂️ Activity:**
- **Steps:** {week_data['total_steps']:,} ({daily_steps:.0f}/day)
- **Zone Minutes:** {week_data['zone_minutes']}
- **Exercise Sessions:** {week_data['exercise_sessions']}

**😴 Sleep & Mood:**
- **Sleep:** {week_data['sleep_hours']} hours daily
- **Mood:** {week_data['mood_scores']}/10 daily scores

**🍎 Nutrition:**
- 🥛 **Dairy:** {week_data['dairy_liters']:.1f}L | 💧 **Water:** {week_data['water_liters']:.1f}L ({daily_water:.1f}L/day)
- 🍎 **Fruits:** {week_data['fruits_grams']:.0f}g | 🥬 **Vegetables:** {week_data['vegetables_grams']:.0f}g
- 🫘 **Legumes:** {week_data['legumes_grams']:.0f}g | 🍗 **Meat:** {week_data['meat_grams']:.0f}g
- 🌾 **Grains:** {week_data['grains_grams']:.0f}g | 🥜 **Nuts/Seeds:** {week_data['nuts_seeds_grams']:.0f}g

---

## 🤖 AI Recommendations
*Personalized based on your current health data, preferences, and dietary restrictions*

{recommendation}
"""

# Main execution

In [None]:
# Build the app and launch the Gradio UI.
# Constructing WellbeingApp creates the WellbeingSystem, which loads the LLM
# (and tries to load the FAISS index), so this cell needs every class cell
# above to have been run first.
# NOTE(review): share=True presumably publishes a public URL -- confirm that
# exposing this health-data form publicly is intended.
app = WellbeingApp(base_model_id="AnjaliNV/Merged_WellBeing_LLM_FP16")
demo = app.create_wellbeing_app()
demo.launch(share=True, debug=True)

# Fine Tuning

## Fine-tune 1

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
import torch
from transformers import BitsAndBytesConfig

# --- Paths ---
base_model_id = "ContactDoctor/Bio-Medical-Llama-3-8B"
adapter_path = "AnjaliNV/WellBeing_LLM"  # previously trained LoRA adapter

# --- Load tokenizer and base model ---
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
if tokenizer.pad_token is None:
    # No pad token defined: reuse EOS so fixed-length padding works.
    tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 weights with double quantization; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
model = prepare_model_for_kbit_training(model)

# Attach the existing adapter in *trainable* mode. PeftModel.from_pretrained defaults to
# is_trainable=False, which loads the adapter frozen (inference only) and would leave the
# Trainer further down this cell without any parameter that requires gradients.
model = PeftModel.from_pretrained(model, adapter_path, is_trainable=True)

# --- Load the new dataset and hold out 20% for validation (seeded split) ---
dataset = load_dataset("AnjaliNV/Templete2", split="train").train_test_split(test_size=0.2, seed=42)
train_dataset, val_dataset = dataset["train"], dataset["test"]

def format_prompt(example):
    """Render one example as a single chat-style training string.

    Args:
        example: mapping with "input" (user turn) and "output" (assistant turn).

    Returns:
        dict with one key, "text", holding the user turn followed by the assistant turn,
        each introduced by its role tag on its own line.
    """
    user_turn = example["input"]
    assistant_turn = example["output"]
    return dict(text=f"<|user|>\n{user_turn}\n<|assistant|>\n{assistant_turn}")

def tokenize(batch):
    """Tokenize a batch of formatted prompts, truncating/padding each one to 512 tokens."""
    return tokenizer(
        batch["text"],
        max_length=512,
        truncation=True,
        padding="max_length",
    )


# Add the "text" column, then tokenize it in batches, for both splits.
train_dataset = train_dataset.map(format_prompt).map(tokenize, batched=True)
val_dataset = val_dataset.map(format_prompt).map(tokenize, batched=True)

# mlm=False selects causal (next-token) language modelling rather than masked LM.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# --- Training arguments ---
training_args = TrainingArguments(
    # where things go
    output_dir="./diet-finetuned-model",
    logging_dir="./logs",
    report_to="none",
    # optimisation
    num_train_epochs=5,  # you can choose fewer epochs since continuing training
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    bf16=True,
    gradient_checkpointing=True,
    # checkpointing: one checkpoint per epoch, at most two kept
    save_strategy="epoch",
    save_total_limit=2,
)

# --- Trainer ---
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# --- Continue fine-tuning ---
trainer.train()

# --- Save updated LoRA adapter and tokenizer ---
# adapter_path is handed to save_pretrained here, i.e. it is used as the save location.
for artifact in (model, tokenizer):
    artifact.save_pretrained(adapter_path)




In [None]:
adapter_path = "AnjaliNV/WellBeing_LLM"
# Upload the adapter and then the tokenizer to the same Hub repository.
for artifact in (model, tokenizer):
    artifact.push_to_hub(adapter_path, use_auth_token=True)

## fine tuning domain Q and A

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig, get_peft_model
from datasets import load_dataset
import torch
from torch.utils.data import Dataset, DataLoader

# 1️⃣ Load base model and tokenizer
from transformers import BitsAndBytesConfig  # imported here so this cell runs on a fresh kernel

base_model_name = "AnjaliNV/Wellbeing_coach"  # e.g., a llama or bloom model
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
if tokenizer.pad_token is None:
    # No pad token defined: reuse EOS so fixed-length padding works.
    tokenizer.pad_token = tokenizer.eos_token

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Quantization options are passed through BitsAndBytesConfig (as in the "fine tune 1" cell)
# instead of the deprecated load_in_4bit / bnb_4bit_* keyword arguments of from_pretrained.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    ),
    device_map="auto",
)
# No model.to(device) here: device_map="auto" has already placed the weights, and calling
# .to() on a bitsandbytes-quantized / dispatched model is rejected by several
# transformers versions.


# 2️⃣ Load existing LoRA adapter
lora_model_path = "AnjaliNV/WellBeing_LoRA"
# is_trainable=True loads the adapter for continued training; the default (False) loads
# it frozen for inference.
model = PeftModel.from_pretrained(model, lora_model_path, is_trainable=True)


# 3️⃣ Load your dataset
# Split names are not assumed: whichever split the repo lists first is used.
dataset_dict = load_dataset("AnjaliNV/Custom_templete2")
first_split_name = [*dataset_dict.keys()][0]
dataset = dataset_dict[first_split_name]


# Preprocess as before
def preprocess(example):
    """Flatten one dataset record into a single instruction/response training string.

    Args:
        example: mapping with keys "input", "food", "fitness", "sleep_lifestyle"
            and "overall".

    Returns:
        dict with one key, "text": the instruction followed by a response made of four
        titled sections separated by blank lines.
    """
    # (section heading, source column), in the order they appear in the response.
    section_fields = (
        ("Food Recommendation", "food"),
        ("Physical Activity", "fitness"),
        ("Sleep & Well-being", "sleep_lifestyle"),
        ("Weekly Summary", "overall"),
    )
    response = "\n\n".join(
        f"{heading}:\n{example[column]}" for heading, column in section_fields
    )
    return {"text": f"Instruction: {example['input']}\nResponse: {response}"}


def tokenize(batch):
    """Tokenize a batch of "text" entries to 512-token tensors and attach labels.

    Returns the tokenizer output with an extra "labels" entry that is a copy of
    "input_ids".
    """
    encoded = tokenizer(
        batch["text"],
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    # Labels start out as an exact copy of the input ids.
    encoded["labels"] = encoded["input_ids"].clone()
    return encoded


# Build the "text" column, then tokenize it in batches.
dataset = dataset.map(preprocess)
tokenized_dataset = dataset.map(tokenize, batched=True)
# Expose only the three model-input columns, as torch tensors.
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# Create a PyTorch Dataset
class TextDataset(Dataset):
    """Map-style PyTorch dataset over an indexable collection of tokenized examples.

    Each underlying example is a mapping from column name to value; items are returned
    as dictionaries of tensors with the same keys.
    """

    def __init__(self, tokenized_dataset):
        # Any object supporting len() and integer indexing that yields mappings.
        self.tokenized_dataset = tokenized_dataset

    def __len__(self):
        return len(self.tokenized_dataset)

    @staticmethod
    def _to_tensor(val):
        """Convert one column value to a tensor without re-wrapping existing tensors."""
        if isinstance(val, torch.Tensor):
            # torch.tensor(existing_tensor) emits a UserWarning on every call;
            # clone().detach() is the equivalent copy that PyTorch recommends.
            return val.clone().detach()
        converted = torch.tensor(val)
        # Python lists are squeezed so a nested singleton list collapses to 1-D.
        return converted.squeeze() if isinstance(val, list) else converted

    def __getitem__(self, idx):
        return {key: self._to_tensor(val) for key, val in self.tokenized_dataset[idx].items()}


train_dataset = TextDataset(tokenized_dataset)


# 4️⃣ Continue fine-tuning
from transformers import DataCollatorForSeq2Seq, Trainer, TrainingArguments

# Label padding uses the tokenizer's pad id.
data_collator = DataCollatorForSeq2Seq(tokenizer, padding=True, label_pad_token_id=tokenizer.pad_token_id)

training_args = TrainingArguments(
    # where things go
    output_dir="./WellBeing_LoRA_updated",
    report_to="none",
    logging_steps=10,
    # optimisation
    num_train_epochs=3,
    learning_rate=3e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    fp16=True,
    # checkpointing: one checkpoint per epoch, at most two kept
    save_strategy="epoch",
    save_total_limit=2,
    remove_unused_columns=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,  # the custom PyTorch Dataset wrapper
    data_collator=data_collator,
    tokenizer=tokenizer,
)

# Freeze base model, enable LoRA: a parameter is trainable iff its name contains "lora_".
for name, param in model.named_parameters():
    param.requires_grad = "lora_" in name

model.train()
trainer.train()

# 5️⃣ Save updated LoRA adapter only
for artifact in (model, tokenizer):
    artifact.save_pretrained("./WellBeing_LoRA_updated")

In [None]:
from peft import PeftModel

# Local directory the fine-tuning cell saved into, and the Hub repo that receives it.
local_adapter_dir = "./WellBeing_LoRA_updated"
hub_repo_id = "AnjaliNV/WellBeing_LoRA"

base_model_name = "AnjaliNV/Wellbeing_coach"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    load_in_4bit=True,  # if your base model was loaded in 4-bit
    device_map="auto",
)
# Re-attach the locally saved adapter (it is loaded on top of the base model, not merged).
adapter_model = PeftModel.from_pretrained(base_model, local_adapter_dir)

# Push to your Hugging Face repo
adapter_model.push_to_hub(hub_repo_id, use_auth_token=True)

# Optionally, also push tokenizer updates
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(local_adapter_dir)
tokenizer.push_to_hub(hub_repo_id, use_auth_token=True)


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Inputs and output of the merge. merged_path is passed to save_pretrained below, so it is
# used as a save location by this cell.
base_model_name = "AnjaliNV/Wellbeing_coach"      # e.g. "meta-llama/Llama-2-7b-hf"
adapter_path   = "AnjaliNV/WellBeing_LoRA"  # path or HF repo of your fine-tuned adapter
merged_path    = "AnjaliNV/WellBeing_merged_coach"

# 1. Load base model + adapter
# The base model is loaded without a quantization config here (dtype chosen via "auto").
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype="auto",
    device_map="auto"
)
model = PeftModel.from_pretrained(base_model, adapter_path)

# 2. Merge LoRA weights into base model
# After this call `model` is rebound to the object returned by merge_and_unload().
model = model.merge_and_unload()   # <-- key step

# 3. Save merged model
# The tokenizer is reloaded from the base model and saved next to the merged weights so the
# output directory holds both.
model.save_pretrained(merged_path, safe_serialization=True)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.save_pretrained(merged_path)

print(f"✅ Merged model saved at {merged_path}")


In [None]:
from huggingface_hub import login, HfApi
import os

# 1. Authentication: no login call is made in this cell, so credentials must already be
#    configured (for example by the `huggingface-cli login` cell at the top of the notebook).
#    Do not paste a token into the notebook source.


# 2. Define repo ID (the Hub repository that will receive the merged model)
repo_id = "AnjaliNV/WellBeing_merged_coach"

# 3. Create the repo; exist_ok=True makes this a no-op error-wise if it already exists
api = HfApi()
api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

# 4. Upload your merged model folder
# local_dir is a local folder path that happens to have the same text as repo_id.
local_dir = "AnjaliNV/WellBeing_merged_coach"  # the folder you saved earlier

api.upload_folder(
    repo_id=repo_id,
    repo_type="model",
    folder_path=local_dir
)

print(f"✅ Model pushed successfully! Check it here: https://huggingface.co/{repo_id}")

# Test

## test base+adapter

In [None]:
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# 1️⃣ Load base model in 4-bit for memory efficiency
from transformers import BitsAndBytesConfig  # imported here so this cell runs on a fresh kernel

base_model_name = "AnjaliNV/Wellbeing_coach"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
if tokenizer.pad_token is None:
    # No pad token defined: reuse EOS.
    tokenizer.pad_token = tokenizer.eos_token

device = "cuda" if torch.cuda.is_available() else "cpu"

# 4-bit loading is requested through BitsAndBytesConfig (as in the "fine tune 1" cell)
# instead of the deprecated load_in_4bit keyword argument of from_pretrained.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)

# 2️⃣ Load LoRA adapter
lora_path = "./WellBeing_LoRA_updated"
model = PeftModel.from_pretrained(model, lora_path)
model.eval()
# No model.to(device) here: device_map="auto" has already placed the quantized weights, and
# moving a dispatched bitsandbytes model afterwards is unsupported on several library versions.
# generate_response sends its inputs to model.device, so nothing else depends on the move.

# 3️⃣ Function to generate response
def generate_response(prompt, max_tokens=700, temperature=0.7, top_p=0.9):
    """Sample a completion for `prompt` from the module-level model.

    Args:
        prompt: text fed to the tokenizer.
        max_tokens: upper bound on newly generated tokens.
        temperature: sampling temperature.
        top_p: nucleus-sampling threshold.

    Returns:
        The decoded continuation only (prompt tokens are sliced off), with special
        tokens skipped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = encoded.input_ids.shape[1]
    sampling_options = dict(
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_options)
    # Keep only the tokens produced after the prompt.
    return tokenizer.decode(generated[0][prompt_token_count:], skip_special_tokens=True)

# 4️⃣ Gradio Interface
# Input components, in the same order as generate_response's parameters.
instruction_box = gr.Textbox(lines=5, label="Instruction")
max_tokens_slider = gr.Slider(50, 1024, value=700, step=1, label="Max Tokens")
temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")
top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p Sampling")

iface = gr.Interface(
    fn=generate_response,
    inputs=[instruction_box, max_tokens_slider, temperature_slider, top_p_slider],
    outputs=gr.Textbox(label="LLM Response"),
    title="Wellbeing Coach LoRA Adapter Test",
    description="Interactively test your fine-tuned LoRA adapter without merging it into the base model.",
)

# 5️⃣ Launch interface
iface.launch()


## merged model testing

In [None]:
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import re

# Embedding model handed to the FAISS index loader below.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


#RAG-----------

# Load the saved FAISS index from Google Drive.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading a serialized index;
# only point this at index files you created yourself.
vectorstore = FAISS.load_local(
    "/content/drive/MyDrive/rag_index",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True
)
#---------------------

# -------------------
# Load merged model
# -------------------
# Tokenizer and weights both come from the same repo; weights are loaded in float16.
model_path = "AnjaliNV/WellBeing_Coach_LLM"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto"
)
# Show which device the first parameter ended up on.
print(next(model.parameters()).device)


def chat_with_model(user_input):
    """Generate a structured five-section well-being report for `user_input`.

    The top-3 passages retrieved from the module-level `vectorstore` are placed in the
    prompt as guideline context, the module-level model samples a completion, and the
    completion is trimmed to the span from "1) Food Recommendation" through the
    "5) Weekly Summary" section.

    Args:
        user_input: free-text description of the user's data.

    Returns:
        The trimmed report when the expected structure is found in the completion,
        otherwise the whole completion; whitespace-stripped in both cases.
    """
    # Retrieve guideline passages; they are injected into the prompt below (previously they
    # were fetched but never used).
    docs = vectorstore.similarity_search(user_input, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)

    # Inject the user_input and the retrieved context into the prompt
    prompt = f"""
You are a professional well-being coach.
Your role is to generate personalized well-being recommendations for the user.

USER INPUT:
{user_input}

OFFICIAL GUIDELINES (retrieved context):
{context}

INSTRUCTIONS:
- Always follow the official guidelines before making any recommendation.
- Do NOT over-recommend. Keep suggestions safe, concise, and realistic. Ask user to simple avoid the contradit food with their diet prefernce.
- Always take into account the user’s age, sex, and fitness history.
- Stop immediately after completing section 5 (Weekly Summary).
- Do NOT continue with further questions, explanations, or commentary.
- Do NOT add any “Assistant:” or “Human:” text.

TASK:
Based on the above user data, generate recommendations using EXACTLY the following structure:

1) Food Recommendation
- Overall Assessment: [...]
- Areas of Improvements: [...]
- Suggested Meals: [...]

2) Physical Activity
- Activity Assessment: [...]
- Zone Minutes and Intensity Feedback: [...]
- Strength/Cardio Tips: [Workout Suggestions]
- Weekly Goals: [...]

3) Sleep
- Sleep Review: [...]
- Sleep Suggestions: [...]

4)Mood & Mental Health
- Mood Review: [...]
- Mental Health Suggestions: [...]

5) Weekly Summary
- Summary: [...]
- Goals: [...]
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_length = inputs.input_ids.shape[1]

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=700,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
        )

    # Decode only the newly generated tokens.
    response = tokenizer.decode(output_ids[0][input_length:], skip_special_tokens=True)

    # Keep sections 1-5; stop at the first following line that starts with a capital letter
    # not immediately followed by ":" (or at the end of the text).
    match = re.search(
        r"(1\) Food Recommendation.*?5\) Weekly Summary.*?)(?:\n[A-Z][^:]|$)",
        response,
        flags=re.DOTALL,
    )
    # Return the trimmed report; the untrimmed text is only the fallback.
    return match.group(1).strip() if match else response.strip()


# -------------------
# Gradio UI
# -------------------
# Single text box in, single text box out; every submission is passed to chat_with_model.
iface = gr.Interface(
    fn=chat_with_model,
    inputs="text",
    outputs="text",
    title=" WellBeing Coach",
    description="Ask questions about well-being."
)

# Launch with sharing and debug mode enabled.
iface.launch(share=True, debug=True)


## cot check

In [None]:
from time import sleep
from typing_extensions import final
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain.schema import Document
import gc




# === Load FAISS vector stores ===
# Two embedding wrappers are created for the same model name; each index below is loaded
# with its own wrapper.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Index queried by generate_with_cot.
vectorstore = FAISS.load_local(
    "/content/drive/MyDrive/rag_index",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True
)

# Food index. NOTE(review): none of the functions defined in this cell reference
# `foodstore` — confirm whether it is still needed.
foodstore = FAISS.load_local(
    "/content/drive/MyDrive/food_faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True
)

# === Load the merged model ===
# Only AutoModelForCausalLM is used here; no separate LoRA adapter is attached in this cell.
model_path = "AnjaliNV/Merged_WellBeing_LLM"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto"
)

# === Define RAG-based response generation ===
def generate_step(prompt, temperature=0.7, max_tokens=1024):
    """Sample one completion for `prompt` from the module-level model.

    Args:
        prompt: full prompt text.
        temperature: sampling temperature.
        max_tokens: upper bound on newly generated tokens.

    Returns:
        The stripped continuation (prompt tokens excluded). If anything raises, the
        error is printed and an "Error generating response: ..." string is returned
        instead of propagating the exception.
    """
    try:
        print(f"Generating response with prompt length: {len(prompt)}")
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_token_count = encoded.input_ids.shape[1]

        sampling_options = dict(
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.9,
            repetition_penalty=1.2,
        )
        with torch.no_grad():
            generated = model.generate(**encoded, **sampling_options)

        # Decode only what was generated after the prompt.
        completion = tokenizer.decode(generated[0][prompt_token_count:], skip_special_tokens=True)
        print(f"Generated response length: {len(completion)}")
    except Exception as e:
        print(f"Error in generate_step: {e}")
        return f"Error generating response: {str(e)}"
    return completion.strip()

def cleanup_gpu_memory():
    """Release cached CUDA memory (when CUDA is available) and run the garbage collector."""
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        # Drop cached blocks first, then wait for outstanding GPU work.
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    # Always collect Python garbage, with or without a GPU.
    gc.collect()

def generate_with_cot(user_profile, temperature=0.3, max_tokens=512, k=3):
    """Produce a four-part well-being report by prompting the model once per section.

    Sections are generated in order (food, physical activity, sleep & mood, weekly
    summary) with `generate_step`, each printed as it arrives, and then joined with
    blank lines. Only the food prompt includes context retrieved from `vectorstore`.

    Args:
        user_profile: text describing the user's weekly data.
        temperature: accepted for interface compatibility; NOT used — every section is
            sampled at temperature 0.7.
        max_tokens: accepted for interface compatibility; NOT used — each section has
            its own fixed token budget (350, 350, 200, 200).
        k: number of passages retrieved for the food prompt.

    Returns:
        The four section texts joined by blank lines.
    """
    # Retrieve relevant docs from vectorstore as context
    retrieved = vectorstore.similarity_search(user_profile, k=k)
    context = "\n\n".join(passage.page_content for passage in retrieved)

    food_prompt = f"""
USER PROFILE:
{user_profile}
CONTEXT: {context}
--- END OF PROFILE ---
You are a well-being coach. Consider the user's weekly data and guidance, and provide just a Food recommendation following the template below
**1) Food Recommendation**
- Overall Assessment: Write 2–3 sentences after carefully analysing the user’s food intake. Highlight inconsistencies, contradictions, and ensure alignment with diet preferences and allergies.
- Areas of Improvement: for all meal intake categories, follow the comparison statement
  - Comparison statement: "Your weekly [food] intake of **X grams** is below/above the recommended **Y grams**, which is equivalent to **Z grams per day** and **B grams per week**."
- Suggested Meals: Suggest 3–4 meals that directly address deficiencies. Always provide ingredient quantities (grams/ml) and align with the user’s dietary preferences and restrictions.

"""

    activity_prompt = f"""
SYSTEM INSTRUCTION:
You are a certified physical activity and fitness coach.
- Your suggestions must be safe and healthy.
- Encourage a balance of cardiovascular, strength, and flexibility exercises.
- Always consider the user's fitness level.
- Do not suggest excessive exercise.
- Tell them to rest and recover.
- Recommend stretching before and after workouts.
- Always consider any past injuries.


USER PROFILE:
{user_profile}
--- END OF PROFILE ---

TASK:
Using the USER PROFILE, generate a structured response for **Physical Activity**.
Strictly follow the template below. Do not add extra commentary or change section headings.

TEMPLATE (must be followed exactly):

**2) Physical Activity**
- Activity Assessment: Write 2–3 sentences evaluating the user’s current activity level (steps, sessions, or general lifestyle). Mention consistency with their nutrition if relevant.
- Zone Minutes and Intensity Feedback: Provide recommended weekly zone minutes (moderate/vigorous). Suggest safe ways to gradually adjust exercise intensity.
- Strength/Cardio Tips: Suggest 2–3 specific exercises with structure. Format as: "Exercise name: X sets of Y reps, Z times per week." Include both cardio and strength. Always add warm-up (5–10 min dynamic stretch) and cool-down (5–10 min static stretch).
- Weekly Goals: List 3–4 measurable and realistic activity goals. Each must include frequency and duration (e.g., "Add 1 extra strength session per week" or "Walk 6,000 steps daily").
"""

    # The leading spaces inside this literal are part of the prompt text.
    sleep_prompt = f"""
      USER PROFILE:
      {user_profile}
      --- END OF PROFILE ---

      TASK:
      Using the USER PROFILE, generate a structured response for **Sleep, Mood, Lifestyle, and Social Health**.
      Strictly follow the template. Do not add extra commentary or change section headings.

      TEMPLATE (must be followed exactly):

      **3) Sleep & Mood**
      - Sleep and Mood Review: Write 3–4 sentences analyzing the user’s current sleep patterns, duration, quality, and mood. Highlight connections to lifestyle, diet, or activity if relevant.
      - Suggestions: Provide 4–5 actionable recommendations to improve sleep quality, mood, and overall well-being. Include measurable tips such as bedtime routines, relaxation techniques, sleep timing consistency, dietary timing, mindfulness practices, or social engagement strategies.
"""

    summary_prompt = f"""
USER PROFILE:
{user_profile}
--- END OF PROFILE ---

TASK:
Using the USER PROFILE and any context available, generate a structured **Weekly Summary** that highlights strengths, areas for improvement, and actionable goals.
Strictly follow the template. Do not add extra commentary or change section headings.

TEMPLATE (must be followed exactly):

**4) Weekly Summary**
- Summary: Write 2–3 sentences highlighting the user’s current well-being strengths, noting areas for improvement across food, physical activity, sleep, and mood.
- Goals: Provide 3–5 specific, measurable, and realistic goals for the upcoming week. Each goal should directly relate to food, exercise, sleep, or overall well-being, e.g., daily servings of fruits/vegetables, step count targets, hydration goals, sleep routines, or structured exercise sessions.
"""

    # (log label, prompt, token budget) for each section, in generation order.
    stages = (
        ("Food", food_prompt, 350),
        ("Physical", activity_prompt, 350),
        ("Sleep", sleep_prompt, 200),
        ("overall", summary_prompt, 200),
    )

    section_texts = []
    for label, stage_prompt, token_budget in stages:
        section = generate_step(stage_prompt, temperature=0.7, max_tokens=token_budget)
        print(f"{label} response:\n {section}")
        section_texts.append(section)

    return "\n\n".join(section_texts)

# === Gradio UI ===
# One text input is wired to generate_with_cot, so only `user_profile` is supplied by the UI;
# the function's remaining parameters keep their default values.
iface = gr.Interface(
    fn=generate_with_cot,
    inputs="text",
    outputs="text",
    title="RAG-Powered WellBeing Chatbot",
    description="Ask questions about well-being. The bot uses both fine-tuned LLaMA model + RAG retrieval for grounded answers."
)

# Launch with sharing and debug mode enabled.
iface.launch(share=True, debug=True)


# data generation

In [None]:
import pandas as pd
import json, re, time
from openai import OpenAI

import os  # needed here to read the key from the environment

# SECURITY: never hard-code API keys in a notebook. The key is read from the
# OPENAI_API_KEY environment variable (a KeyError is raised if it is not set).
# The key that used to be embedded on this line has been exposed in the notebook
# source and should be revoked.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])


# ---------- Utilities ----------
def clean_json_output(text: str):
    """Extract and parse the JSON object embedded in an LLM reply.

    The span from the first "{" to the last "}" is taken as the candidate. It is parsed
    as-is first; only when that fails are backslash-escaped double quotes unescaped and
    the parse retried. Unescaping unconditionally (the previous behaviour) corrupted
    valid JSON whose string values legitimately contain escaped quotes.

    Args:
        text: raw model output; may contain text before and after the JSON.

    Returns:
        The parsed JSON value, or None when `text` is not a string, contains no
        brace-delimited span, or neither variant of the span is valid JSON.
    """
    if not isinstance(text, str):
        return None
    m = re.search(r"\{.*\}", text, re.DOTALL)
    if not m:
        return None
    candidate = m.group(0)
    # Try the untouched text first, then the "small cleanup" variant.
    for attempt in (candidate, candidate.replace('\\"', '"')):
        try:
            return json.loads(attempt)
        except json.JSONDecodeError:
            continue
    return None

import re



def call_llm_json(prompt, temperature=0.6, max_tokens=650, retries=2):
    """Ask gpt-4o-mini for a JSON reply and return it parsed, retrying on failure.

    Args:
        prompt: user message content.
        temperature: sampling temperature for the request.
        max_tokens: completion token limit for the request.
        retries: total number of attempts.

    Returns:
        The parsed JSON from the first attempt whose reply parses. If the final
        attempt's reply does not parse, {"_raw": <reply text>}; if the final attempt
        raises, {"_error": <message>}. Returns None when `retries` is 0 or less.
    """
    for attempt in range(retries):
        is_last_attempt = attempt == retries - 1
        try:
            # If your SDK supports it, adding response_format={"type": "json_object"}
            # to this call gives stricter JSON.
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=max_tokens,
            )
            raw = completion.choices[0].message.content.strip()
            parsed = clean_json_output(raw)
            if parsed is not None:
                return parsed
            # fallback: hand back the raw text once attempts are exhausted
            if is_last_attempt:
                return {"_raw": raw}
        except Exception as e:
            if is_last_attempt:
                return {"_error": str(e)}
            # brief pause before the next attempt after an exception
            time.sleep(0.7)

def row_to_profile_text(row):
    """Render one data row as the five-line profile text used in the prompts.

    Args:
        row: mapping (e.g. a DataFrame row) with the demographic, activity, sleep,
            diet and weekly food-consumption columns read below.

    Returns:
        Five newline-separated lines: demographics, physical activity, sleep,
        dietary preference, weekly food consumption.
    """
    demographics = (
        f"Demographics: Gender({row['gender']}), Age({row['age']}), "
        f"Height({row['height_cm']} cm), Weight({row['weight_kg']} kg), BMI({row['bmi']})"
    )
    # NOTE: the doubled ")" at the end of this line is part of the existing output format
    # and is reproduced as-is.
    activity = (
        f"Physical Activity: Weekly Steps({row['weekly_steps']}), "
        f"Zone Minutes per week({row['zone_minutes']}), "
        f"Exercise Sessions per week({row['exercise_sessions']}))"
    )
    sleep = f"Sleep: Average hours per night({row['hours_sleep']})"
    diet = f"Dietary Preference: Declared Diet({row['Declared_Diet']}), Allergies({row['Allergies']})"
    food = (
        f"Weekly Food Consumption: Dairy({row['Dairy_liters']} L), "
        f"Legumes({row['Legumes_grams']} g), Meat({row['Meat_grams']} g), "
        f"Fruits({row['Fruits_grams']} g), Vegetables({row['Vegetables_grams']} g), "
        f"Grains({row['Grains_grams']} g), Nuts({row['Nuts_grams']} g), "
        f"Water({row['hydration_level']} L)"
    )
    return "\n".join((demographics, activity, sleep, diet, food))

def assemble_report(food, activity, sleep, summary):
    """Format the four section results into the final report text.

    Args:
        food, activity, sleep, summary: per-section results, normally dicts of parsed
            JSON. A missing key, or a value that is not a dict at all, renders as an
            empty field.

    Returns:
        Four titled bullet blocks (food, activity, sleep, summary) separated by
        blank lines.
    """
    def field(section, key):
        # Tolerate non-dict section results by rendering an empty value.
        if isinstance(section, dict):
            return section.get(key, "")
        return ""

    def render(title, section, rows):
        lines = [f"**{title}**"]
        lines.extend(f"- {label}: {field(section, key)}" for label, key in rows)
        return "\n".join(lines)

    blocks = [
        render("1) Food Recommendation", food, [
            ("Overall Assessment", "overall_assessment"),
            ("Areas of Improvements", "areas_of_improvement"),
            ("Suggested Meals", "suggested_meals"),
        ]),
        render("2) Physical Activity", activity, [
            ("Activity Assessment", "activity_assessment"),
            ("Zone Minutes and Intensity Feedback", "zone_minutes_feedback"),
            ("Strength/Cardio Tips", "strength_cardio_tips"),
            ("Weekly Goals", "weekly_goals"),
        ]),
        render("3) Sleep & Well-being", sleep, [
            ("Sleep Review", "sleep_review"),
            ("Suggestions", "suggestions"),
        ]),
        render("4) Weekly Summary", summary, [
            ("Summary", "summary"),
            ("Goals", "goals"),
        ]),
    ]
    return "\n\n".join(blocks)

# ---------- Prompts (section-by-section) ----------
def prompt_food(profile_text):
    """Build the nutrition-coach prompt for one profile.

    Args:
        profile_text: the profile text to embed in the prompt.

    Returns:
        Prompt text asking for a JSON object with the keys "overall_assessment",
        "areas_of_improvement" and "suggested_meals".
    """
    food_prompt = f"""
You are a nutrition coach. Speak directly to the person (use "you" / "your") instead of saying "the user".

Based ONLY on this profile:

{profile_text}

Return JSON with keys:
{{
  "overall_assessment": "Analyze your data and weekly food intake to identify any inconsistencies, contradictions, or nutritional deficiencies in your diet.",
  "areas_of_improvement": "Each item names a low-intake group and a concrete weekly target with units. Also if you are consuming high quantity of some food then recommend reducing it.",
  "suggested_meals": "When identifying food deficiencies (e.g., low fruit intake), list a few specific examples of foods rich in the missing nutrients (e.g., oranges for Vitamin C). After highlighting these foods, provide practical meal recommendations that incorporate them."
}}
Return ONLY JSON.
"""
    return food_prompt


def prompt_activity(profile_text, food_json):
    """Build the fitness-coach prompt for one profile, with the food result as context.

    Args:
        profile_text: the profile text to embed in the prompt.
        food_json: parsed result of the food step; serialized into the prompt as JSON.

    Returns:
        Prompt text asking for a JSON object with the keys "activity_assessment",
        "zone_minutes_feedback", "strength_cardio_tips" and "weekly_goals".
    """
    # The schema below is kept valid JSON: the entry before "weekly_goals" ends with a
    # comma, and "weekly_goals" asks for activity goals on a single line (it previously
    # carried a copy of the food/meal instructions spread over two lines).
    return f"""
You are a fitness coach. Speak directly to the person (use "you" / "your").

PROFILE:
{profile_text}

FOOD CONTEXT (JSON):
{json.dumps(food_json, ensure_ascii=False)}

Return JSON with keys:
{{
  "activity_assessment": "Analyze your total weekly steps, exercise sessions, and physical data, taking into account your meal intake and overall lifestyle. Identify strengths and areas needing improvement.",
  "zone_minutes_feedback": "Evaluate your current zone minutes and exercise intensity. Provide suggestions to increase or decrease intensity or duration as appropriate.",
  "strength_cardio_tips": "Recommend specific strength and cardio exercises tailored to you, including sets, reps, frequency, and exercise type (e.g., 5 sets of squats twice a day).",
  "weekly_goals": "List 3–4 measurable and realistic weekly activity goals. Each must include frequency and duration (e.g., 'Walk 6,000 steps daily' or 'Add 1 extra strength session per week')."
}}
Return ONLY JSON.
"""


def prompt_sleep(profile_text, food_json, activity_json):
    """Build the sleep-and-recovery prompt, with the food and activity results as context.

    Args:
        profile_text: the profile text to embed in the prompt.
        food_json: parsed result of the food step.
        activity_json: parsed result of the activity step.

    Returns:
        Prompt text asking for a JSON object with the keys "sleep_review" and
        "suggestions".
    """
    # Serialize the earlier results first; non-ASCII characters are kept as-is.
    food_context = json.dumps(food_json, ensure_ascii=False)
    activity_context = json.dumps(activity_json, ensure_ascii=False)
    return f"""
You are a sleep & recovery coach. Speak directly to the person (use "you" / "your").
Use profile + FOOD + ACTIVITY context.

PROFILE:
{profile_text}

FOOD CONTEXT:
{food_context}

ACTIVITY CONTEXT:
{activity_context}

Return JSON with keys:
{{
  "sleep_review": "Analyze your average sleep hours and mood patterns. Identify any correlations between your diet, lifestyle, and sleep quality.",
  "suggestions": "Provide actionable recommendations or activities to improve your sleep quality and mood, tailored to your habits and lifestyle."
}}
Return ONLY JSON.
"""


def prompt_summary(profile_text, food_json, activity_json, sleep_json):
    """Build the weekly wrap-up prompt from the profile and the three earlier results.

    Args:
        profile_text: the profile text to embed in the prompt.
        food_json: parsed result of the food step.
        activity_json: parsed result of the activity step.
        sleep_json: parsed result of the sleep step.

    Returns:
        Prompt text asking for a JSON object with the keys "summary" and "goals".
    """
    # Serialize the earlier results first; non-ASCII characters are kept as-is.
    food_context = json.dumps(food_json, ensure_ascii=False)
    activity_context = json.dumps(activity_json, ensure_ascii=False)
    sleep_context = json.dumps(sleep_json, ensure_ascii=False)
    return f"""
Create a SHORT weekly wrap-up that stitches FOOD + ACTIVITY + SLEEP into an actionable plan.
Speak directly to the person (use "you" / "your").

PROFILE:
{profile_text}

FOOD:
{food_context}

ACTIVITY:
{activity_context}

SLEEP:
{sleep_context}

Return JSON with keys:
{{
  "summary": "Write 2–3 sentences only. Keep it concise, focus on the *big picture*. Address the person as 'you'.",
  "goals": "List 3–5 top goals max. Each goal ≤15 words, clearly actionable, written as 'your goals' or 'you should'. Example: 'Increase your daily steps to 8k'."
}}
Return ONLY JSON.
"""



# ---------- One-row pipeline ----------
def generate_full_report_for_row(row):
    """Run the four-step generation pipeline for one data row.

    Each step is prompted with the profile plus the parsed results of the steps before
    it (food, then activity, then sleep, then the weekly summary).

    Args:
        row: mapping accepted by `row_to_profile_text`.

    Returns:
        dict with "input" (the profile text) and "output" (the assembled report).
    """
    profile_text = row_to_profile_text(row)

    food_prompt = prompt_food(profile_text)
    food = call_llm_json(food_prompt, temperature=0.7)

    activity_prompt = prompt_activity(profile_text, food)
    activity = call_llm_json(activity_prompt, temperature=0.7)

    sleep_prompt = prompt_sleep(profile_text, food, activity)
    sleep = call_llm_json(sleep_prompt, temperature=0.7)

    summary_prompt = prompt_summary(profile_text, food, activity, sleep)
    weekly_summary = call_llm_json(summary_prompt, temperature=0.6)

    return dict(
        input=profile_text,
        output=assemble_report(food, activity, sleep, weekly_summary),
    )

# ---------- Example: run for first 5 rows ----------
import os
import json
import pandas as pd

if __name__ == "__main__":
    weekly_df = pd.read_csv("/content/train_sample_enriched_random (1).csv")
    sample = weekly_df.iloc[1700:2000]   # control how many to generate

    output_path = "diet_dataset_stepwise.jsonl"

    # The file is opened once in write mode ("w"): each run replaces the previous output.
    with open(output_path, "w", encoding="utf-8") as f:
        rows = (row for _, row in sample.iterrows())
        for i, row in enumerate(rows, start=1):
            item = generate_full_report_for_row(row)

            # One JSON object per line, written as soon as it is generated.
            f.write(json.dumps(item, ensure_ascii=False) + "\n")

            if i % 10:
                continue
            # Every 10th row: flush Python's buffer and ask the OS to sync to disk.
            f.flush()
            os.fsync(f.fileno())
            print(f"✅ Saved {i} rows so far...")


In [None]:
from datasets import load_dataset, Dataset, concatenate_datasets
import pandas as pd

from datasets import load_dataset, Dataset, concatenate_datasets
import json

# 🔹 Path to the JSONL file written by the data-generation cell
data_path = "diet_dataset_stepwise.jsonl"

# Load the newly generated rows from JSONL
new_dataset = load_dataset("json", data_files=data_path, split="train")

# Load the dataset currently on the Hub (this step is required by the concatenation below)
existing_dataset = load_dataset("AnjaliNV/Templete2", split="train")

# 🔹 Append the new rows after the existing ones
combined_dataset = concatenate_datasets([existing_dataset, new_dataset])

# Push the combined dataset back to the same Hub repo it was loaded from
combined_dataset.push_to_hub("AnjaliNV/Templete2")


# Evaluation

In [None]:
!pip install evaluate