# Install Libraries

In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install transformers==4.46.0 tokenizers==0.20.3 accelerate>=0.33 bitsandbytes==0.43.1
!pip install langchain==0.1.20 langchain-community==0.0.38 langchain-core==0.1.52
!pip install sentence-transformers==3.0.1 faiss-cpu==1.8.0
!pip install pypdf==3.17.4
!pip install protobuf==3.20.3

def verify_installation():
    try:
        import torch
        print(f"‚úÖ PyTorch: {torch.__version__}")
        print(f"   CUDA available: {torch.cuda.is_available()}")
        
        import transformers
        print(f"‚úÖ Transformers: {transformers.__version__}")
        
        import langchain
        print(f"‚úÖ LangChain: {langchain.__version__}")
        
        import sentence_transformers
        print(f"‚úÖ Sentence Transformers: {sentence_transformers.__version__}")
        
        import faiss
        print(f"‚úÖ FAISS: Installed")
        
        import pypdf
        print(f"‚úÖ PyPDF: {pypdf.__version__}")
        
        print("\nüéâ All packages installed correctly!")
        return True
        
    except Exception as e:
        print(f"‚ùå Error: {e}")
        return False

verify_installation()

# Import Libraries

In [1]:
import numpy as np
import pandas as pd 
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/dietplan/diet1.pdf


In [2]:
import re
import json
import torch
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from langchain_community.llms import HuggingFacePipeline  
from langchain_community.document_loaders import PyPDFLoader 
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

2025-11-26 00:51:53.967486: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764118313.992354     534 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764118313.999983     534 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
from huggingface_hub import login
login("HF-TOKEN")

# Quantize Model

## Configurations

In [4]:
llm_name = "meta-llama/Meta-Llama-3.1-8B-Instruct" # "mistralai/Mistral-7B-Instruct-v0.2"
save_dir = "Meta_Llama-3.1_8B_Instruct_4bit_bnb" # "mistral_7B_Instruct_v0.2_4bit_bnb"
bnb_cfg = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16)

## Load model

In [5]:
model = AutoModelForCausalLM.from_pretrained(llm_name, quantization_config=bnb_cfg, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(llm_name, use_fast=True)

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

## Save quantized model

In [6]:
try:
    model.save_pretrained(save_dir, safe_serialization=True)
    tokenizer.save_pretrained(save_dir)
    print("Saved quantized model to:", save_dir)
except Exception as e:
    print("Save 4-bit not supported in this env:", e)

Saved quantized model to: Meta_Llama-3.1_8B_Instruct_4bit_bnb


# Load Models

In [4]:
save_dir = "Meta_Llama-3.1_8B_Instruct_4bit_bnb"

In [5]:
def load_model_and_tokenizer(path_or_id):
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    tokenizer = AutoTokenizer.from_pretrained(path_or_id, use_fast=True)
    model = AutoModelForCausalLM.from_pretrained(
        path_or_id,
        quantization_config=bnb_cfg,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    return tokenizer, model

In [6]:
def load_model_and_tokenizer(path_or_id):
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    
    # Load tokenizer with trust_remote_code
    tokenizer = AutoTokenizer.from_pretrained(
        path_or_id, 
        use_fast=True,
        trust_remote_code=True  # Add this
    )
    
    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        path_or_id,
        quantization_config=bnb_cfg,
        device_map="auto",
        trust_remote_code=True  # Add this
    )
    
    return tokenizer, model

In [7]:
if Path(save_dir).exists():
    print(f"Loading local 4-bit checkpoint: {save_dir}")
    tokenizer, model = load_model_and_tokenizer(save_dir)
else:
    print("Local 4-bit checkpoint not found (saving likely not supported in this env).")

Loading local 4-bit checkpoint: Meta_Llama-3.1_8B_Instruct_4bit_bnb


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_new_tokens=512,
    do_sample=False,
    temperature=0.1,
    top_p=0.9
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# RAG Setup

## Create VectorDB

In [9]:
# Load PDF
loader = PyPDFLoader("/kaggle/input/dietplan/diet1.pdf")
docs = loader.load()

# Split
splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=200
)
chunks = splitter.split_documents(docs)

# Build embeddings
embeddings = HuggingFaceEmbeddings(
    model_name= "intfloat/e5-large-v2", # "BAAI/bge-m3"
)

# Build vector database
vectordb = FAISS.from_documents(chunks, embeddings)
retriever = vectordb.as_retriever(k=5)

# Save to folder
vectordb.save_local("/kaggle/working/diet_vectorstore")

print("Vectorstore created!")

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


Vectorstore created!


In [10]:
# Get the internal document IDs
doc_store_ids = vectordb.index_to_docstore_id.values()
print(f"Total number of text chunks: {len(doc_store_ids)}")

# Print the content for the first 5 documents for a quick look
limit = 5 
print(f"Printed {limit} chunks")
for i, doc_id in enumerate(doc_store_ids):
    if i >= limit:
        break
    
    # Retrieve the actual Document object (which contains the text and metadata)
    doc = vectordb.docstore._dict.get(doc_id)
    
    print(f"\n|| Chunk {i} ||")
    print(doc.page_content)

Total number of text chunks: 32
Printed 5 chunks

|| Chunk 0 ||
Day 1: 
Breakfast:
1 low-fat plain Greek yogurt (6oz)
¬æ cup blueberries
12 almonds or 2 tablespoons  of ground flaxseed meal 
Coffee with milk and a sugar substitute
Lunch: 
1 piece of grilled chicken (4oz)
1 whole grain wrap (substitute corn as a gluten-free option)
Fill with onions, peppers, spinach, 
and ‚Öì avocado 1¬º cup of strawberries on the side
Snack: 
1 small apple with 1 tablespoon all 
natural nut butter (peanut, almond, 
cashew, sunflower seed) 
Dinner: 
1 grilled *turkey burger (4oz)1 medium baked sweet potato 
topped with cinnamon
1 cup spinach (saut√©ed with garlic and 1 teaspoon olive oil)
Side salad with 1 tablespoon dressing
* Made with 99% lean ground white meatDay 2:  
Breakfast: 
3 egg whites and 1 whole egg, 
scrambled 
2 slices of whole grain bread

|| Chunk 1 ||
Side salad with 1 tablespoon dressing
* Made with 99% lean ground white meatDay 2:  
Breakfast: 
3 egg whites and 1 whole egg, 
scramble

# Classify Query

In [11]:
def smart_router(query):
    query_lower = query.lower()
    
    # ===== PLAN INDICATORS =====
    plan_keywords = [
        # Days
        'day ', 'week ',
        'today', 'tomorrow',
        # Meals
        'breakfast', 'lunch', 'dinner', 'snack',
        # Plan references
        'my plan', 'the plan', 'diet plan', 'in the plan', 'from the plan',
        'schedule', 'scheduled'
    ]
    
    if any(keyword in query_lower for keyword in plan_keywords):
        print("üîÑ Router: Plan Reference")
        return "DOCUMENT"
    
    # ===== GENERAL KNOWLEDGE INDICATORS =====
    general_indicators = [
        # Recipe requests
        'how to cook', 'how to make', 'how to prepare', 'recipe for',
        # General nutrition
        'benefits of', 'what is', 'why is', 'should i', 'is it good',
        # Health advice
        'healthy', 'nutrition', 'calories in', 'protein in'
    ]
    
    if any(indicator in query_lower for indicator in general_indicators):
        print("üîÑ Router: General knowledge")
        return "GENERAL"
    
    print("üîÑ Router: Default")
    return "DEFAULT"

# Output Parser

## Plan Answer

In [12]:
document_response_schemas = [
    ResponseSchema(name="answer", description="The main answer to the user's question"),
    ResponseSchema(name="items", description="List of food items with portions if applicable", type="list"),
    ResponseSchema(name="source", description="Where this information came from in the plan"),
    ResponseSchema(name="confidence", description="How confident you are in this answer", type="string")
]

In [13]:
document_parser = StructuredOutputParser.from_response_schemas(document_response_schemas)

## General Answer

In [14]:
general_response_schemas = [
    ResponseSchema(name="answer", description="The main answer to the user's question"),
    ResponseSchema(name="key_points", description="Key points as bullet points", type="list"),
    ResponseSchema(name="notes", description="Any important notes or disclaimers", type="string")
]

In [15]:
general_parser = StructuredOutputParser.from_response_schemas(general_response_schemas)

# Prompts

In [16]:
DOCUMENT_PROMPT = PromptTemplate(
    input_variables=["context", "query"],
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Answer the user's question using ONLY the provided diet plan context.

IMPORTANT:
- Extract exact information from the context
- Be specific about portions and quantities
- If information is not found, set confidence to "low" and explain what's missing
- ALWAYS respond with ONLY a JSON object in this exact format:

```json
{{
	"answer": "your main answer here",
	"items": ["list", "of", "food", "items"],
	"source": "where this came from in the plan", 
	"confidence": "high/medium/low"
}}
CONTEXT:
{context}

<|eot_id|><|start_header_id|>user<|end_header_id|>
Question: {query}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
)

In [17]:
GENERAL_PROMPT = PromptTemplate(
    input_variables=["query"],
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful nutrition expert. Provide clear, structured answers to general nutrition questions.

ALWAYS respond with ONLY a JSON object in this exact format:

```json
{{
	"answer": "your main answer here",
	"key_points": ["point 1", "point 2", "point 3"],
	"notes": "any important notes"
}}
<|eot_id|><|start_header_id|>user<|end_header_id|>
Question: {query}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
)

In [18]:
DEFAULT_PROMPT = PromptTemplate(
    input_variables=["context", "query"],
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Try to answer using the diet plan context first. 
If the information is not in the plan, use your general nutrition knowledge.

STRATEGY:
1. FIRST check if the query can be answered from the diet plan context
2. If found in plan: provide specific details with portions
3. If NOT in plan: use your general nutrition knowledge to provide helpful advice
4. ALWAYS respond with ONLY a JSON object in this exact format:

```json
{{
	"answer": "your main answer here",
	"items": ["list", "of", "food", "items"],
	"source": "where this came from (plan or general knowledge)", 
	"confidence": "high/medium/low"
}}
CONTEXT:
{context}

<|eot_id|><|start_header_id|>user<|end_header_id|>
Question: {query}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
)

In [19]:
def format_general_response(parsed_response):
    output = [f"üí° {parsed_response['answer']}"]
    
    if parsed_response.get("key_points"):
        output.append("\nüåü Key Points:")
        for point in parsed_response["key_points"]:
            output.append(f"  ‚Ä¢ {point}")
    
    if parsed_response.get("notes"):
        output.append(f"\nüìù Note: {parsed_response['notes']}")
    
    return "\n".join(output)

# Chains

In [20]:
def extract_json_from_response(full_response):
    # Look for JSON code blocks
    json_pattern = r'```json\s*(.*?)\s*```'
    matches = re.findall(json_pattern, full_response, re.DOTALL)
    
    if matches:
        # Take the first JSON code block found
        json_str = matches[0].strip()
        try:
            return json.loads(json_str)
        except json.JSONDecodeError:
            # If JSON is malformed, try to extract just the JSON object
            json_obj_pattern = r'\{.*\}'
            obj_match = re.search(json_obj_pattern, full_response, re.DOTALL)
            if obj_match:
                try:
                    return json.loads(obj_match.group())
                except:
                    pass
    
    # Fallback: try to find any JSON-like structure
    try:
        start_idx = full_response.find('{')
        end_idx = full_response.rfind('}') + 1
        if start_idx != -1 and end_idx != -1:
            json_str = full_response[start_idx:end_idx]
            return json.loads(json_str)
    except:
        pass
    
    raise ValueError("Could not extract valid JSON from response")

In [21]:
def create_parsed_chain(prompt, parser):
    chain = LLMChain(llm=llm, prompt=prompt)
    
    def run_with_parsing(inputs):
        raw_response = chain.run(inputs)
        print(f"üì® Raw response preview: {raw_response[:200]}...")  # Debug
        
        try:
            parsed_data = extract_json_from_response(raw_response)
            return parsed_data
        except Exception as e:
            print(f"‚ö†Ô∏è Parsing failed, using fallback: {e}")
            # Fallback: return simple structure
            return {
                "answer": raw_response.split('<|start_header_id|>assistant<|end_header_id|>')[-1].strip(),
                "error": "Automatic parsing failed"
            }
    
    return run_with_parsing

In [22]:
document_chain = create_parsed_chain(DOCUMENT_PROMPT, document_parser)
general_chain = create_parsed_chain(GENERAL_PROMPT, general_parser)
default_chain = create_parsed_chain(DEFAULT_PROMPT, ["answer", "items", "source", "confidence"])

  warn_deprecated(


# Main

In [23]:
def format_document_response(parsed_response):
    answer = parsed_response.get("answer", "No answer provided")
    
    # If answer already contains the full response, just return it cleaned up
    if "```json" in answer:
        # Extract the actual answer text
        lines = answer.split('\n')
        clean_lines = [line for line in lines if not line.strip().startswith('```')]
        answer = '\n'.join(clean_lines).strip()
    
    return f"üìã {answer}"

def format_general_response(parsed_response):
    answer = parsed_response.get("answer", "No answer provided")
    
    # If answer already contains the full response, clean it up
    if "```json" in answer:
        lines = answer.split('\n')
        clean_lines = [line for line in lines if not line.strip().startswith('```')]
        answer = '\n'.join(clean_lines).strip()
    
    return f"üí° {answer}"

def format_default_response(parsed_response):
    answer = parsed_response.get("answer", "No answer provided")
    
    # If answer already contains the full response, just return it cleaned up
    if "```json" in answer:
        # Extract the actual answer text
        lines = answer.split('\n')
        clean_lines = [line for line in lines if not line.strip().startswith('```')]
        answer = '\n'.join(clean_lines).strip()
    
    return f"üîç {answer}"

In [24]:
def answer_question(query):    
    # Determine mode
    mode = smart_router(query)
    
    try:
        if mode == "DOCUMENT":
            # Retrieve relevant context
            docs = retriever.get_relevant_documents(query)
            context_text = "\n".join([doc.page_content for doc in docs])
            
            print(f"üìÑ Found {len(docs)} relevant sections")
            
            # Get parsed response
            parsed_response = document_chain({
                "context": context_text, 
                "query": query
            })
            
            # Format for user
            return format_document_response(parsed_response)
            
        elif mode == "GENERAL":
            print("üß† Using general knowledge")
            
            # Get parsed response
            parsed_response = general_chain({
                "query": query
            })
            
            # Format for user
            return format_general_response(parsed_response)

        else:  # DEFAULT mode
            print("üîÑ Default mode")
            docs = retriever.get_relevant_documents(query)
            context_text = "\n".join([doc.page_content for doc in docs])
            
            print(f"üìÑ Found {len(docs)} relevant sections to check")
            
            # Use DEFAULT chain which will try plan first, then general knowledge
            parsed_response = default_chain({
                "context": context_text, 
                "query": query
            })
            
            # Format for user with special DEFAULT formatting
            return format_default_response(parsed_response)
            
    except Exception as e:
        return f"‚ùå Sorry, I encountered an error: {str(e)}"

# Run

In [28]:
def test():
    test_questions = [
        "what I have to eat in day 3 dinner?",
        "what are the benefits of blueberries?",
        "how to make healthy cheesecake?",
        "can I replace rice with quinoa in my plan?"
    ]
    
    for question in test_questions:
        print(f"\nüß™ Testing: {question}")
        try:
            result = answer_question(question)
            print(result)
            print("---")
        except Exception as e:
            print(f"‚ùå Error: {e}")
            print("---")

test()

  warn_deprecated(
  warn_deprecated(
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



üß™ Testing: what I have to eat in day 3 dinner?
üîÑ Router: Plan Reference
üìÑ Found 4 relevant sections


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Answer the user's question using ONLY the provided diet plan context.

IMPORTANT:
- Extract exact inf...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
üìã {
  "answer": "1 grilled *turkey burger (4oz), 1 medium baked sweet potato topped with cinnamon, 1 cup spinach (saut√©ed with garlic and 1 teaspoon olive oil), Side salad with 1 tablespoon dressing",
  "items": ["grilled *turkey burger", "1 medium baked sweet potato", "1 cup spinach", "Side salad with 1 tablespoon dressing"],
  "source": "Day 3:  Breakfast: 1 whole grain English muffin 1 tablespoon peanut butter ¬æ cup blueberries2 slices low-sodium turkey Lunch: Tuna Salad1 can of tuna in water, drained 1 whole grain wrap",
  "confidence": "high"
}
---

üß™ Testing: what are the benefits of blueberries?
üîÑ Router: General knowledge
üß† Using general knowledge


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful nutrition expert. Provide clear, structured answers to general nutrition questions.

ALWAYS respond with ONLY a JSON objec...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
üí° {
	"answer": "Blueberries are a nutrient-rich food that provides several health benefits due to their high content of antioxidants, vitamins, and minerals. Some of the key benefits of blueberries include:",
	"key_points": [
		"Rich in antioxidants, which can help protect against cell damage and reduce the risk of chronic diseases such as heart disease, cancer, and cognitive decline",
		"High in fiber, which can help promote digestive health and support healthy blood sugar levels",
		"Good source of vitamins C and K, as well as manganese and copper, which are essential for healthy bones and immune function",
		"May help improve memory and cognitive function due to their high

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful nutrition expert. Provide clear, structured answers to general nutrition questions.

ALWAYS respond with ONLY a JSON objec...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
üí° {
	"answer": "To make a healthy cheesecake, consider the following steps: Use a whole-grain crust made from almond flour or whole-wheat pastry flour, choose a lower-fat cream cheese and a non-fat plain Greek yogurt to reduce saturated fat and calories. Add in some protein-rich Greek yogurt to increase the protein content. Use natural sweeteners like honey or maple syrup instead of refined sugar. Finally, load up on fresh fruits like berries or citrus zest to add flavor and antioxidants.",
	"key_points": [
		"Use a whole-grain crust",
		"Choose lower-fat cream cheese and non-fat plain Greek yogurt",
		"Add protein-rich Greek yogurt and natural sweeteners",
		"Load up on fres

In [None]:
if __name__ == "__main__":
    print("ü§ñ Smart Diet Assistant Ready!")
    
    
    print("\n" + "="*50)
    print("Type 'exit' to quit.\n")
    
    while True:
        user_query = input("\nüí¨ Ask a question: ")
        if user_query.lower() in ["exit", "quit"]:
            break
        
        try:
            answer = answer_question(user_query)
            print(f"\n{answer}\n")
            
        except Exception as e:
            print(f"\n‚ùå Error: {str(e)}\n")

# GUI

In [32]:
!pip install fastapi uvicorn pyngrok nest-asyncio



In [None]:
# Restart server
import os
import signal
print("üî¥ Stopping old server...")
os.system("kill -9 $(lsof -t -i:8000) 2>/dev/null")

üî¥ Stopping old server...


In [None]:
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
from pyngrok import ngrok
import nest_asyncio
import threading
import uvicorn
import time
import json
import re

# ============================================
# CONFIGURATION
# ============================================

NGROK_AUTH_TOKEN = "NGROK-TOKEN"
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# ============================================
# HELPER FUNCTIONS
# ============================================

def clean_json_response(raw_response):
    try:
        # Step 1: Get the outer structure
        if isinstance(raw_response, dict):
            outer_data = raw_response
        else:
            outer_data = extract_json_from_response(str(raw_response))
        
        # Step 2: Get the answer field
        answer_field = outer_data.get("answer", "")
        
        # Step 3: Check if answer itself is JSON (nested structure)
        if isinstance(answer_field, str) and answer_field.strip().startswith('{'):
            try:
                # Parse the nested JSON
                inner_json = json.loads(answer_field)
                
                # Extract all fields from the nested JSON
                return {
                    "answer": inner_json.get("answer", ""),
                    "items": inner_json.get("items", []),
                    "source": inner_json.get("source", ""),
                    "confidence": inner_json.get("confidence", "medium"),
                    "key_points": inner_json.get("key_points", []),
                    "notes": inner_json.get("notes", "")
                }
            except json.JSONDecodeError:
                # If parsing fails, use the string as-is
                pass
        
        # Step 4: If not nested, use outer structure
        return {
            "answer": answer_field,
            "items": outer_data.get("items", []),
            "source": outer_data.get("source", ""),
            "confidence": outer_data.get("confidence", "medium"),
            "key_points": outer_data.get("key_points", []),
            "notes": outer_data.get("notes", "")
        }
        
    except Exception as e:
        print(f"‚ùå Error cleaning response: {e}")
        import traceback
        traceback.print_exc()
        
        # Fallback
        if isinstance(raw_response, dict):
            answer = raw_response.get("answer", str(raw_response))
            # Try one more time to parse if it's JSON string
            if isinstance(answer, str) and answer.strip().startswith('{'):
                try:
                    nested = json.loads(answer)
                    return {
                        "answer": nested.get("answer", answer),
                        "items": nested.get("items", []),
                        "source": nested.get("source", ""),
                        "confidence": nested.get("confidence", "low")
                    }
                except:
                    pass
            
            return {
                "answer": answer,
                "items": [],
                "source": "model",
                "confidence": "low"
            }
        
        return {
            "answer": str(raw_response),
            "items": [],
            "source": "model",
            "confidence": "low"
        }

# ============================================
# REQUEST/RESPONSE
# ============================================

class QueryRequest(BaseModel):
    query: str

class QueryResponse(BaseModel):
    answer: str
    mode: str
    items: list = []
    source: str = ""
    confidence: str = ""
    key_points: list = []
    notes: str = ""

# ============================================
# FASTAPI APP
# ============================================

app = FastAPI(
    title="Smart Diet Assistant API",
    description="AI-powered diet plan assistant",
    version="1.0.0"
)

@app.get("/")
async def root():
    return {
        "status": "healthy",
        "message": "ü•ó Smart Diet Assistant API",
        "version": "1.0.0"
    }

@app.get("/health")
async def health_check():
    return {
        "status": "healthy",
        "model": "Meta-Llama-3.1-8B-Instruct",
        "vectordb": "FAISS with e5-large-v2"
    }

@app.post("/ask", response_model=QueryResponse)
async def ask_question(request: QueryRequest):
    """Ask a diet-related question"""
    try:
        query = request.query.strip()
        
        if not query:
            raise HTTPException(status_code=400, detail="Query cannot be empty")
        
        # Use your existing answer_question function
        mode = smart_router(query)
        
        if mode == "DOCUMENT":
            docs = retriever.get_relevant_documents(query)
            context_text = "\n".join([doc.page_content for doc in docs])
            raw_response = document_chain({"context": context_text, "query": query})
            
        elif mode == "GENERAL":
            raw_response = general_chain({"query": query})
            
        else:  # DEFAULT
            docs = retriever.get_relevant_documents(query)
            context_text = "\n".join([doc.page_content for doc in docs])
            raw_response = default_chain({"context": context_text, "query": query})
        
        # Clean the response (remove JSON formatting)
        cleaned = clean_json_response(raw_response)
        
        return QueryResponse(
            answer=cleaned["answer"],
            mode=mode,
            items=cleaned.get("items", []),
            source=cleaned.get("source", ""),
            confidence=cleaned.get("confidence", "medium"),
            key_points=cleaned.get("key_points", []),
            notes=cleaned.get("notes", "")
        )
        
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/upload-diet-plan")
async def upload_diet_plan(file: UploadFile = File(...)):
    """Upload a new diet plan PDF"""
    try:
        if not file.filename.lower().endswith(".pdf"):
            raise HTTPException(status_code=400, detail="File must be a PDF")
        
        pdf_bytes = await file.read()
        temp_path = "/kaggle/working/temp_diet.pdf"
        
        with open(temp_path, "wb") as f:
            f.write(pdf_bytes)
        
        # Process the PDF
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
        
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200)
        chunks = splitter.split_documents(docs)
        
        # Update global vectordb
        global vectordb, retriever
        vectordb = FAISS.from_documents(chunks, embeddings)
        retriever = vectordb.as_retriever(k=5)
        vectordb.save_local("/kaggle/working/diet_vectorstore")
        
        return {
            "status": "success",
            "chunks": len(chunks),
            "filename": file.filename
        }
        
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# ============================================
# START SERVER
# ============================================

print("Configuring ngrok tunnel...")
public_url = ngrok.connect(8000)

print(f"\n{'='*70}")
print(f"üöÄ PUBLIC URL: {public_url}")
print(f"{'='*70}")
print(f"\nAPI Docs: {public_url}/docs")
print(f"ReDoc: {public_url}/redoc")
print(f"\n{'='*70}\n")

# Apply nest_asyncio for Jupyter
nest_asyncio.apply()

# Start server in background
def run_server():
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")

server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()

print("‚úÖ Server is running!")
print("\nUse this URL for your Streamlit app:")
print(f"   API_URL = '{public_url}'")
print()

# Keep server alive
try:
    print("\nServer running...")
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("\nüõë Server stopped")

üîß Configuring ngrok tunnel...

üöÄ PUBLIC URL: NgrokTunnel: "https://trophoplasmic-removed-cecily.ngrok-free.dev" -> "http://localhost:8000"

üìö API Docs: NgrokTunnel: "https://trophoplasmic-removed-cecily.ngrok-free.dev" -> "http://localhost:8000"/docs
üìñ ReDoc: NgrokTunnel: "https://trophoplasmic-removed-cecily.ngrok-free.dev" -> "http://localhost:8000"/redoc


‚úÖ Server is running!

üîó Save this URL for your Streamlit app:
   API_URL = 'NgrokTunnel: "https://trophoplasmic-removed-cecily.ngrok-free.dev" -> "http://localhost:8000"'

üìù Example Python request:

import requests

response = requests.post(
    'NgrokTunnel: "https://trophoplasmic-removed-cecily.ngrok-free.dev" -> "http://localhost:8000"/ask',
    json={'query': 'What should I eat for day 1 breakfast?'}
)
print(response.json())


‚è≥ Server running... Press Ctrl+C to stop



INFO:     Started server process [534]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
  warn_deprecated(
  warn_deprecated(
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üîÑ Router: Default
üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Try to answer using the diet plan context first. 
If the information is not in the plan, use your gen...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
INFO:     41.45.62.179:0 - "POST /ask HTTP/1.1" 200 OK


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üîÑ Router: Plan Reference
üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Answer the user's question using ONLY the provided diet plan context.

IMPORTANT:
- Extract exact inf...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
INFO:     41.45.62.179:0 - "POST /ask HTTP/1.1" 200 OK


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üîÑ Router: Default
üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Try to answer using the diet plan context first. 
If the information is not in the plan, use your gen...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
INFO:     41.45.62.179:0 - "POST /ask HTTP/1.1" 200 OK


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üîÑ Router: Plan Reference
üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Answer the user's question using ONLY the provided diet plan context.

IMPORTANT:
- Extract exact inf...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
INFO:     41.45.62.179:0 - "POST /ask HTTP/1.1" 200 OK


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üîÑ Router: Default
üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Try to answer using the diet plan context first. 
If the information is not in the plan, use your gen...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
INFO:     41.45.62.179:0 - "POST /ask HTTP/1.1" 200 OK


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üîÑ Router: General knowledge
üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful nutrition expert. Provide clear, structured answers to general nutrition questions.

ALWAYS respond with ONLY a JSON objec...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
INFO:     41.45.62.179:0 - "POST /ask HTTP/1.1" 200 OK


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


üîÑ Router: Default
üì® Raw response preview: <|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful diet plan assistant. Try to answer using the diet plan context first. 
If the information is not in the plan, use your gen...
‚ö†Ô∏è Parsing failed, using fallback: Could not extract valid JSON from response
INFO:     41.45.62.179:0 - "POST /ask HTTP/1.1" 200 OK

üõë Server stopped
