In [None]:
def generate_non_rag_answer(question):
    """Produce a free-text answer from the LLM alone, with no retrieved context.

    The question is interpolated into an ``[INST]`` / ``<<SYS>>`` tagged
    prompt that asks for a "yes"/"no"/"maybe" verdict followed by a detailed
    explanation, and the module-level ``llm`` callable is invoked once with
    fixed sampling options.

    Args:
        question: Biomedical question text to embed in the prompt.

    Returns:
        The ``text`` field of the first completion choice, with leading and
        trailing whitespace stripped.
    """
    # Fixed sampling configuration; generation halts on either stop marker.
    sampling_options = {
        "max_tokens": 512,
        "temperature": 0.3,
        "top_p": 0.9,
        "echo": False,
        "stop": ["</s>", "[INST]"],
    }

    # NOTE(review): the question is placed inside the <<SYS>> section and the
    # user turn carries only a generic instruction -- confirm this is intended
    # and consistent with the RAG pipeline's prompt before changing it.
    prompt = f"""<s>[INST] <<SYS>>
You are a professional medical assistant with expertise in biomedical knowledge.
Answer the following biomedical question to the best of your ability based on your training.

ANSWER STRUCTURE:
1. First, provide a short answer: "yes", "no", or "maybe"
2. Then, explain in detail based on your biomedical knowledge
3. If you're uncertain, please indicate the limitations of your knowledge

QUESTION: {question}
<</SYS>>

Please answer the biomedical question based on your knowledge. [/INST]"""

    completion = llm(prompt, **sampling_options)
    first_choice = completion['choices'][0]
    return first_choice['text'].strip()

def classify_non_rag_answer(question):
    """Predict a short yes/no/maybe label from the question text alone.

    The question is tokenized as the first segment of a pair whose second
    segment is an empty string (no evidence is supplied), padded/truncated to
    512 tokens, and passed through ``model_classify`` with gradients disabled.

    Args:
        question: Biomedical question text to classify.

    Returns:
        "yes" when the highest-scoring class index is 0, "no" when it is 1,
        and "maybe" for any other index.
    """
    # Class indices with an explicit label; every other index means "maybe".
    index_to_label = {0: "yes", 1: "no"}

    encoded = tokenizer_classify(
        question,
        "",  # empty second segment stands in for the missing evidence
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt",
    )

    # Move every tensor produced by the tokenizer onto the target device.
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)

    with torch.no_grad():
        logits = model_classify(**on_device).logits
        predicted_index = torch.argmax(logits, dim=1).item()

    return index_to_label.get(predicted_index, "maybe")

def answer_biomedical_question_non_rag(question):
    """Answer a biomedical question without any retrieval step.

    Echoes the question, obtains and echoes the short label from
    ``classify_non_rag_answer``, then obtains the long-form answer from
    ``generate_non_rag_answer``.

    Args:
        question: Biomedical question text.

    Returns:
        A dict with the keys ``question``, ``short_answer``,
        ``detailed_answer``, ``retrieved_evidence`` (always an empty list),
        ``relevance_scores`` (always an empty list) and ``method``
        (always ``"non_rag"``).
    """
    print(f"üîç Question: {question}")

    # Evidence-related fields stay empty because nothing is retrieved here.
    result = {
        "question": question,
        "short_answer": None,
        "detailed_answer": None,
        "retrieved_evidence": [],
        "relevance_scores": [],
        "method": "non_rag",
    }

    result["short_answer"] = classify_non_rag_answer(question)
    print(f" Short answer: {result['short_answer']}")

    result["detailed_answer"] = generate_non_rag_answer(question)
    return result

def compare_rag_vs_non_rag(question):
    """Compare the results of the RAG and non-RAG pipelines on one question.

    Prints a banner, runs ``answer_biomedical_question`` (RAG) and then
    ``answer_biomedical_question_non_rag`` on the same question, each under
    its own heading.

    Args:
        question: Biomedical question text passed to both pipelines.

    Returns:
        A dict with the RAG result under ``"rag"`` and the non-RAG result
        under ``"non_rag"``.
    """
    separator = "=" * 80
    print(separator)
    print("ü§ñ COMPARISON: RAG vs NON-RAG")
    print(separator)

    comparison = {}

    # The RAG pipeline runs first, followed by the retrieval-free baseline.
    print("\nüîÑ RAG APPROACH:")
    comparison["rag"] = answer_biomedical_question(question)

    print("\n‚ö° NON-RAG APPROACH:")
    comparison["non_rag"] = answer_biomedical_question_non_rag(question)

    return comparison

if __name__ == "__main__":
    # Smoke run: push one sample question through the non-RAG pipeline only
    # and print the fields of the returned result.
    test_question = "Does smoking cause lung cancer?"

    print(" RUNNING NON-RAG VERSION ONLY:")
    non_rag_result = answer_biomedical_question_non_rag(test_question)

    print("\nüìä FINAL NON-RAG RESULT:")
    # One print call; sep="\n" puts each field on its own line.
    print(
        f"Question: {non_rag_result['question']}",
        f"Short answer: {non_rag_result['short_answer']}",
        f"Detailed answer: {non_rag_result['detailed_answer']}",
        sep="\n",
    )
