In [None]:
!pip install rank-bm25 transformers nltk llama-cpp-python
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
import heapq
import json

import nltk
import torch
from llama_cpp import Llama
from rank_bm25 import BM25Okapi
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [None]:
# Location of the JSON file that provides the retrieval corpus.
# NOTE(review): presumably the PubMedQA unlabeled split ("pqa_u") — confirm.
file_path_pqau = "/content/ori_pqau.json"

# Be explicit about the encoding so loading does not depend on the platform's
# default text encoding.
with open(file_path_pqau, "r", encoding="utf-8") as f:
    pqa_u = json.load(f)

# One retrieval document per entry: the entry's "CONTEXTS" strings joined by spaces.
corpus = [" ".join(entry["CONTEXTS"]) for entry in pqa_u.values()]
if not corpus:
    # An empty index cannot rank anything; fail here with a clear message
    # instead of deeper inside the BM25 constructor.
    raise ValueError(f"No documents found in {file_path_pqau}")

# BM25 works on token lists; queries must be lower-cased and tokenized the same way.
tokenized_corpus = [nltk.word_tokenize(doc.lower()) for doc in corpus]
bm25 = BM25Okapi(tokenized_corpus)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from llama_cpp import Llama
import os
import shutil

# Run the classifier on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory holding the fine-tuned sequence-classification weights.
model_classify_path = "/content/results_biobert_finetuned"

# NOTE(review): the tokenizer comes from the base BioBERT checkpoint rather than
# from model_classify_path — presumably the fine-tuned directory ships no
# tokenizer files; confirm the vocabularies match.
tokenizer_classify = AutoTokenizer.from_pretrained("dmis-lab/biobert-base-cased-v1.1")
model_classify = AutoModelForSequenceClassification.from_pretrained(model_classify_path)
model_classify.to(device)

print("Loading LLaMA 7B model...")
llm = Llama(
    model_path="/content/llama-2-7b-chat.Q4_0.gguf",
    n_ctx=2048,
    # Keep the previous ceiling of 8 threads, but never ask for more threads
    # than the machine has CPUs (oversubscription slows CPU inference).
    n_threads=min(8, os.cpu_count() or 1),
    n_gpu_layers=0,  # CPU-only generation
    verbose=False
)
# Vietnamese status message: "Tokenizer and models loaded successfully!"
print(" Tokenizer và các mô hình đã được tải thành công!")


In [None]:
def retrieve_evidence(question, top_k=3):
    """Retrieve the top-k BM25-ranked documents for a question.

    The question is lower-cased and tokenized with ``nltk.word_tokenize``,
    scored against every document in the module-level ``bm25`` index, and the
    best ``top_k`` entries of the module-level ``corpus`` are returned.

    Args:
        question: Query string.
        top_k: Maximum number of documents to return. Values <= 0 yield
            empty results.

    Returns:
        A ``(retrieved_docs, retrieved_scores)`` tuple of two parallel lists,
        ordered from highest to lowest score. Documents with equal scores
        keep their corpus order.
    """
    # A negative top_k would otherwise be interpreted as a slice from the end
    # and silently return almost the whole corpus.
    if top_k <= 0:
        return [], []

    tokenized_query = nltk.word_tokenize(question.lower())
    scores = bm25.get_scores(tokenized_query)

    # nlargest is equivalent to sorted(..., reverse=True)[:top_k] (including
    # tie order) but avoids fully sorting the corpus for a handful of hits.
    top_indices = heapq.nlargest(top_k, range(len(scores)), key=lambda i: scores[i])

    retrieved_docs = [corpus[i] for i in top_indices]
    retrieved_scores = [scores[i] for i in top_indices]
    return retrieved_docs, retrieved_scores

def classify_answer(question, evidence):
    """Classify the answer to a question as "yes", "no" or "maybe".

    The evidence passages are joined into a single context string, encoded
    together with the question as a sentence pair (truncated/padded to 512
    tokens) and passed through the module-level ``model_classify``.

    Args:
        question: Question text (first segment of the pair).
        evidence: Iterable of evidence strings, or a single evidence string.

    Returns:
        "yes" for predicted class 0, "no" for class 1, "maybe" otherwise.
    """
    # A bare string is itself iterable: joining it directly would insert a
    # space between every character, so wrap it first.
    if isinstance(evidence, str):
        evidence = [evidence]
    combined_context = " ".join(evidence)

    inputs = tokenizer_classify(
        question,
        combined_context,
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt"
    )
    # The input tensors must live on the same device as the model.
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        logits = model_classify(**inputs).logits
    pred = torch.argmax(logits, dim=1).item()

    # NOTE(review): index -> label order must match the encoding used when the
    # classifier was fine-tuned — confirm against the training notebook.
    return {0: "yes", 1: "no"}.get(pred, "maybe")

def generate_comprehensive_answer(question, evidence_docs, evidence_scores):
    """Generate a detailed, evidence-grounded answer with the LLaMA chat model.

    Each retrieved document is rendered as
    ``[Document N, Relevance: S]: text`` and embedded, together with the
    question, in a Llama-2 chat prompt that is sent to the module-level
    ``llm``.

    Args:
        question: Question to answer.
        evidence_docs: Retrieved evidence texts.
        evidence_scores: Relevance scores, parallel to ``evidence_docs``.

    Returns:
        The generated completion text with surrounding whitespace stripped.
    """
    # NOTE(review): nothing here limits the prompt length; with many or long
    # documents it may exceed the context window configured for `llm` — confirm.
    evidence_text = "".join(
        f"[Document {rank}, Relevance: {score:.3f}]: {doc}\n\n"
        for rank, (doc, score) in enumerate(zip(evidence_docs, evidence_scores), start=1)
    )

    # No literal "<s>" at the start: llama-cpp-python prepends the BOS token
    # itself when tokenizing the prompt, so spelling it out duplicates BOS.
    prompt = f"""[INST] <<SYS>>
You are a professional medical assistant. Answer the question based STRICTLY on the provided evidence.
Use only information from the evidence documents below.

ANSWER STRUCTURE:
1. First, provide a short answer: "yes", "no", or "maybe"
2. Then, explain in detail based on the evidence
3. Finally, assess the reliability of the evidence

QUESTION: {question}

EVIDENCE:
{evidence_text}
<</SYS>>

Please answer the question based on the provided evidence. [/INST]"""

    response = llm(
        prompt,
        max_tokens=512,
        temperature=0.3,  # low temperature: keep the answer close to the evidence
        top_p=0.9,
        echo=False,       # return only the completion, not the prompt
        stop=["</s>", "[INST]"]
    )

    return response['choices'][0]['text'].strip()

def answer_biomedical_question(question, top_k_retrieve=3):
    """Run the full biomedical QA pipeline for one question.

    Steps: retrieve evidence with ``retrieve_evidence``, get a short label
    from ``classify_answer``, then a detailed explanation from
    ``generate_comprehensive_answer``. Progress is printed along the way.

    Args:
        question: Question to answer.
        top_k_retrieve: Number of evidence documents to retrieve.

    Returns:
        A dict with the keys ``question``, ``short_answer``,
        ``detailed_answer``, ``retrieved_evidence`` and ``relevance_scores``.
    """
    print(f"🔍 Question: {question}")

    evidence_docs, evidence_scores = retrieve_evidence(question, top_k=top_k_retrieve)
    print(f"📚 Retrieved {len(evidence_docs)} relevant documents")

    # The classifier and the generator both see the same retrieved evidence.
    short_answer = classify_answer(question, evidence_docs)
    print(f"✅ Short answer: {short_answer}")

    detailed_answer = generate_comprehensive_answer(question, evidence_docs, evidence_scores)

    return {
        "question": question,
        "short_answer": short_answer,
        "detailed_answer": detailed_answer,
        "retrieved_evidence": evidence_docs,
        "relevance_scores": evidence_scores
    }


In [None]:
if __name__ == "__main__":
    # End-to-end smoke run of the pipeline on a single example question.
    test_question = "Does aspirin reduce the risk of heart attack in patients with diabetes?"

    print("🚀 Starting biomedical QA pipeline...")
    result = answer_biomedical_question(test_question)

    print("\n" + "="*60)
    print("🎯 FINAL RESULTS:")
    print(f"Question: {result['question']}")
    print(f"Short Answer: {result['short_answer']}")
    print(f"Detailed Answer:\n{result['detailed_answer']}")
    print(f"\nNumber of Retrieved Documents: {len(result['retrieved_evidence'])}")
    # Show only the first 100 characters of each document to keep the output short.
    ranked_evidence = zip(result['retrieved_evidence'], result['relevance_scores'])
    for rank, (doc, score) in enumerate(ranked_evidence, start=1):
        print(f"Document {rank} (Score: {score:.3f}): {doc[:100]}...")