In [None]:
import torch

# Sanity-check the runtime: report whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())  # Should print True
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # Prints GPU name
else:
    # get_device_name(0) raises when no CUDA device exists, so report the
    # situation instead of crashing the first cell of the notebook.
    print("No CUDA device detected; GPU-dependent cells below will not run.")

In [None]:
!pip install transformers datasets torch pandas numpy scikit-learn rouge-score nltk scispacy

!pip install sentence-transformers

In [None]:
pip install numpy torch transformers bert-score rouge-score nltk spacy sentence-transformers mover-score meteor-score bart-score pyemd evaluate

In [None]:
pip install pyemd pytorch_pretrained_bert

In [None]:
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_ner_bc5cdr_md-0.5.1.tar.gz

In [None]:
!pip uninstall -y numpy thinc spacy scispacy
!pip install numpy==1.26.4
!pip install spacy==3.7.2
!pip install scispacy==0.5.1


In [None]:
!pip install  bert-score transformers torch sentence-transformers spacy nltk pandas rouge-score accelerate bitsandbytes 

In [None]:
# Fetch the NLTK "punkt" resource. The import cell further down requests the
# same resource again, so this cell is redundant on a full top-to-bottom run.
import nltk
nltk.download("punkt")

**Importing Libraries**

In [None]:
# Block 1: Import Libraries
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import transformers
import logging
import re
import spacy
from nltk.tokenize import word_tokenize
import nltk
from rouge_score import rouge_scorer
from bert_score import score as bert_score
from sentence_transformers import SentenceTransformer, util
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Only let ERROR-level records through the "transformers" logger
# (silences the roberta-large warnings).
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.ERROR)

# Fetch the NLTK resources this notebook requests, in a fixed order.
for resource in ("punkt", "punkt_tab", "wordnet"):
    nltk.download(resource)

In [None]:
# Block 2: Load spaCy and SentenceTransformer Models
def load_spacy_model():
    """Return the spaCy pipeline registered as ``en_ner_bc5cdr_md``.

    The package must already be installed in the environment; loading is
    delegated entirely to ``spacy.load``.
    """
    pipeline_name = "en_ner_bc5cdr_md"
    pipeline = spacy.load(pipeline_name)
    return pipeline

def load_sentence_transformer(model_name="all-MiniLM-L6-v2", device=None):
    """Load the SentenceTransformer used for FCS.

    Args:
        model_name: Name of the SentenceTransformer checkpoint to load.
            Defaults to the checkpoint this notebook has always used.
        device: Device string passed to ``SentenceTransformer``. When
            ``None`` (default), "cuda" is used if a CUDA device is
            available, otherwise "cpu", so the cell no longer fails on
            CPU-only hosts.

    Returns:
        The loaded ``SentenceTransformer`` instance.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    return SentenceTransformer(model_name, device=device)

# Instantiate both helper models once at notebook scope; `nlp` and `embedder`
# are later handed to compute_metrics_per_query in the evaluation cell.
nlp = load_spacy_model()
embedder = load_sentence_transformer()

**Defining the Evaluation Dataset (questions, clinical contexts, and reference answers)**

In [None]:
# Manually defined evaluation set of 5 QA pairs (3 curated + 2 additional
# realistic). The three lists are parallel: questions[i] is asked about
# inputs[i] and compared against references[i].
questions = [
    "Are there any further procedures planned for the patient?",
    "Does the patient require long term monitoring?",
    "What precautions does the patient need to take post-discharge?",
    "What medications is the patient currently taking?",
    "What is the patient's primary diagnosis?"
]

# Clinical context handed to the model for each question.
inputs = [
    "Gender: F\nChief Complaint: Abdominal distention, nausea, and vomiting\nHistory: Cirrhosis, multiple paracenteses for ascites\nPlan: Schedule regular paracentesis every 2 weeks",
    "Gender: F\nChief Complaint: Abdominal distention, nausea, and vomiting\nPlan: Monitor weight and abdominal girth daily; assess for signs of fluid overload",
    "Gender: M\nChief Complaint: Abd pain, Hypotension\nDischarge Plan: Follow low sodium diet, take prescribed meds, and avoid strenuous activity",
    "Gender: F\nCurrent Medications: Lisinopril 10mg daily, Furosemide 40mg daily\nAllergies: None known\nAssessment: Hypertension, fluid retention",
    "Gender: M\nChief Complaint: Fever, Cough\nFindings: CXR shows consolidation in the right lower lobe\nAssessment: Community-acquired pneumonia"
]

# Ground-truth answers used by the evaluation step.
references = [
    "Yes, the patient requires regular paracentesis due to fluid accumulation.",
    "Yes, the patient requires close monitoring for fluid accumulation and symptoms.",
    "Follow up with the doctor or nurse practitioner. Avoid heavy lifting and follow dietary guidelines.",
    "The patient is currently taking Lisinopril and Furosemide.",
    "The patient's primary diagnosis is community-acquired pneumonia."
]

# Fail fast if an edit leaves the parallel lists misaligned; otherwise the
# mismatch would only surface later as an IndexError or misaligned metrics.
assert len(questions) == len(inputs) == len(references), (
    "questions, inputs and references must have the same number of entries"
)

**Loading the BioMistral-7B Model**


In [None]:
# Block 4: Load BioMistral 7B Model
def load_model_and_tokenizer(model_name="BioMistral/BioMistral-7B"):
    """Load a causal LM and its tokenizer with 4-bit NF4 quantization.

    The tokenizer falls back to its EOS token for padding when it has no
    dedicated pad token, and is set to pad on the left so that generated
    tokens directly follow the prompt when inputs are padded to a fixed
    length.

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        tuple: ``(model, tokenizer)`` with the model in eval mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Decoder-only models continue from the last position of the input, so
    # padding must sit on the left; right padding would make the model
    # generate after a run of pad tokens.
    tokenizer.padding_side = "left"

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        torch_dtype=torch.float16,
        # NOTE(review): trust_remote_code=True lets code shipped in the model
        # repository execute locally; confirm it is actually required for
        # this checkpoint and drop it if not.
        trust_remote_code=True
    )
    # Keep the model config in sync with whatever pad token the tokenizer
    # actually uses (its own pad token, or EOS via the fallback above).
    model.config.pad_token_id = tokenizer.pad_token_id
    model.eval()
    return model, tokenizer

# Load the default checkpoint once; `model` and `tokenizer` are reused by the
# generation cell below.
model, tokenizer = load_model_and_tokenizer()
# Plain string: the message has no placeholders, so no f-string is needed.
print("BioMistral-7B model and tokenizer loaded successfully!")

**Response Generation**

In [None]:
# Block 5: Prompt and Validation Functions
def create_prompt(question, context):
    """Build the instruction prompt sent to the model.

    Args:
        question: The clinical question to answer.
        context: Clinical notes to ground the answer in. ``None``, an empty
            string, or a whitespace-only string is replaced by a fixed
            "no data" placeholder.

    Returns:
        str: The prompt, ending with the answer cue so the model's
        continuation is the answer itself.
    """
    # `context and ...` guards against None, which would otherwise raise
    # AttributeError on .strip().
    has_context = bool(context and context.strip())
    context_text = context if has_context else "No specific clinical data provided"
    return f"""You are a clinical assistant. Provide concise, factual answers based ONLY on the available information.

Question: {question}
Available Context: {context_text}

Answer (just the factual medical response, no references to tables/figures):"""

# Matches references to tables/figures and the phrases "as shown in" /
# "refer to" as whole words, case-insensitively. The leading \b and the
# trailing "not followed by a letter" check keep ordinary clinical words such
# as "stable", "tablet", "tablespoon" or "prefer to" from being rejected.
_INVALID_PHRASE_RE = re.compile(
    r"\b(?:(?:table|figure)s?|as shown in|refer to)(?![a-z])",
    re.IGNORECASE,
)

def validate_answer(answer):
    """Reject answers that cite tables/figures; otherwise return them stripped.

    Args:
        answer: Decoded model output.

    Returns:
        str: A fixed fallback message when the answer mentions a table or
        figure or uses "as shown in" / "refer to"; otherwise ``answer`` with
        surrounding whitespace removed.
    """
    if _INVALID_PHRASE_RE.search(answer):
        return "Unable to generate proper response from available data"
    return answer.strip()

In [None]:
# Block 6: Dataset Class for Generation
class QADataset(Dataset):
    """Map-style dataset that turns (question, context) pairs into model inputs.

    Each item is the tokenized prompt for one question plus the raw strings
    needed for logging and evaluation.
    """
    def __init__(self, questions, inputs, references, tokenizer, max_length=256):
        """Store the parallel sample lists and tokenization settings.

        Args:
            questions: Sequence of question strings.
            inputs: Sequence of context strings, aligned with ``questions``.
            references: Sequence of reference answers, aligned with ``questions``.
            tokenizer: Callable tokenizer invoked on each prompt.
            max_length: Length every prompt is padded/truncated to.

        Raises:
            ValueError: If the three sequences differ in length.
        """
        # Catch misaligned inputs up front instead of failing with an
        # IndexError (or silently dropping samples) during iteration.
        if not (len(questions) == len(inputs) == len(references)):
            raise ValueError(
                "questions, inputs and references must have the same length "
                f"(got {len(questions)}, {len(inputs)}, {len(references)})"
            )
        self.questions = questions
        self.inputs = inputs
        self.references = references
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of QA samples."""
        return len(self.questions)

    def __getitem__(self, idx):
        """Tokenize the prompt for sample ``idx``.

        Returns:
            dict: ``input_ids`` and ``attention_mask`` with the batch
            dimension squeezed away, ``prompt_length`` (the tokenized input
            length, including any padding), and the raw ``question``,
            ``input_text`` and ``reference`` strings.
        """
        question = self.questions[idx]
        input_text = self.inputs[idx]
        prompt = create_prompt(question, input_text)
        encoding = self.tokenizer(
            prompt,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        return {
            # The tokenizer returns tensors with a leading batch axis of 1;
            # drop it so the DataLoader can add its own batch axis.
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            # Sequence length of the model input; generated tokens start
            # at this offset in the output of generate().
            'prompt_length': encoding['input_ids'].shape[1],
            'question': question,
            'input_text': input_text,
            'reference': self.references[idx]
        }


In [None]:
# Block 7: Generate Responses
def generate_responses(model, tokenizer, questions, inputs, references):
    """Generate one validated answer per question using greedy decoding.

    Every (question, context) pair is converted to a prompt by ``QADataset``,
    run through ``model.generate`` one sample at a time, decoded, and passed
    through ``validate_answer``. Progress and results are printed per sample.

    Args:
        model: Causal LM exposing ``generate``; inputs are moved to
            ``model.device`` when available, otherwise to "cuda".
        tokenizer: Tokenizer matching ``model``.
        questions: Sequence of question strings.
        inputs: Sequence of context strings, aligned with ``questions``.
        references: Sequence of ground-truth answers (only printed here).

    Returns:
        list[str]: Answers in dataset order. If an exception interrupts the
        loop, the error is printed and the answers produced so far are
        returned, so the list may be shorter than ``questions``.
    """
    # Token-id sequences for words the model must never emit.
    bad_words_ids = []
    for word in ("Table", "Figure"):
        encoded = tokenizer.encode(word, add_special_tokens=False)
        if encoded:
            bad_words_ids.append(encoded)

    # Greedy decoding (do_sample=False). `temperature` only affects sampling,
    # so it is intentionally not passed: it would be ignored and trigger a
    # configuration warning.
    generation_kwargs = {
        'max_new_tokens': 150,
        'do_sample': False,
        'repetition_penalty': 1.5,
        'no_repeat_ngram_size': 4,
        'eos_token_id': tokenizer.eos_token_id,
        'pad_token_id': tokenizer.pad_token_id,
    }
    if bad_words_ids:
        generation_kwargs['bad_words_ids'] = bad_words_ids

    qa_dataset = QADataset(questions, inputs, references, tokenizer)
    dataloader = DataLoader(qa_dataset, batch_size=1, shuffle=False)
    # Follow the model's own placement; fall back to the previous hard-coded
    # "cuda" for objects that do not expose `.device`.
    device = torch.device(getattr(model, "device", None) or "cuda")

    generated_outputs = []
    sample_number = 0
    # Pre-bind so the error handler below can always report them, even when
    # the failure happens before the first batch has been unpacked.
    question = None
    input_text = None

    try:
        for batch in dataloader:
            sample_number += 1
            # batch_size is 1, so index 0 unwraps the single sample.
            question = batch['question'][0]
            input_text = batch['input_text'][0]
            reference = batch['reference'][0]
            prompt_length = int(batch['prompt_length'][0])
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            print(f"\n=== Sample {sample_number} ===")
            print(f"Instruction: {question}")
            print(f"Input: {input_text}")

            with torch.amp.autocast(device_type=device.type, dtype=torch.float16):
                outputs = model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    **generation_kwargs
                )

            # generate() returns prompt + continuation; keep only the
            # continuation. If nothing was generated the slice is empty and
            # decodes to "", rather than echoing the prompt as the answer.
            new_tokens = outputs[0, prompt_length:]
            if new_tokens.numel() == 0:
                print(f"Warning: No new tokens generated for sample {sample_number}")

            generated_answer = tokenizer.decode(new_tokens, skip_special_tokens=True)
            generated_answer = validate_answer(generated_answer)

            print(f"Generated Answer: {generated_answer}")
            print(f"Ground Truth Answer: {reference}")

            generated_outputs.append(generated_answer)

        print(f"\nProcessed {sample_number} samples")
        return generated_outputs

    except Exception as e:
        # Best effort: report the failure and hand back what was produced.
        print(f"Error during generation: {str(e)}")
        print(f"Stopped at sample {sample_number}")
        print(f"Problematic sample details: {question}, {input_text}")
        return generated_outputs

# Generate responses for the hand-written QA pairs defined earlier; the
# resulting list is consumed by the evaluation cell. It can be shorter than
# `questions` because generate_responses returns partial results on error.
generated_outputs = generate_responses(model, tokenizer, questions, inputs, references)


**Evaluation Metrics**

In [None]:
pip install openai


In [None]:
# Block 9: Evaluate Generated Outputs
# NOTE(review): compute_metrics_per_query is not defined in any cell visible
# in this notebook -- confirm it is defined before this cell runs, otherwise a
# fresh "Restart & Run All" stops here with a NameError.
if generated_outputs:
    # generate_responses returns only the answers produced before an error,
    # in dataset order, so the outputs may cover just a prefix of the samples.
    # Evaluate that prefix against the matching references/questions instead
    # of passing lists of different lengths.
    n_generated = len(generated_outputs)
    if n_generated < len(references):
        print(f"Warning: only {n_generated} of {len(references)} samples were generated; evaluating those.")
    compute_metrics_per_query(
        generated_outputs,
        references[:n_generated],
        questions[:n_generated],
        nlp,
        embedder,
    )
else:
    print("No outputs generated due to error.")

# Block 10: Clear GPU Memory
# Release cached, unused GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()