In [1]:
import os
import json
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import glob

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Model configuration
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

print(f"Loading {model_name}...")

# Configure 4-bit quantization to save memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name, trust_remote_code=True, padding_side="right"
)

# Add padding token if it doesn't exist
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load model with quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)

Loading Qwen/Qwen2.5-0.5B-Instruct...


`torch_dtype` is deprecated! Use `dtype` instead!


In [None]:
def find_transcribed_json_files():
    """Find all JSON files in transcribed directory"""
    transcribed_dir = r"medical_dataset\transcripts\transcribed"

    print(f"Searching for JSON files in: {transcribed_dir}")

    if not os.path.exists(transcribed_dir):
        print(f"Directory not found: {transcribed_dir}")
        print(f"Current directory: {os.getcwd()}")
        print(
            f"Available directories: {[d for d in os.listdir('.') if os.path.isdir(d)]}"
        )
        return []

    # Find JSON files recursively
    json_files = glob.glob(os.path.join(transcribed_dir, "**/*.json"), recursive=True)

    if not json_files:
        json_files = glob.glob(os.path.join(transcribed_dir, "*.json"))

    print(f"Found {len(json_files)} JSON files")

    for i, json_file in enumerate(json_files):
        print(f"  {i+1}. {os.path.basename(json_file)}")

    return json_files


# Find files
json_files = find_transcribed_json_files()

In [None]:
class QwenMedicalDataProcessor:
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def load_transcribed_conversations(self, json_files):
        """Load and process JSON conversations for Qwen 2.5"""
        training_examples = []
        successful_files = 0

        for json_file in json_files:
            try:
                print(f"Processing: {os.path.basename(json_file)}")

                with open(json_file, "r", encoding="utf-8") as f:
                    data = json.load(f)

                conversation = self.json_to_conversation(data)

                if conversation and len(conversation.strip()) > 50:
                    examples = self.create_qwen_training_pairs(conversation, json_file)
                    training_examples.extend(examples)
                    successful_files += 1
                    print(f"Created {len(examples)} examples")
                else:
                    print(f"Skipped - conversation too short")

            except Exception as e:
                print(f"Error: {e}")

        print(f"\nSuccessfully processed {successful_files}/{len(json_files)} files")
        print(f"Total training examples: {len(training_examples)}")
        return training_examples

    def json_to_conversation(self, data):
        """Convert JSON to conversation text"""
        if isinstance(data, list):
            conversation_lines = []
            for turn in data:
                speaker = turn.get("speaker", "Unknown")
                dialogue_lines = turn.get("dialogue", [])
                full_dialogue = " ".join(str(line) for line in dialogue_lines)

                speaker_label = "Doctor" if speaker == 1 else "Patient"
                conversation_lines.append(f"{speaker_label}: {full_dialogue}")

            return "\n".join(conversation_lines)
        else:
            return str(data)

    def create_qwen_training_pairs(self, conversation, source_file):
        """Create training examples in Qwen 2.5 chat format"""
        examples = []
        conversation_preview = conversation[:1000]  # Limit length

        # Qwen 2.5 uses special chat format
        # Example 1: Summary generation
        summary_messages = [
            {
                "role": "system",
                "content": "You are a medical AI assistant. Generate concise clinical summaries from doctor-patient conversations.",
            },
            {
                "role": "user",
                "content": f"Please summarize this medical conversation:\n\n{conversation_preview}",
            },
        ]
        summary_text = self.format_qwen_chat(summary_messages)
        examples.append(
            {
                "text": summary_text,
                "type": "summary",
                "source": os.path.basename(source_file),
            }
        )

        # Example 2: Information extraction
        extraction_messages = [
            {
                "role": "system",
                "content": "You are a medical AI assistant. Extract structured medical information from conversations.",
            },
            {
                "role": "user",
                "content": f"Extract key medical information from this conversation:\n\n{conversation_preview}\n\nPlease provide:\n- Symptoms\n- Clinical findings\n- Potential diagnoses\n- Recommendations",
            },
        ]
        extraction_text = self.format_qwen_chat(extraction_messages)
        examples.append(
            {
                "text": extraction_text,
                "type": "extraction",
                "source": os.path.basename(source_file),
            }
        )

        return examples

    def format_qwen_chat(self, messages):
        """Format messages in Qwen 2.5 chat format"""
        formatted_text = ""
        for message in messages:
            if message["role"] == "system":
                formatted_text += (
                    f"<|im_start|>system\n{message['content']}<|im_end|>\n"
                )
            elif message["role"] == "user":
                formatted_text += f"<|im_start|>user\n{message['content']}<|im_end|>\n"
            elif message["role"] == "assistant":
                formatted_text += (
                    f"<|im_start|>assistant\n{message['content']}<|im_end|>\n"
                )

        # Add assistant start for completion
        formatted_text += "<|im_start|>assistant\n"
        return formatted_text


# Initialize processor
print("Initializing Qwen data processor...")
data_processor = QwenMedicalDataProcessor(tokenizer)

if json_files:
    training_examples = data_processor.load_transcribed_conversations(json_files)
else:
    print("‚ùå No JSON files found")
    training_examples = []

In [6]:
def tokenize_for_qwen(examples, tokenizer, max_length=1024):
    """Tokenize for Qwen 2.5 format"""
    texts = examples["text"]

    # Tokenize with Qwen's special tokens
    tokenized = tokenizer(
        texts, truncation=True, padding=True, max_length=max_length, return_tensors="pt"
    )

    # For causal LM, labels are same as input_ids
    tokenized["labels"] = tokenized["input_ids"].clone()

    return tokenized


# Create dataset
if training_examples:
    dataset = Dataset.from_list(training_examples)
    print(f"Dataset created with {len(dataset)} examples")

    # Show sample
    print("\n Sample training example:")
    sample_text = (
        dataset[0]["text"][:300] + "..."
        if len(dataset[0]["text"]) > 300
        else dataset[0]["text"]
    )
    print(sample_text)

    # Split dataset
    if len(dataset) > 1:
        train_test_split = dataset.train_test_split(test_size=0.1, seed=42)
        train_dataset = train_test_split["train"]
        eval_dataset = train_test_split["test"]
    else:
        train_dataset = dataset
        eval_dataset = dataset

    print(f"üìö Training samples: {len(train_dataset)}")
    print(f"üß™ Validation samples: {len(eval_dataset)}")

    # Tokenize
    print("üîÑ Tokenizing datasets...")
    tokenized_train = train_dataset.map(
        lambda x: tokenize_for_qwen(x, tokenizer),
        batched=True,
        remove_columns=train_dataset.column_names,
    )

    tokenized_eval = eval_dataset.map(
        lambda x: tokenize_for_qwen(x, tokenizer),
        batched=True,
        remove_columns=eval_dataset.column_names,
    )

    print("Tokenization completed!")

else:
    print("No training examples - creating minimal dataset")
    # Create minimal example for testing
    minimal_example = {
        "text": "<|im_start|>system\nYou are a medical AI assistant.<|im_end|>\n<|im_start|>user\nSummarize this medical conversation.<|im_end|>\n<|im_start|>assistant\n",
        "type": "summary",
        "source": "minimal",
    }
    dataset = Dataset.from_list([minimal_example])
    tokenized_train = dataset.map(
        lambda x: tokenize_for_qwen(x, tokenizer),
        batched=True,
        remove_columns=dataset.column_names,
    )
    tokenized_eval = tokenized_train

Dataset created with 142 examples

 Sample training example:
<|im_start|>system
You are a medical AI assistant. Generate concise clinical summaries from doctor-patient conversations.<|im_end|>
<|im_start|>user
Please summarize this medical conversation:

Doctor: I am the psychiatrist here in this department.
Patient: I came to see you because my GP sent me to...
üìö Training samples: 127
üß™ Validation samples: 15
üîÑ Tokenizing datasets...


Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 127/127 [00:00<00:00, 1693.37 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15/15 [00:00<00:00, 1666.61 examples/s]

Tokenization completed!





In [8]:
# Configure LoRA for Qwen 2.5
lora_config = LoraConfig(
    r=16,  # LoRA rank
    lora_alpha=32,  # LoRA alpha
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],  # Attention and MLP layers
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Prepare model for training
print("Preparing model for LoRA training...")
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

# Print trainable parameters
model.print_trainable_parameters()

Preparing model for LoRA training...
trainable params: 8,798,208 || all params: 502,830,976 || trainable%: 1.7497


In [11]:
# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Training arguments optimized for Qwen 2.5
training_args = TrainingArguments(
    output_dir="./qwen2.5-medical-finetuned",
    overwrite_output_dir=True,
    num_train_epochs=5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    learning_rate=1e-3,
    fp16=True,
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    report_to="none",
    dataloader_pin_memory=False,
    remove_unused_columns=False,
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

print("‚úÖ Trainer configured successfully!")
print(f"üìä Will train for {training_args.num_train_epochs} epochs")
print(f"üìä Batch size: {training_args.per_device_train_batch_size}")

‚úÖ Trainer configured successfully!
üìä Will train for 5 epochs
üìä Batch size: 2


  trainer = Trainer(


In [None]:
print("üöÄ Starting Qwen 2.5 fine-tuning...")

try:
    # Start training
    trainer.train()

    # Save model
    trainer.save_model()
    tokenizer.save_pretrained("./qwen2.5-medical-finetuned")

    print("Fine-tuning completed successfully!")
    print("Model saved to: ./qwen2.5-medical-finetuned")

except Exception as e:
    print(f"Training error: {e}")
    print("Tips:")
    print("   - Check if you have enough GPU memory")
    print("   - Try reducing batch size if needed")
    print("   - Check your training data format")

üöÄ Starting Qwen 2.5 fine-tuning...


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
50,0.0,


üéâ Fine-tuning completed successfully!
üíæ Model saved to: ./qwen2.5-medical-finetuned


In [13]:
def test_fine_tuned_qwen():
    """Test the fine-tuned Qwen 2.5 model"""
    try:
        from peft import PeftModel

        # Load the fine-tuned model
        print("Loading fine-tuned model...")
        tokenizer = AutoTokenizer.from_pretrained(
            "./qwen2.5-medical-finetuned", trust_remote_code=True
        )

        base_model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen2.5-0.5B-Instruct",
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16,
        )
        
        model = PeftModel.from_pretrained(base_model, "./qwen2.5-medical-finetuned")
        model.eval()

        print("‚úÖ Fine-tuned model loaded!")

        # Test conversation
        test_conversation = """Doctor: What brings you in today?
Patient: I've been having persistent headaches and fatigue.
Doctor: How long has this been going on?
Patient: About three weeks now. It's affecting my work."""

        # Format in Qwen chat format
        messages = [
            {
                "role": "system",
                "content": "You are a medical AI assistant. Generate concise clinical summaries.",
            },
            {
                "role": "user",
                "content": f"Please summarize this medical conversation:\n\n{test_conversation}",
            },
        ]

        # Format the prompt
        prompt = data_processor.format_qwen_chat(messages)

        # Generate
        inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=200,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Extract just the assistant's response
        if "<|im_start|>assistant" in response:
            assistant_response = response.split("<|im_start|>assistant")[-1]
            if "<|im_end|>" in assistant_response:
                assistant_response = assistant_response.split("<|im_end|>")[0]
            assistant_response = assistant_response.strip()
        else:
            assistant_response = response[len(prompt) :].strip()

        print("\n" + "=" * 60)
        print("üß™ FINE-TUNED QWEN 2.5 TEST")
        print("=" * 60)
        print(f"üí¨ Input conversation:\n{test_conversation}")
        print(f"\nüìã Generated summary:\n{assistant_response}")

    except Exception as e:
        print(f"‚ùå Error testing model: {e}")


# Test if model exists
if os.path.exists("./qwen2.5-medical-finetuned"):
    test_fine_tuned_qwen()
else:
    print("‚ö†Ô∏è  No fine-tuned model found to test")

Loading fine-tuned model...


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


‚úÖ Fine-tuned model loaded!
‚ùå Error testing model: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.



In [14]:
class QwenMedicalRAG:
    def __init__(self, model_path="./qwen2.5-medical-finetuned"):
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_path, trust_remote_code=True
            )
            base_model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen2.5-0.5B-Instruct",
                device_map="auto",
                trust_remote_code=True,
                torch_dtype=torch.float16,
            )
            from peft import PeftModel

            self.model = PeftModel.from_pretrained(base_model, model_path)
            self.model.eval()
            print("‚úÖ Qwen RAG system initialized!")
        except Exception as e:
            print(f"‚ùå Error initializing RAG: {e}")

    def generate_medical_summary(self, conversation, max_tokens=300):
        """Generate medical summary using fine-tuned Qwen"""
        messages = [
            {
                "role": "system",
                "content": "You are a medical AI assistant. Generate accurate clinical summaries from doctor-patient conversations.",
            },
            {
                "role": "user",
                "content": f"Summarize this medical conversation:\n\n{conversation}",
            },
        ]

        prompt = data_processor.format_qwen_chat(messages)
        inputs = tokenizer.encode(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                inputs,
                max_new_tokens=max_tokens,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Extract assistant response
        if "<|im_start|>assistant" in response:
            return (
                response.split("<|im_start|>assistant")[-1]
                .split("<|im_end|>")[0]
                .strip()
            )
        else:
            return response[len(prompt) :].strip()


# Test RAG system
if os.path.exists("./qwen2.5-medical-finetuned"):
    print("üîÑ Testing Qwen RAG system...")
    rag_system = QwenMedicalRAG()

    test_conv = """Doctor: How can I help you today?
Patient: I've been feeling very anxious and having trouble sleeping.
Doctor: When did this start?
Patient: About a month ago, after I changed jobs."""

    summary = rag_system.generate_medical_summary(test_conv)
    print(f"\nüí¨ Test conversation:\n{test_conv}")
    print(f"\nüìã RAG Summary:\n{summary}")

üîÑ Testing Qwen RAG system...
‚ùå Error initializing RAG: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.



AttributeError: 'QwenMedicalRAG' object has no attribute 'model'