In [None]:
# !rm -rf /content/qwen3-* *.xlsx


In [None]:
!pip install -q transformers datasets peft accelerate bitsandbytes pandas openpyxl

In [None]:
import os
import re
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import LoraConfig, get_peft_model, TaskType
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_score, recall_score, f1_score, jaccard_score
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
import random
import numpy as np
from transformers import set_seed

# Seed every random number generator used in this notebook with the same value.
seed = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, set_seed):
    seed_fn(seed)

# Print the installed bitsandbytes version for the record.
import importlib.metadata
print(importlib.metadata.version("bitsandbytes"))

0.46.1


In [None]:
# ⚙️ Step 2: Upload XLSX file
# Colab-only: `files.upload()` returns a mapping whose first key is later used
# as the workbook filename when the "train" and "test" sheets are read.
from google.colab import files
uploaded = files.upload()  # upload your xlsx file here


Saving train-test-data_modified.xlsx to train-test-data_modified.xlsx


In [None]:
from google.colab import drive


In [None]:
# 📊 3. Load and Prepare Data
import pandas as pd
from datasets import Dataset

# Fail early with a readable message instead of an IndexError when the upload
# cell was skipped or the file picker was cancelled.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run the upload cell first.")
xlsx_file = next(iter(uploaded))

# Passing a list of sheet names parses the workbook once and returns a dict
# keyed by sheet name.
_sheets = pd.read_excel(xlsx_file, sheet_name=["train", "test"])
df_train, df_test = _sheets["train"], _sheets["test"]

# Both sheets must expose the columns that the prompt-building cell reads.
for _sheet_name, _frame in _sheets.items():
    _missing = {"input_finding", "output_disease"} - set(_frame.columns)
    if _missing:
        raise KeyError(f"Sheet '{_sheet_name}' is missing columns: {sorted(_missing)}")

In [None]:
# Combine input and output into prompt and target for causal LM
def build_prompt(input_finding: str) -> str:
    """Build the training prompt for one radiology report.

    The template must be an f-string: without the ``f`` prefix the literal text
    ``{input_finding}`` ends up in every prompt and the report itself is never
    shown to the model. The text starts directly with the instruction sentence
    (no leading newline or indentation) so that it lines up with the prompt
    template used at inference time later in this notebook.

    Args:
        input_finding: Free-text findings section of the report.

    Returns:
        The instruction block, the report, and a trailing ``output_disease``
        marker line after which the completion is expected.
    """
    return f"""You are a clinical AI assistant. Extract only the confirmed or highly suspected disease names from this radiology report.

Instructions:
- Do not include any explanation, reasoning, or additional text.
- Return only the disease names, separated by commas.
- Use standard medical terminology.
- Be precise and avoid speculation.
- Do not duplicate disease names.
- If no diseases are found, output: No acute abnormality.

Now analyze this radiology report:
{input_finding}

output_disease\n"""

def build_example(row):
    """Turn one spreadsheet row into a prompt/completion pair.

    Args:
        row: Mapping-like row providing ``input_finding`` and ``output_disease``.

    Returns:
        Dict with ``prompt_text`` (the rendered prompt) and ``completion_text``
        (the row's ``output_disease`` value, passed through unchanged).
    """
    return dict(
        prompt_text=build_prompt(row['input_finding']),
        completion_text=row['output_disease'],
    )

# Shuffle the training rows reproducibly (the test rows keep their order).
df_train = df_train.sample(frac=1, random_state=seed).reset_index(drop=True)

# One prompt/completion dict per row, for each split.
train_pairs, test_pairs = (
    frame.apply(build_example, axis=1).tolist() for frame in (df_train, df_test)
)

# Wrap the lists of dicts as Hugging Face Datasets.
train_dataset, test_dataset = map(Dataset.from_list, (train_pairs, test_pairs))



In [None]:
# Step 2: pick the base checkpoint and load its tokenizer with left padding
model_id = "Qwen/Qwen3-4B"

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    padding_side="left",
    trust_remote_code=True,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# 🔤 Step 4: Tokenize Data
def tokenize_fn(example):
    """Tokenize one prompt/completion pair for causal-LM fine-tuning.

    Prompt and completion are encoded separately (no special tokens), capped at
    1024 and 32 tokens respectively, then joined with a trailing EOS token.
    Prompt positions are set to -100 in ``labels`` so only the completion and
    the EOS token carry a label.

    Args:
        example: Mapping with ``prompt_text`` and ``completion_text``.

    Returns:
        Dict with ``input_ids``, ``attention_mask`` (all ones) and ``labels``.
    """
    def _encode(text, limit):
        # Shared encoding settings for both halves of the example.
        return tokenizer(text, truncation=True, max_length=limit, add_special_tokens=False)['input_ids']

    prompt_ids = _encode(example['prompt_text'], 1024)
    answer_ids = _encode(example['completion_text'], 32) + [tokenizer.eos_token_id]

    return {
        'input_ids': prompt_ids + answer_ids,
        'attention_mask': [1] * (len(prompt_ids) + len(answer_ids)),
        'labels': [-100] * len(prompt_ids) + answer_ids,
    }

# Tokenize row by row (batched=False) so every example keeps its own length
tokenized_train, tokenized_test = (
    split.map(tokenize_fn, batched=False) for split in (train_dataset, test_dataset)
)



Map:   0%|          | 0/1236 [00:00<?, ? examples/s]

Map:   0%|          | 0/386 [00:00<?, ? examples/s]

In [None]:

# 🧩 Step 5: Compute Config (CPU vs GPU)
def get_compute_config():
    """Pick loading and training settings based on CUDA availability.

    Prints which path was taken and returns a dict with the keys ``device``,
    ``load_in_4bit``, ``bf16``, ``device_map`` and
    ``per_device_train_batch_size``.
    """
    if not torch.cuda.is_available():
        print('only CPU')
        return dict(
            device='cpu',
            load_in_4bit=False,
            bf16=False,
            device_map={'': 'cpu'},
            per_device_train_batch_size=1,
        )

    print('GPU available')
    return dict(
        device='cuda',
        load_in_4bit=True,
        bf16=torch.cuda.is_bf16_supported(),
        device_map='auto',
        per_device_train_batch_size=4,  # for A100 GPU
    )

config = get_compute_config()


if config['load_in_4bit']:
    # GPU + 4-bit path.
    # The bare `load_in_4bit=` / `bnb_4bit_compute_dtype=` keyword arguments are
    # deprecated; the supported route is a BitsAndBytesConfig passed as
    # `quantization_config`.
    from transformers import BitsAndBytesConfig

    # Use the same reduced-precision dtype that training will use (see the
    # `bf16` flag in the config) instead of always computing in float16.
    compute_dtype = torch.bfloat16 if config['bf16'] else torch.float16

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=compute_dtype,
        ),
        device_map=config['device_map'],
        trust_remote_code=True,
    )
else:
    # CPU (or full-precision) path – no bitsandbytes
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        device_map=config['device_map'],
        trust_remote_code=True,
    )



GPU available


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:

# 🧼 Step : Clean and parse the generated output
def parse_labels(output_string, delimiter="output_disease"):
    """Extract a list of disease/abnormality labels from raw model output.

    Processing steps:
      1. Keep only the text after the first occurrence of ``delimiter``; if the
         delimiter is absent, the whole string is used.
      2. If the first remaining line contains a comma, that line alone is split
         on commas and later lines are ignored.
      3. Otherwise every non-empty line is treated as one entry: a leading
         hyphen or bullet is removed and any commas inside the line split it
         further.

    Entries are whitespace-trimmed; empty entries are dropped; duplicates are
    preserved (no deduplication).

    Args:
        output_string: Generated text, optionally including the prompt.
        delimiter: Marker that separates the prompt from the answer.

    Returns:
        List of label strings. An empty list is returned when nothing follows
        the delimiter (or the input is blank) instead of raising IndexError.
    """
    # 1) Isolate the part after the delimiter (fall back to the full text).
    _, found, tail = output_string.partition(delimiter)
    body = (tail if found else output_string).strip()

    # Nothing was generated: there is no first line to inspect.
    if not body:
        return []

    lines = body.splitlines()

    # 2) Comma on the first line -> comma-separated answer on that line only.
    first_line = lines[0]
    if "," in first_line:
        return [part.strip() for part in first_line.split(",") if part.strip()]

    # 3) Otherwise a one-item-per-line list, possibly with bullets/hyphens.
    labels = []
    for line in lines:
        clean = re.sub(r"^[\-\u2022]\s*", "", line.strip())
        labels.extend(part.strip() for part in clean.split(",") if part.strip())
    return labels



In [None]:
# 🧠 Step 6: Load Model with LoRA

# LoRA adapters (rank 8) on the q/k/v/o projection modules.
attention_projections = ['q_proj', 'k_proj', 'v_proj', 'o_proj']

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=attention_projections,
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
)

# Wrap the loaded model and report how many parameters are trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 5,898,240 || all params: 4,028,366,336 || trainable%: 0.1464


In [None]:
# # 🧩 Step 7: Custom Data Collator

def custom_collate(features, tokenizer):
    """Pad a list of tokenized examples into one batch of tensors.

    Each field is converted to a ``torch.long`` tensor and padded at the end
    (via ``pad_sequence``) to the longest example in the batch.

    Args:
        features: List of dicts with ``input_ids``, ``attention_mask`` and
            ``labels`` as lists of ints.
        tokenizer: Object exposing ``pad_token_id`` and ``eos_token_id``; the
            EOS id is used for padding when no pad id is set.

    Returns:
        Dict of batch-first tensors: ``input_ids`` padded with the pad id,
        ``attention_mask`` padded with 0 and ``labels`` padded with -100.
    """
    fill_id = tokenizer.pad_token_id
    if fill_id is None:
        fill_id = tokenizer.eos_token_id

    # Padding value per field, in the order the batch dict is returned.
    pad_values = {'input_ids': fill_id, 'attention_mask': 0, 'labels': -100}

    return {
        key: pad_sequence(
            [torch.tensor(item[key], dtype=torch.long) for item in features],
            batch_first=True,
            padding_value=pad_value,
        )
        for key, pad_value in pad_values.items()
    }




In [None]:
# 🏃 Step 8: Training Setup and Run
from transformers import TrainingArguments, Trainer
from functools import partial

# Bind the tokenizer so the Trainer can call the collator with `features` only.
collate_fn = partial(custom_collate, tokenizer=tokenizer)

training_args = TrainingArguments(
    # --- output / logging ---
    output_dir='./qwen3-lora-4B',
    logging_dir='./logs',
    logging_steps=1,
    report_to='tensorboard',
    # --- optimisation ---
    num_train_epochs=1,
    learning_rate=1e-4,
    per_device_train_batch_size=config['per_device_train_batch_size'],
    gradient_accumulation_steps=8,
    bf16=config['bf16'],
    # --- evaluation / checkpointing ---
    eval_strategy='steps',
    eval_steps=10,
    save_strategy='steps',
    save_steps=100,
    label_names=["labels"],
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
)

trainer.train()


Step,Training Loss,Validation Loss
10,2.7041,2.609092
20,1.3325,1.591013
30,1.2474,1.383826


TrainOutput(global_step=39, training_loss=2.0994506646425295, metrics={'train_runtime': 148.9163, 'train_samples_per_second': 8.3, 'train_steps_per_second': 0.262, 'total_flos': 3128902831792128.0, 'train_loss': 2.0994506646425295, 'epoch': 1.0})

In [None]:
# 💾 9. Save the LoRA adapter, then merge it into the base model and save that too

def _save_with_tokenizer(net, out_dir, **save_kwargs):
    # Write the model and the tokenizer files into the same directory.
    net.save_pretrained(out_dir, **save_kwargs)
    tokenizer.save_pretrained(out_dir)


adapter_only_dir = "./qwen3-4B-lora-only"
merged_model_dir = "./qwen3-4B-lora-merged"

# 1) Adapter-only checkpoint. This has to happen before the merge below,
#    because `model` is replaced by the merged network afterwards.
_save_with_tokenizer(model, adapter_only_dir, save_adapter=True)
print(f"✅ Adapter-only LoRA saved to {adapter_only_dir}")

# 2) Fold the LoRA weights into the base weights for standalone inference,
#    then save the merged model.
model = model.merge_and_unload()
_save_with_tokenizer(model, merged_model_dir)
print(f"✅ Merged full model saved to {merged_model_dir}")


✅ Adapter-only LoRA saved to ./qwen3-4B-lora-only




✅ Merged full model saved to ./qwen3-4B-lora-merged


In [None]:
# Pack the merged-model directory into one gzip-compressed archive in the
# current working directory. Because an absolute path is given, tar stores the
# members without the leading `/`.
!tar -czvf qwen3-4B-lora-merged.tar.gz /content/qwen3-4B-lora-merged

tar: Removing leading `/' from member names
/content/qwen3-4B-lora-merged/
/content/qwen3-4B-lora-merged/vocab.json
/content/qwen3-4B-lora-merged/merges.txt
/content/qwen3-4B-lora-merged/added_tokens.json
/content/qwen3-4B-lora-merged/special_tokens_map.json
/content/qwen3-4B-lora-merged/chat_template.jinja
/content/qwen3-4B-lora-merged/tokenizer.json
/content/qwen3-4B-lora-merged/model.safetensors
/content/qwen3-4B-lora-merged/tokenizer_config.json
/content/qwen3-4B-lora-merged/config.json
/content/qwen3-4B-lora-merged/generation_config.json


In [None]:
# Colab-only: mount Google Drive at /content/drive; the copy commands in the
# next cell write the training artifacts below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!mkdir /content/drive/My\ Drive/qwen3-4B-epoch-1-lora-lr_1
!cp -r /content/qwen3-4B-lora-merged.tar.gz /content/drive/My\ Drive/qwen3-4B-epoch-1-lora-lr_1/
!cp -r /content/qwen3-4B-lora-only /content/drive/My\ Drive/qwen3-4B-epoch-1-lora-lr_1/
!cp -r /content/qwen3-lora-4B /content/drive/My\ Drive/qwen3-4B-epoch-1-lora-lr_1/
!cp -r /content/qwen3-4B-lora-merged /content/drive/My\ Drive/qwen3-4B-epoch-1-lora-lr_1/


In [None]:
# Inference setup: reload the merged checkpoint and its tokenizer from disk
merged_model_dir = "./qwen3-4B-lora-merged"

tokenizer = AutoTokenizer.from_pretrained(merged_model_dir)
model = AutoModelForCausalLM.from_pretrained(
    merged_model_dir,
    torch_dtype="auto",   # or torch.float16 for forced half precision
    device_map="auto",    # or device_map={"": "cuda"} if only one GPU
)

# Text-generation pipeline used by the inference cells below.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

print("✅ Model and tokenizer loaded successfully.")


Device set to use cuda:0


✅ Model and tokenizer loaded successfully.


In [None]:

# 🧾 Step 3: Define the prompt template

def build_prompt(input_finding):
    """Build the inference prompt for one radiology report.

    This definition replaces the earlier ``build_prompt`` in the notebook, so
    its wording is kept aligned with the template used to build the training
    examples. In particular the "no diseases" sentinel is
    ``No acute abnormality.``, matching the training prompt.
    NOTE(review): confirm this is also the sentinel used in the
    ``output_disease`` column of the training sheet.

    Args:
        input_finding: Free-text findings section of the report.

    Returns:
        The instruction block, the report, and a trailing ``output_disease``
        marker line after which the model is expected to answer.
    """
    return f"""You are a clinical AI assistant. Extract only the confirmed or highly suspected disease names from this radiology report.

Instructions:
- Do not include any explanation, reasoning, or additional text.
- Return only the disease names, separated by commas.
- Use standard medical terminology.
- Be precise and avoid speculation.
- Do not duplicate disease names.
- If no diseases are found, output: No acute abnormality.

Now analyze this radiology report:
{input_finding}

output_disease\n"""



In [None]:
def parse_labels(output_string, delimiter="output_disease"):
    """Extract a list of disease/abnormality labels from raw model output.

    Processing steps:
      1. Keep only the text after the first occurrence of ``delimiter``; if the
         delimiter is absent, the whole string is used.
      2. If the first remaining line contains a comma, that line alone is split
         on commas and later lines are ignored.
      3. Otherwise every non-empty line is treated as one entry: a leading
         hyphen or bullet is removed and any commas inside the line split it
         further.

    Entries are whitespace-trimmed; empty entries are dropped; duplicates are
    preserved (no deduplication).

    Args:
        output_string: Generated text, optionally including the prompt.
        delimiter: Marker that separates the prompt from the answer.

    Returns:
        List of label strings. An empty list is returned when nothing follows
        the delimiter (or the input is blank) instead of raising IndexError.
    """
    # 1) Isolate the part after the delimiter (fall back to the full text).
    _, found, tail = output_string.partition(delimiter)
    body = (tail if found else output_string).strip()

    # Nothing was generated: there is no first line to inspect.
    if not body:
        return []

    lines = body.splitlines()

    # 2) Comma on the first line -> comma-separated answer on that line only.
    first_line = lines[0]
    if "," in first_line:
        return [part.strip() for part in first_line.split(",") if part.strip()]

    # 3) Otherwise a one-item-per-line list, possibly with bullets/hyphens.
    labels = []
    for line in lines:
        clean = re.sub(r"^[\-\u2022]\s*", "", line.strip())
        labels.extend(part.strip() for part in clean.split(",") if part.strip())
    return labels


In [None]:

# 🧪 Step 5: Run the pipeline on one hand-written report and parse the answer
example_input = """The liver is normal in size and shape with homogeneous density. A patchy low-density lesion is seen around the liver fissure. The intrahepatic duct system is not obviously dilated, and the course is normal. The gallbladder is not enlarged, with no obvious thickening of the wall, and no clearly abnormal density foci are seen inside. The spleen is normal in size and shape with homogeneous density, and some punctate low-density foci are seen inside. The pancreas is normal in size and shape with homogeneous density, and no clearly abnormal density foci are seen inside. The main pancreatic duct is not obviously dilated, and the peripancreatic fat space is clear. Both kidneys are normal in size and shape with homogeneous density. A round low-density lesion is seen in the right kidney with a diameter of about 16mm. The left adrenal gland is thickened, and a punctate high-density lesion is seen in the right adrenal gland. The renal pelvis-calyx system is not obviously dilated. The perirenal fat space is clear, and no clearly abnormal density foci are seen. No enlarged lymph nodes are seen in the retroperitoneum."""
expected_output = ["renal cyst", "adrenal hyperplasia", "adrenal calcification"]

prompt = build_prompt(example_input)

# Greedy decoding, at most 32 new tokens; the returned text is parsed after
# the "output_disease" marker.
generations = pipe(prompt, max_new_tokens=32, do_sample=False)
response = generations[0]["generated_text"]

prediction = parse_labels(response, delimiter="output_disease")
print("🧠 Predicted:", prediction)

print("\n \n \n full responce: \n", response)


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


🧠 Predicted: ['Kidney tumor', 'Adrenal hyperplasia', 'Adrenal tumor', 'Hepatic lesion', 'Spleen lesion']

 
 
 full responce: 
 You are a clinical AI assistant. Extract only the confirmed or highly suspected disease names from this radiology report.

Instructions:
- Do not include any explanation, reasoning, or additional text.
- Return only the disease names, separated by commas.
- Use standard medical terminology.
- Be precise and avoid speculation.
- Do not duplicate disease names.
- If no diseases are found, output: No significant findings.

Now analyze this radiology report:
The liver is normal in size and shape with homogeneous density. A patchy low-density lesion is seen around the liver fissure. The intrahepatic duct system is not obviously dilated, and the course is normal. The gallbladder is not enlarged, with no obvious thickening of the wall, and no clearly abnormal density foci are seen inside. The spleen is normal in size and shape with homogeneous density, and some punct

In [None]:
# 🧮 Step 6: Metric calculation

def compute_metrics(preds, targets):
    """Score predicted label lists against reference label lists.

    Labels are binarized with a ``MultiLabelBinarizer`` fitted on the union of
    both inputs, so every label seen on either side gets a column.

    Args:
        preds: List of predicted label lists, one per sample.
        targets: List of reference label lists, aligned with ``preds``.

    Returns:
        Dict of plain Python floats: micro-averaged ``precision``, ``recall``
        and ``f1_micro``, plus sample-averaged ``jaccard``.

    Raises:
        ValueError: If ``preds`` and ``targets`` differ in length.
    """
    preds, targets = list(preds), list(targets)
    if len(preds) != len(targets):
        raise ValueError(
            f"preds and targets must have the same number of samples "
            f"(got {len(preds)} and {len(targets)})"
        )

    # preds, targets: lists of label-lists; fit on all of them together.
    mlb = MultiLabelBinarizer()
    mlb.fit(preds + targets)

    y_pred = mlb.transform(preds)
    y_true = mlb.transform(targets)

    # Convert every score with float() so all four values have the same plain
    # type (the original converted only the Jaccard score).
    return {
        "precision": float(precision_score(y_true, y_pred, average='micro', zero_division=0)),
        "recall":    float(recall_score(y_true, y_pred, average='micro', zero_division=0)),
        "f1_micro":  float(f1_score(y_true, y_pred, average='micro', zero_division=0)),
        "jaccard":   float(jaccard_score(y_true, y_pred, average='samples', zero_division=0)),
    }

# Lower-case both label lists so the comparison ignores casing.
pred_labels = [label.lower() for label in parse_labels(response, delimiter="output_disease")]
true_labels = [label.strip().lower() for label in expected_output]

# Score the single example; each side is wrapped as a one-sample batch.
metrics = compute_metrics([pred_labels], [true_labels])
print("📊 Evaluation:", metrics)


📊 Evaluation: {'precision': 0.2, 'recall': 0.3333333333333333, 'f1_micro': 0.25, 'jaccard': 0.14285714285714285}


In [None]:
# ✅ Lower-cased predicted labels (parsed from the generation) and reference labels
pred_labels = list(map(str.lower, parse_labels(response, delimiter="output_disease")))
true_labels = [reference.strip().lower() for reference in expected_output]

# ✅ Compute metrics for the one-sample batch
metrics = compute_metrics([pred_labels], [true_labels])

# ✅ One metric per line, rounded to 3 decimal places
print("📊 Evaluation Results:")
for metric_name, score in metrics.items():
    print(f"{metric_name}: {score:.3f}")

📊 Evaluation Results:
precision: 0.200
recall: 0.333
f1_micro: 0.250
jaccard: 0.143
