In [None]:
# Environment setup: install the notebook's dependencies into the running kernel.
# Install PyTorch plus TensorBoard and torchvision (the latter two are unpinned).
%pip install "torch>=2.4.0" tensorboard torchvision

# Install a released Transformers package (version >= 4.51.3), not a git branch.
# NOTE(review): presumably the minimum version needed for the Gemma 3 checkpoints
# loaded later in this notebook — confirm.
%pip install "transformers>=4.51.3"

# Install the Hugging Face ecosystem libraries at pinned versions;
# protobuf and sentencepiece are left unpinned.
%pip install  --upgrade \
  "datasets==3.3.2" \
  "accelerate==1.4.0" \
  "evaluate==0.4.3" \
  "bitsandbytes==0.45.3" \
  "trl==0.15.2" \
  "peft==0.14.0" \
  "pillow==11.1.0" \
  protobuf \
  sentencepiece

In [None]:
# Imports for this cell: standard library first, then the Colab helper.
import os
from importlib import reload

from google.colab import drive

# Attach Google Drive to the Colab filesystem, forcing a fresh mount on every run.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

# Make the dl_project_fall_2025 checkout on Drive the working directory, so the
# relative paths used by later cells resolve against it.
PROJECT_DIR = "/content/drive/MyDrive/DL_Project_2025/dl_project_fall_2025"
os.chdir(PROJECT_DIR)

# Autoreload does not work in Google Colab; `reload` (imported above) is the manual
# alternative: call reload(<module>) after editing a module to pick up the changes.

In [None]:
# Authenticate with Hugging Face and configure Git using tokens stored as Colab secrets.
from google.colab import userdata
from huggingface_hub import login

# Read both tokens from Colab secrets.
GH_TOKEN = userdata.get('git_token') # GitHub PAT; the secret must be stored under the name 'git_token'
hf_token = userdata.get('hugging_face') # Hugging Face token; secret name 'hugging_face'
login(hf_token)

# Configure Git to use the PAT directly in the remote URL for 'origin'.
# NOTE(review): the token becomes part of the remote URL, which Git keeps in the
# repository's config — confirm that is acceptable for a checkout stored on Drive.
!git remote set-url origin https://{GH_TOKEN}@github.com/7yashwanth7/dl_project_fall_2025.git
# Commit identity: replace the email and user name below with your own.
!git config --global user.email "7yashwanth7@gmail.com" # Modify to your username and pwd
!git config --global user.name "7yashwanth7"

In [None]:
# Load the project defaults and the CUI mapping via the project's preprocessing helpers.
from src.llmft.data_preprocessing import preprocess_utils

# Both paths are relative, so they resolve against the current working directory.
defaults = preprocess_utils.read_yaml('src/llmft/config/defaults.yaml')
cui_mapping_json = preprocess_utils.read_json('mapping_files/cui_mapping.json')
# A later cell calls `.keys()` on this object and registers the keys as tokenizer tokens.
cui_mapping = preprocess_utils.get_cui_mapping(cui_mapping_json)


In [None]:

import torch
from transformers import AutoModelForImageTextToText, AutoProcessor, BitsAndBytesConfig
from peft import PeftModel
from datasets import load_dataset

# Checkpoint identifiers. The weights come from the pretrained (PT) checkpoint while
# the processor comes from the instruction-tuned (IT) one; this must match training.
base_model_id = "google/gemma-3-4b-pt"
processor_id  = "google/gemma-3-4b-it"
adapter_dir   = "gemma-3-cui-finetuned-sample1"

# Step 1 - processor (IT) and its tokenizer.
processor = AutoProcessor.from_pretrained(processor_id)
tokenizer = processor.tokenizer

# Step 2 - register the CUI codes as extra tokens, mirroring training.
# The tokens are the bare codes (e.g. "C0041618") with no surrounding markup.
cui_tokens = [*cui_mapping.keys()]
num_added = tokenizer.add_tokens(cui_tokens)
print("Added", num_added, "CUI tokens")

# Hand the extended tokenizer back to the processor.
processor.tokenizer = tokenizer

# Step 3 - base model (PT), loaded with 4-bit NF4 quantization and bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_storage=torch.bfloat16,
)
model_kwargs = {
    "attn_implementation": "eager",
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
    "quantization_config": bnb_config,
}
base_model = AutoModelForImageTextToText.from_pretrained(base_model_id, **model_kwargs)

# Step 4 - make the embedding matrix match the tokenizer size (base vocab + CUI tokens).
base_model.resize_token_embeddings(len(tokenizer))

# Step 5 - wrap the base model with the trained LoRA adapter and switch to eval mode.
model = PeftModel.from_pretrained(base_model, adapter_dir)
model.eval()

In [None]:
from datasets import load_dataset

# Datasets to score. Each spec names a Hub dataset and the split to load, plus an
# optional `max_samples` cap (leave it out, or set it to None/0, to keep the full split).
dataset_specs = [
    {"name": "eltorio/ROCOv2-radiology", "split": "test", "max_samples": 200},
]

# Loaded datasets keyed by dataset name. Note this is a plain dict that is later
# passed to the scoring helper; it is not the `datasets` library module.
datasets = {}
for spec in dataset_specs:
    ds = load_dataset(spec["name"], split=spec["split"])
    max_rows = spec.get("max_samples")
    if max_rows:
        # Clamp the cap to the split size: selecting indices past the end of the
        # dataset raises, so a cap larger than the split keeps the whole split.
        ds = ds.select(range(min(max_rows, len(ds))))
    ds = ds.with_format("python")  # keep PIL images
    datasets[spec["name"]] = ds
    print(f"{spec['name']}[{spec['split']}] -> {len(ds)} samples")

# Keep one dataset handy for quick examples below
dataset = datasets[dataset_specs[0]["name"]]


In [None]:
# Take the first example of the loaded dataset for the single-example generation below.
sample_generate = dataset[0]
# Its "image" field is what gets placed in the chat message and passed to the processor.
image = sample_generate["image"]


In [None]:
# Prompt text for the system and user turns; both strings are used verbatim.
system_message = "You are a digital radiologist who can understand the medical scan of images code the concepts and provide captions"

user_prompt = """Create a description based on the provided image and return the description of the image with details of the scan as captions, the concepts and their descriptions, only the concepts that are extracted"""

# Chat-format conversation: a text-only system turn, followed by a user turn that
# carries the instruction text and then the image.
system_turn = {
    "role": "system",
    "content": [{"type": "text", "text": system_message}],
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "text", "text": user_prompt},
        {"type": "image", "image": image},
    ],
}
messages = [system_turn, user_turn]

In [None]:
# Turn chat messages into a single string prompt
chat_text = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,   # <-- important for inference
    tokenize=False,
)

# Build model inputs (batch size 1)
inputs = processor(
    text=[chat_text],
    images=[image],
    return_tensors="pt",
    padding=True,
)

# Move to correct device
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Generate
with torch.no_grad():
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.9,
        top_p=0.9,
    )

# Option 1: decode only the new tokens (without re-printing the prompt)
gen_only_ids = generated_ids[:, inputs["input_ids"].shape[-1]:]
output_text = tokenizer.decode(gen_only_ids[0], skip_special_tokens=True)

print("MODEL OUTPUT:\n")
print(output_text)

In [None]:
# Display the raw dataset example that was used for the generation above.
# NOTE(review): presumably shown to compare its reference fields with the model output — confirm.
sample_generate

## Batched scoring and saving to Google Drive

These cells batch inference across multiple datasets and save JSONL outputs to Google Drive (or locally if Drive is not available).


In [None]:
# Import the project's scoring helpers and reload the module, so edits made to it
# on disk are picked up without restarting the kernel.
from importlib import reload
from pathlib import Path

from src.llmft.inference import score as score_utils
reload(score_utils)


In [None]:
# Generation and batching settings
batch_size = 4
max_samples = None  # set to an int to cap samples per dataset

# Sampling options handed to the scoring helper as `gen_kwargs`.
gen_kwargs = dict(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.9,
    top_p=0.9,
)

# Where to save outputs (Google Drive if available)
drive_output_dir = Path("/content/drive/MyDrive/llmft_scores/4Bit_Quant_Gemma_YL")
try:
    from google.colab import drive
    drive.mount("/content/drive", force_remount=False)
except Exception as exc:
    # Deliberate best-effort fallback (e.g. when not running in Colab), but report
    # the actual reason so a failed mount is not mistaken for "not on Colab".
    print(f"Colab drive not available ({type(exc).__name__}: {exc}); saving locally.")
    drive_output_dir = Path("scores")

# Create the target directory (and any missing parents); a no-op if it already exists.
drive_output_dir.mkdir(parents=True, exist_ok=True)
print(f"Saving outputs to: {drive_output_dir}")


In [None]:
# Run batched scoring over every loaded dataset. All settings are collected in one
# dict and unpacked into the project helper as keyword arguments.
scoring_kwargs = {
    "datasets": datasets,
    "model": model,
    "processor": processor,
    "system_message": system_message,
    "user_prompt": user_prompt,
    "output_dir": drive_output_dir,
    "batch_size": batch_size,
    "max_samples": max_samples,
    "gen_kwargs": gen_kwargs,
}
results_paths = score_utils.score_multiple_and_save(**scoring_kwargs)

# The helper's result is iterated as (name, path) pairs; report each saved output.
for name, path in results_paths.items():
    print(f"Saved {name} -> {path}")


In [None]:
# Pull the latest commits from the configured remote into the repository in the
# current working directory.
!git pull