# OncoScope: Fine-tune Gemma 3n for Cancer Genomics Analysis
Adapted from Unsloth's official Gemma 3n notebook to fine-tune the model for cancer mutation analysis.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yourusername/oncoscope/blob/main/oncoscope_gemma_3n_colab.ipynb)

## 1. Install Dependencies

In [None]:
%%capture
# The %%capture cell magic above must stay on the first line of the cell; it
# hides the (long) pip output from the notebook.
#
# Install Unsloth for Colab.
# --no-deps stops pip from resolving/overwriting dependencies of the listed
# packages; the remaining requirements are installed explicitly on the next line.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
!pip install --no-deps unsloth

# Install the latest transformers and timm for Gemma 3n.
# NOTE(review): these two are unpinned, so the notebook picks up whatever is
# newest at run time — pin versions if reproducibility matters.
!pip install --no-deps --upgrade transformers
!pip install --no-deps --upgrade timm

## 2. Upload Your Cancer Training Data
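Upload your `cancer_training_data.json` file when prompted. The file must contain a JSON list of objects, each with an `input` field (the prompt) and an `output` field (the expected answer).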

In [None]:
from google.colab import files
import json

EXPECTED_UPLOAD_NAME = "cancer_training_data.json"

print("Please upload your cancer_training_data.json file...")
# files.upload() returns a dict mapping each uploaded file name to its bytes.
uploaded = files.upload()

if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run this cell and select your "
        f"{EXPECTED_UPLOAD_NAME} file."
    )

# Read the content straight from the upload result instead of re-opening a
# hard-coded path: the file may have been saved under a different name than
# the expected one, which would make open() fail.
if EXPECTED_UPLOAD_NAME in uploaded:
    uploaded_name = EXPECTED_UPLOAD_NAME
else:
    uploaded_name = next(iter(uploaded))
    print(f"Note: using uploaded file '{uploaded_name}' as the training data.")

# Load the training data (json.loads accepts UTF-8 encoded bytes directly).
cancer_training_data = json.loads(uploaded[uploaded_name])

# Fail early with a clear message: the dataset-preparation cell iterates this
# object and reads example["input"] / example["output"] from every entry.
if not isinstance(cancer_training_data, list) or not cancer_training_data:
    raise ValueError(
        f"'{uploaded_name}' must contain a non-empty JSON list of training examples."
    )
malformed = [
    index
    for index, example in enumerate(cancer_training_data)
    if not isinstance(example, dict)
    or "input" not in example
    or "output" not in example
]
if malformed:
    raise ValueError(
        f"{len(malformed)} example(s) lack an 'input' or 'output' field "
        f"(first offending indices: {malformed[:5]})."
    )

print(f"Loaded {len(cancer_training_data)} cancer genomics training examples")

## 3. Load Gemma 3n Model (Text-Only Mode)

In [None]:
from unsloth import FastModel
import torch

# Loader settings for the E2B (2B) Gemma 3n checkpoint, chosen so the model
# fits on a Colab T4 GPU.
load_options = dict(
    model_name = "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
    dtype = None,            # let the loader auto-detect the dtype
    max_seq_length = 1024,   # kept short for the T4
    load_in_4bit = True,
    full_finetuning = False, # LoRA adapters instead of full fine-tuning
)
model, tokenizer = FastModel.from_pretrained(**load_options)

# Report which GPU the session got and how much memory it has.
print(f"Model loaded! GPU: {torch.cuda.get_device_name(0)}")
print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

## 4. Configure LoRA for Text-Only Fine-tuning

In [None]:
# LoRA adapter settings for text-only cancer genomics fine-tuning.
lora_settings = {
    # Which parts of the network receive adapters.
    "finetune_vision_layers": False,     # vision disabled - saves VRAM
    "finetune_language_layers": True,    # text only for cancer analysis
    "finetune_attention_modules": True,  # good for a specialized domain
    "finetune_mlp_modules": True,        # kept for performance
    # Adapter hyper-parameters.
    "r": 8,                              # LoRA rank - reduced for T4
    "lora_alpha": 8,                     # matches r
    "lora_dropout": 0,
    "bias": "none",
    "random_state": 3407,
}
model = FastModel.get_peft_model(model, **lora_settings)

print("LoRA adapters configured for cancer genomics text analysis!")

## 5. Prepare Cancer Genomics Dataset

In [None]:
from unsloth.chat_templates import get_chat_template, standardize_data_formats
from datasets import Dataset

# Attach the Gemma 3 chat template to the tokenizer.
tokenizer = get_chat_template(tokenizer, chat_template = "gemma-3")

# Turn every {"input", "output"} example into a two-turn conversation record.
conversations_data = [
    {
        "conversations": [
            {"role": "user", "content": example["input"]},
            {"role": "assistant", "content": example["output"]},
        ]
    }
    for example in cancer_training_data
]

# Build the dataset and normalise its conversation format.
dataset = standardize_data_formats(Dataset.from_list(conversations_data))


def formatting_prompts_func(examples):
    """Render a batch of conversations into chat-template text.

    Args:
        examples: A batch whose "conversations" entry is a list of
            conversations (each a list of role/content messages).

    Returns:
        A dict with a "text" list holding one rendered string per
        conversation, with a leading '<bos>' removed if present.
    """
    rendered = []
    for convo in examples["conversations"]:
        text = tokenizer.apply_chat_template(
            convo,
            tokenize=False,
            add_generation_prompt=False,
        )
        rendered.append(text.removeprefix('<bos>'))
    return {"text": rendered}


dataset = dataset.map(formatting_prompts_func, batched=True)

# Hold out 10% of the examples for evaluation (fixed seed).
dataset = dataset.train_test_split(test_size=0.1, seed=3407)
print(f"Training samples: {len(dataset['train'])}")
print(f"Evaluation samples: {len(dataset['test'])}")

## 6. Training Configuration

In [None]:
from trl import SFTTrainer, SFTConfig
from unsloth.chat_templates import train_on_responses_only

# Training hyper-parameters, collected in one place.
training_args = SFTConfig(
    dataset_text_field = "text",
    # Batch size 1 with 4 accumulation steps -> effective batch size of 4 (T4 limit).
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 4,
    warmup_steps = 5,
    num_train_epochs = 3,
    learning_rate = 2e-4,
    logging_steps = 10,
    save_steps = 100,
    # Evaluate every 50 steps.
    eval_strategy = "steps",
    eval_steps = 50,
    optim = "paged_adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "oncoscope_outputs",
    report_to = "tensorboard",  # viewable inside Colab
    # Reload the checkpoint selected by eval_loss once training ends.
    load_best_model_at_end = True,
    metric_for_best_model = "eval_loss",
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset["train"],
    eval_dataset = dataset["test"],
    args = training_args,
)

# Restrict training to the assistant responses; the markers below are the
# turn headers that delimit the user and model parts of each rendered chat.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<start_of_turn>user\n",
    response_part = "<start_of_turn>model\n",
)

print("Trainer configured for cancer genomics specialization!")

## 7. Start Training

In [None]:
def _as_gib(byte_count):
    """Convert a byte count to units of 1024**3 bytes, rounded to 3 decimals."""
    return round(byte_count / 1024 / 1024 / 1024, 3)


# Snapshot GPU memory before training so the training overhead can be derived.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = _as_gib(torch.cuda.max_memory_reserved())
max_memory = _as_gib(gpu_stats.total_memory)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

# Run the fine-tuning.
trainer_stats = trainer.train()

# Summarise runtime and peak reserved memory after training.
used_memory = _as_gib(torch.cuda.max_memory_reserved())
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
print(f"Training completed in {trainer_stats.metrics['train_runtime']/60:.2f} minutes")
print(f"Peak memory = {used_memory} GB ({used_percentage}% of max)")
print(f"Peak memory for training = {used_memory_for_lora} GB")

## 8. Test Cancer Analysis

In [None]:
from transformers import TextStreamer

# Sample cancer genomics prompts used as a quick smoke test of the tuned model.
test_queries = [
    "Analyze the BRCA1 c.68_69delAG mutation for cancer risk assessment.",
    "What are the therapeutic implications of KRAS G12C mutation in lung cancer?",
    "Provide clinical recommendations for a patient with TP53 R175H mutation."
]

# Stream generated tokens to the cell output, without echoing the prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True)

# Generation settings shared by every query.
generation_options = dict(
    max_new_tokens=256,
    temperature=1.0,
    top_p=0.95,
    top_k=64,
    streamer=streamer,
)

for query in test_queries:
    print(f"\n{'='*60}")
    print(f"Query: {query}")
    print(f"{'='*60}")

    # Wrap the query as a single user turn and tokenize it with the chat template.
    chat = [{"role": "user", "content": query}]
    encoded = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
        return_dict=True,
    )
    inputs = encoded.to("cuda")

    # The streamer prints the answer as it is produced; the return value is unused.
    _ = model.generate(**inputs, **generation_options)

    # Drop the input tensors and release cached GPU memory between queries.
    del inputs
    torch.cuda.empty_cache()

## 9. Save the Model

In [None]:
# Write the LoRA adapters, then the tokenizer, into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("oncoscope-gemma-3n-lora")

# Optional alternatives - uncomment the one you need.
#
# Option 1: save a merged 16-bit model
# model.save_pretrained_merged("oncoscope-gemma-3n-merged", tokenizer)
#
# Option 2: export as GGUF for Ollama
# model.save_pretrained_gguf(
#     "oncoscope-gemma-3n",
#     tokenizer,
#     quantization_type = "Q8_0"
# )

print("Model saved! You can download the files or push to HuggingFace.")

## 10. Download Model Files

In [None]:
# Zip and download the model.
# The directory below is the one the save cell writes the LoRA adapters and
# tokenizer into, so that cell must have been run first.
!zip -r oncoscope-gemma-3n-lora.zip oncoscope-gemma-3n-lora/
# `files` is google.colab.files, imported in the data-upload cell; re-run that
# import if this cell is executed in a fresh session.
files.download('oncoscope-gemma-3n-lora.zip')

print("Model downloaded! You can now use it locally or deploy it.")