## HF config

In [None]:
import os
import dotenv
# Switch the kernel's working directory to the project root so that the
# relative paths used in this notebook (".env", "src/dataset/...") resolve.
# NOTE(review): this is a machine-specific absolute path — adjust it when
# running the notebook elsewhere.
%cd /home/code/NeuraLens
# Load environment variables from the .env file in the current directory
dotenv.load_dotenv(".env")
# Read HF_TOKEN (presumably the Hugging Face access token) from the
# environment; os.getenv returns None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

/home/code/NeuraLens


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


## Dataset preprocessing

In [2]:
# assuming dataset is already downloaded in src/dataset
from src.dataset import tissue_dataset
from torchvision import transforms

# -- Dataset configuration --
dataset_path = "src/dataset/Motic-Human-tissues"
split_ratio = 0.8
batch_size = 1

# No torchvision transform is applied at the moment. The pipeline below is
# kept only as a reference for re-enabling one:
#   transforms.Compose([
#       transforms.ToTensor(),
#       transforms.Resize((512, 512)),
#       transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#   ])
transform = None

# Build the project dataset wrapper from the settings above.
dataset = tissue_dataset(
    images_dir_path=dataset_path,
    split_ratio=split_ratio,
    batch_size=batch_size,
    transform=transform,
)


Found 556 images in src/dataset/Motic-Human-tissues
Creating HuggingFace datasets...
Formatting training data...


Map:   0%|          | 0/444 [00:00<?, ? examples/s]

Formatting validation data...


Map:   0%|          | 0/112 [00:00<?, ? examples/s]

In [3]:
# Show the label sets the dataset exposes for each attribute, one per line.
print(
    f"Current type classes: {dataset.type_classes}",
    f"Current zoom classes: {dataset.zoom_classes}",
    f"Current focus classes: {dataset.focus_classes}",
    sep="\n",
)

Current type classes: ['human adrenal gland', 'human bone marrow', 'human cerebelum', 'human esophagus', 'human heart section', 'human hyaline cartilage', 'human kidney', 'human liver section', 'human salivary gland', 'human small intestine', 'human spleen', 'human stratified epithelium', 'human thymus', 'human tongue', 'human tonsil tongue']
Current zoom classes: [4, 10, 20, 40]
Current focus classes: [0, 1]


## Fine-tuning

In [4]:
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText, BitsAndBytesConfig

model_id = "google/medgemma-4b-it"

# Fail early when the GPU's major compute capability is below 8, which this
# notebook treats as "no bfloat16 support".
compute_capability = torch.cuda.get_device_capability()
if compute_capability[0] < 8:
    raise ValueError("GPU does not support bfloat16, please use a GPU that supports bfloat16.")

# 4-bit NF4 quantization with double quantization; compute and storage both
# use the same dtype the model is loaded with.
compute_dtype = torch.bfloat16
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_storage=compute_dtype,
)

model_kwargs = {
    "attn_implementation": "eager",
    "torch_dtype": compute_dtype,
    "device_map": "auto",
    "quantization_config": quantization_config,
}

# Load the quantized model first, then its matching processor.
model = AutoModelForImageTextToText.from_pretrained(model_id, **model_kwargs)
processor = AutoProcessor.from_pretrained(model_id)

# Pad on the right to avoid issues during training.
processor.tokenizer.padding_side = "right"

# Hand the processor to the dataset, then let it build its train/val loaders.
dataset.set_processor(processor)
dataset.build_train_val_loaders()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [5]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank-16 adapters on every linear layer, with the
# listed modules additionally kept trainable and saved alongside the adapters.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules="all-linear",
    modules_to_save=["lm_head", "embed_tokens"],
)

# Wrap the loaded model with the adapters. `model` is rebound to the wrapped
# model, so re-running this cell without reloading the base model would wrap
# it a second time.
model = get_peft_model(model, peft_config)

In [6]:
#TODO extend with task-level metrics, and call it before & after training
def evaluate(model, dataset):
    """Return the mean loss of ``model`` over ``dataset.val_loader``.

    Mirrors the validation phase of the training cell: each batch is moved to
    ``model.device``, passed to the model as keyword arguments, and the
    ``.loss`` of the output is averaged over all batches.

    Args:
        model: A module exposing ``training``, ``eval()``, ``train(mode)``,
            ``device`` and returning an object with a scalar ``loss`` when
            called with a batch.
        dataset: An object exposing ``val_loader``, an iterable of batches.
            Assumes every batch is a dict of tensors that includes whatever
            the model needs to compute a loss — TODO confirm against the
            dataset's collate function.

    Returns:
        float: The average validation loss, or ``nan`` when the loader yields
        no batches.
    """
    # Remember the current mode so calling this mid-training is side-effect free.
    was_training = model.training
    model.eval()

    total_loss = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for batch in dataset.val_loader:
                batch = {k: v.to(model.device) for k, v in batch.items()}
                total_loss += model(**batch).loss.item()
                num_batches += 1
    finally:
        # Restore the original train/eval mode even if a batch fails.
        model.train(was_training)

    return total_loss / num_batches if num_batches else float("nan")

In [None]:
from transformers import get_scheduler
from tqdm import tqdm
# NOTE(review): the CUDA caching allocator appears to read this variable only
# once, when it is first initialised. The model has already been loaded onto
# the GPU by an earlier cell, so this assignment likely has no effect in the
# current kernel — consider setting it in the first cell, before any CUDA use.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# -- Training parameters --
epochs = 3
weight_decay = 0.001
learning_rate = 2e-5
scheduler_name = "cosine"
ratio_warmup = 0.1

# Only pass parameters that receive gradients (the LoRA adapters and the
# modules kept trainable); frozen base weights never get a gradient, so the
# resulting updates are the same while the optimizer tracks far fewer tensors.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(
    trainable_params,
    lr=learning_rate,
    weight_decay=weight_decay,
)

# The scheduler is stepped once per batch, so its horizon is
# (batches per epoch) * epochs, with the first `ratio_warmup` share as warm-up.
scheduler = get_scheduler(
    name=scheduler_name,
    optimizer=optimizer,
    num_warmup_steps=int(len(dataset.train_loader) * epochs * ratio_warmup),
    num_training_steps=len(dataset.train_loader) * epochs,
)

# Clear gradients that may be left on the parameters if a previous run of this
# cell was interrupted between backward() and zero_grad().
optimizer.zero_grad()

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")

    # Training phase
    model.train()
    train_loss = 0.0
    train_steps = 0

    for batch in tqdm(dataset.train_loader, desc="Training"):
        # Move every tensor of the batch to the model's device
        batch = {k: v.to(model.device) for k, v in batch.items()}

        outputs = model(**batch)
        loss = outputs.loss

        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        train_loss += loss.item()
        train_steps += 1

    # Report NaN instead of raising ZeroDivisionError on an empty loader
    avg_train_loss = train_loss / train_steps if train_steps else float("nan")
    print(f"Average Training Loss: {avg_train_loss:.4f}")

    # Evaluation phase
    model.eval()
    val_loss = 0.0
    val_steps = 0

    with torch.no_grad():
        for batch in tqdm(dataset.val_loader, desc="Validation"):
            batch = {k: v.to(model.device) for k, v in batch.items()}

            outputs = model(**batch)
            loss = outputs.loss

            val_loss += loss.item()
            val_steps += 1

    avg_val_loss = val_loss / val_steps if val_steps else float("nan")
    print(f"Average Validation Loss: {avg_val_loss:.4f}")
    print("-" * 50)
    


Epoch 1/3


Training:   0%|          | 0/444 [00:00<?, ?it/s]

Training:  25%|██▍       | 109/444 [02:19<07:04,  1.27s/it]