In [None]:
# Install the Hugging Face Transformers build at the `v4.49.0-Gemma-3` tag
# directly from GitHub. `--no-deps` tells pip to install only this package and
# leave its dependencies untouched.
# NOTE(review): this runs before the unpinned installs in the next cell; confirm
# that they do not swap this build for a different transformers version.
%pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3

In [None]:
# Install the rest of the training stack in three separate pip invocations:
# Unsloth and vLLM first, then their companion training libraries, then the
# tokenizer / dataset / Hub utilities.
# NOTE(review): nothing here is version-pinned, so a fresh run may resolve to
# different releases than the ones this notebook was developed against.
%pip install unsloth vllm
%pip install bitsandbytes accelerate xformers peft trl triton cut_cross_entropy unsloth_zoo
%pip install sentencepiece protobuf datasets huggingface_hub hf_transfer setuptools

In [None]:
# Install Weights & Biases and authenticate this session. The training
# configuration further down sets report_to="wandb", so the login has to
# happen before training starts.
%pip install --quiet wandb
import wandb
wandb.login()

In [None]:
from unsloth import FastModel
import torch
import os

# Directory that receives a copy of the loaded model and tokenizer.
local_model_path = "./saved_models/gemma-3-4b-it"

# Reference list of Unsloth checkpoint names. Nothing in this cell reads it;
# the checkpoint that is actually loaded is named explicitly in the call below.
# NOTE(review): despite the variable name, only the first group carries a
# 4-bit suffix in its repository name.
fourbit_models = [
    # Gemma 3 `-it` checkpoints with the `unsloth-bnb-4bit` suffix
    "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",

    # Other checkpoints (no 4-bit suffix in the name)
    "unsloth/Llama-3.1-8B",
    "unsloth/Llama-3.2-3B",
    "unsloth/Llama-3.3-70B",
    "unsloth/mistral-7b-instruct-v0.3",
    "unsloth/Phi-4",
]

# Load Gemma 3 4B with 4-bit loading requested, 8-bit loading and full
# fine-tuning both disabled, and a 2048-token maximum sequence length.
model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/gemma-3-4b-it",
    full_finetuning=False,
    load_in_8bit=False,
    load_in_4bit=True,
    max_seq_length=2048,
)

# Persist both objects side by side so they can be reloaded from disk.
model.save_pretrained(local_model_path)
tokenizer.save_pretrained(local_model_path)
print(f"Model saved to: {local_model_path}")

In [None]:
# Attach LoRA adapters to the loaded model. Adapters are enabled for the
# language layers (attention and MLP modules) and disabled for vision layers.
# NOTE(review): `model` is rebound to the wrapped result, so re-running this
# cell feeds the already-wrapped model back in; re-run the loading cell first.
model = FastModel.get_peft_model(
    model,
    # Which parts of the network receive adapters
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    finetune_vision_layers=False,
    # LoRA hyperparameters (rank equals alpha here)
    r=8,
    lora_alpha=8,
    lora_dropout=0,
    bias="none",
    # Fixed seed passed through to the adapter setup
    random_state=3407,
)

In [None]:

from datasets import load_dataset

# Read the JSONL corpus and carve a seeded 90/10 split out of its "train"
# portion in one expression. `dataset` ends up holding the split, addressed
# by the keys "train" and "test".
dataset = load_dataset(
    "json",
    data_files="./Last_clean_swedish_updated2.jsonl",
)["train"].train_test_split(test_size=0.1, seed=42)

# Named handles for the two halves; the "test" half serves as validation data.
train_dataset, val_dataset = dataset["train"], dataset["test"]

print(f"Training set size: {len(train_dataset)} examples")
print(f"Validation set size: {len(val_dataset)} examples")

In [None]:
from trl import SFTTrainer, SFTConfig

# Training configuration for supervised fine-tuning.
sft_config = SFTConfig(
    # --- data ---
    dataset_text_field="text",       # column of the dataset that is trained on
    max_seq_length=2048,             # same limit as used when loading the model

    # --- optimisation ---
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,   # 8 x 2 = 16 sequences per optimizer step per device
    learning_rate=2e-5,
    warmup_steps=50,
    lr_scheduler_type="linear",
    optim="adamw_8bit",
    weight_decay=0.01,
    seed=42,

    # --- precision / memory ---
    bf16=True,
    fp16=False,
    gradient_checkpointing=True,

    # --- logging ---
    logging_strategy="steps",
    logging_steps=50,
    report_to="wandb",
    run_name="legal_gemma3_finetuning_h100",
    logging_dir="wandb_logs_gemma3",
    output_dir="outputs_gemma3",

    # --- evaluation (explicitly disabled) ---
    # NOTE(review): the data cell holds out a validation split (`val_dataset`)
    # that is never consumed while evaluation stays off; confirm this is
    # intentional.
    eval_strategy="no",
    eval_steps=None,
    load_best_model_at_end=False,
    metric_for_best_model=None,
)

# Build the trainer on the named training split prepared by the data cell
# (the same object as dataset["train"]), so this cell and the data cell agree
# on which variable represents the training data.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=None,
    args=sft_config,
)

In [None]:
# Cap this process's share of GPU memory so that a fixed amount of headroom
# stays free on the device. The cap is expressed as a fraction of the card's
# total memory, so it is derived from what the device itself reports rather
# than from a fixed capacity figure; that keeps the headroom at 8 GiB on any
# GPU instead of only on one specific card.
# NOTE(review): assumes the previous hard-coded 79.109 was the total memory of
# an 80 GB card expressed in GiB — confirm.
GPU_INDEX = 0
GPU_HEADROOM_GIB = 8

_gpu_total_gib = torch.cuda.get_device_properties(GPU_INDEX).total_memory / 1024**3

if _gpu_total_gib > GPU_HEADROOM_GIB:
    torch.cuda.set_per_process_memory_fraction(
        (_gpu_total_gib - GPU_HEADROOM_GIB) / _gpu_total_gib,
        device=GPU_INDEX,
    )
else:
    # A card no larger than the requested headroom cannot be capped this way:
    # the fraction would be zero or negative. Leave the allocator uncapped.
    print(
        f"GPU {GPU_INDEX} reports {_gpu_total_gib:.3f} GiB, which is not more than "
        f"the {GPU_HEADROOM_GIB} GiB headroom; memory cap not applied."
    )

In [None]:
# Run fine-tuning with the trainer configured above; whatever `train()`
# returns is kept in `trainer_stats`.
trainer_stats = trainer.train()