# Fine-tuning

## Autenticação

In [12]:
from getpass import getpass
from os import getenv, mkdir
from os.path import exists

from dotenv import load_dotenv
from huggingface_hub import login

# Resolve the Hugging Face token: read HF_TOKEN from ../.env when that file
# exists, otherwise (or when the file does not define it) ask interactively.
hf_token = None

if exists('../.env'):
    load_dotenv(dotenv_path='../.env')
    hf_token = getenv('HF_TOKEN')

if not hf_token:
    # getpass keeps the secret from being echoed into the saved cell output.
    hf_token = getpass('Enter your Hugging Face token: ')

# Reject empty / whitespace-only input explicitly instead of relying on an
# assert, which only caught None and is skipped when Python runs with -O.
hf_token = hf_token.strip()
if not hf_token:
    raise ValueError('Invalid Hugging Face token')

login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


## Carregamento do dataset

In [15]:
from datasets import load_dataset, DownloadMode

# Fixed texts used to build every conversation.
system_message = 'You are a model trained to identify skin lesions.'
prompt = 'Identify the skin disease in the image.'
answer = 'The skin disease in the image is {disease}.'


def format_data(sample):
    """Turn one dataset record into a three-turn chat conversation.

    Args:
        sample: Mapping that provides an ``'image'`` entry and a ``'dx'``
            string entry.

    Returns:
        dict: ``{'messages': [...]}`` holding a system turn, a user turn
        (the prompt text followed by the image) and an assistant turn whose
        text is ``answer`` filled with ``dx`` after underscores have been
        replaced by spaces.
    """
    system_turn = {
        'role': 'system',
        'content': [{'type': 'text', 'text': system_message}],
    }

    question_part = {'type': 'text', 'text': prompt}
    image_part = {'type': 'image', 'image': sample['image']}
    user_turn = {'role': 'user', 'content': [question_part, image_part]}

    # Labels such as 'basal_cell_carcinoma' are rendered with spaces.
    disease_name = sample['dx'].replace('_', ' ')
    assistant_turn = {
        'role': 'assistant',
        'content': [{'type': 'text', 'text': answer.format(disease=disease_name)}],
    }

    return {'messages': [system_turn, user_turn, assistant_turn]}


# Create the local working directories when they are not there yet.
for _local_dir in ('../.data', '../.cache'):
    if not exists(_local_dir):
        mkdir(_local_dir)

# Load the 'train' split and convert the records returned by .take(3) into
# chat conversations; `dataset` ends up as a plain Python list.
dataset = list(map(format_data, load_dataset('marmal88/skin_cancer', split='train').take(3)))

Generating train split: 100%|██████████| 9577/9577 [00:24<00:00, 391.84 examples/s]
Generating validation split: 100%|██████████| 2492/2492 [00:08<00:00, 298.83 examples/s]
Generating test split: 100%|██████████| 1285/1285 [00:04<00:00, 266.67 examples/s]


## Inicialização do Llama 3.2

In [None]:
import torch
from transformers import AutoProcessor
from unsloth import FastVisionModel

# Checkpoint identifier shared by the model and the processor below.
model_id = 'unsloth/Llama-3.2-11B-Vision-Instruct'

# Load the 4-bit model and its tokenizer through Unsloth on the first CUDA device.
model, tokenizer = FastVisionModel.from_pretrained(
    model_name=model_id,
    max_seq_length=2048,
    dtype=torch.bfloat16,
    load_in_4bit=True,
    device_map='cuda:0',
    use_gradient_checkpointing='unsloth',
)

# The processor is loaded separately from the same checkpoint; the training
# collator uses it to build model inputs.
processor = AutoProcessor.from_pretrained(model_id)

  library_name = re.sub("cuda\d+", f"cuda{override_value}", library_name, count=1)
  source = re.sub("([^\.])nn\.", r"\1torch.nn.", source)
  "self.rotary_emb = .+?\)", function,
  "self.rotary_emb = .+?\)", function,
  left = re.match("[\s\n]{4,}", leftover).span()[1]
  .replace("*", "\*").replace("^", "\^")\
  .replace("*", "\*").replace("^", "\^")\
  .replace("-", "\-").replace("_", "\_")\
  .replace("-", "\-").replace("_", "\_")\
  .replace(":", "\:").replace("+", "\+")\
  .replace(":", "\:").replace("+", "\+")\
  .replace(".", "\.").replace(",", "\,")\
  .replace(".", "\.").replace(",", "\,")\
  .replace("(", "\(").replace(")", "\)")\
  .replace("(", "\(").replace(")", "\)")\
  .replace("[", "\[").replace("]", "\]")\
  .replace("[", "\[").replace("]", "\]")\
  r"for ([^\s]{1,}) in " + modulelist_item + "\:[\n]" + \
  inherited_modules = re.findall(r"class ([^\s]{1,})\(" + inherited_class + "\)", full_source)
  called = re.findall(r"[\s]{1,}" + re.escape(function) + "\(.+?\)", full

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


  f"   \\\   /|    GPU: {gpu_stats.name}. Max memory: {max_memory} GB. Platform: {platform_system}.\n"\
  f"O^O/ \_/ \\    Torch: {torch.__version__}. CUDA: {gpu_stats.major}.{gpu_stats.minor}. CUDA Toolkit: {torch.version.cuda}. Triton: {triton_version}\n"\
  f"\        /    Bfloat16 = {str(SUPPORTS_BFLOAT16).upper()}. FA [Xformers = {xformers_version}. FA2 = {HAS_FLASH_ATTENTION}]\n"\
  start = re.search('logger\.info\([\"\'].+?Running training', inner_training_loop).span(0)[0]
  spaces = re.search('\n([\s\t]{1,})', original_debug).group(0)[1:]
  front_spaces = re.match('([\s\t]{1,})', inner_training_loop).group(0)
  gb_found = re.match("([0-9]{1,})[\s]{0,}GB", max_shard_size, flags = re.IGNORECASE)
  mb_found = re.match("([0-9]{1,})[\s]{0,}MB", max_shard_size, flags = re.IGNORECASE)
  f"   \\\   /|    [0] Installing llama.cpp will take 3 minutes.\n"\
  f"O^O/ \_/ \\    [1] Converting HF to GGUF 16bits will take 3 minutes.\n"\
  f"\        /    [2] Converting GGUF 16bits to {quantiza

==((====))==  Unsloth 2024.11.11: Fast Mllama vision patching. Transformers: 4.46.3.
   \\   /|    GPU: NVIDIA GeForce RTX 3060. Max memory: 11.658 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Downloading shards:   0%|          | 0/2 [01:54<?, ?it/s]
  f"   \\\   /|    GPU: {gpu_stats.name}. Max memory: {max_memory} GB. Platform: {platform_system}.\n"\
  f"O^O/ \_/ \\    Torch: {torch.__version__}. CUDA: {gpu_stats.major}.{gpu_stats.minor}. CUDA Toolkit: {torch.version.cuda}. Triton: {triton_version}\n"\
  f"\        /    Bfloat16 = {str(SUPPORTS_BFLOAT16).upper()}. FA [Xformers = {xformers_version}. FA2 = {HAS_FLASH_ATTENTION}]\n"\


KeyboardInterrupt: 

## Fine-tuning com LoRA

In [None]:
from peft import LoraConfig

# LoRA adapter settings: adapters are attached to the q_proj and v_proj
# modules only, with no bias parameters trained.
peft_config = LoraConfig(
    task_type='CAUSAL_LM',
    target_modules=['q_proj', 'v_proj'],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias='none',
)

## Configuração de treinamento

In [None]:
from trl import SFTConfig

# Training hyperparameters for the supervised fine-tuning run.
args = SFTConfig(
    output_dir='fine-tuned-visionllama-unsloth',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    optim='adamw_torch_fused',
    logging_steps=5,
    save_strategy='epoch',
    learning_rate=2e-4,
    bf16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    # NOTE(review): the plain 'constant' schedule has no warmup phase, so
    # warmup_ratio above likely has no effect; switch to
    # 'constant_with_warmup' if warmup is intended.
    lr_scheduler_type='constant',
    push_to_hub=True,
    report_to='tensorboard',
    # The samples are pre-formatted chat conversations, so SFTTrainer must not
    # try to tokenize/prepare the dataset itself.
    dataset_kwargs={'skip_prepare_dataset': True},
    # Keep the 'messages' key on every sample: with the default (True) the
    # Trainer drops keys that are not model.forward() arguments before the
    # custom collator runs, and the collator then fails on example['messages'].
    remove_unused_columns=False,
)

## Treina o modelo

In [None]:
from qwen_vl_utils import process_vision_info
from trl import SFTTrainer


def collate_fn(examples):
    """Collate chat-formatted samples into one padded training batch.

    Args:
        examples: Sequence of dicts, each with a ``'messages'`` conversation.

    Returns:
        The processor output for the whole batch with an added ``'labels'``
        entry: a copy of ``input_ids`` in which padding tokens and image
        placeholder tokens are replaced by -100.
    """
    # Render every conversation to text and collect the images of every
    # conversation (first element of process_vision_info's result), one entry
    # per example so texts and images stay aligned.
    texts = [processor.apply_chat_template(example['messages'], tokenize=False) for example in examples]
    image_inputs = [process_vision_info(example['messages'])[0] for example in examples]

    # Tokenize / preprocess the whole batch exactly once.
    batch = processor(text=texts, images=image_inputs, return_tensors='pt', padding=True)

    # Labels start as a copy of the inputs; padding and image placeholder
    # positions are set to -100.
    labels = batch['input_ids'].clone()
    labels[labels == processor.tokenizer.pad_token_id] = -100

    image_token_id = processor.tokenizer.convert_tokens_to_ids(processor.image_token)
    labels[labels == image_token_id] = -100

    batch['labels'] = labels
    return batch


# Attach the LoRA adapters through Unsloth BEFORE building the trainer.
# get_peft_model has to receive the model itself; handing it the trainer and
# rebinding `trainer` to the result leaves no SFTTrainer to call .train() on.
lora_model = FastVisionModel.get_peft_model(
    model,
    r=8,
    target_modules=['q_proj', 'v_proj'],
    lora_alpha=16,
    lora_dropout=0.05,
    bias='none',
    use_gradient_checkpointing=True,
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# The adapters are already attached above, so no peft_config is passed here;
# passing one as well would configure LoRA a second time.
trainer = SFTTrainer(
    model=lora_model,
    args=args,
    train_dataset=dataset,
    data_collator=collate_fn,
    tokenizer=tokenizer,  # Use the tokenizer from Unsloth
)

trainer.train()

## Salva o modelo

In [None]:
# Write the trained model and the tokenizer files into the same directory.
output_dir = '../weights'

peft_model = trainer.model
peft_model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)