# Fine-tuning

### Configuração de ambiente

In [None]:
from os import environ

# Pin this process to the GPU chosen by the user. This cell runs before the
# one that imports torch/unsloth, so the variable is already set when those
# libraries are loaded.
selected_gpu = input('Enter GPU ID: ')
environ['CUDA_VISIBLE_DEVICES'] = selected_gpu

### Imports

In [None]:
from os.path import join
from json import load, dump
from datetime import timedelta

from unsloth import FastVisionModel
from unsloth import is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
from tqdm.notebook import tqdm

import torch

from scripts.authentication import authenticate_huggingface
from scripts.data import SimpleLesionData, SimpleDatasetAnalysis
from scripts.messages import create_training_message
from scripts.training import Training

import scripts.definitions as defs

### Autenticação

In [None]:
# Run the project's Hugging Face authentication helper before any model is
# loaded. NOTE(review): presumably required to download the base model from
# the Hub -- confirm in scripts.authentication.
authenticate_huggingface()

### Configuração

In [None]:
# Version tag of this training run; it is stored on the Training record and
# becomes part of the hyperparameter file name written below.
VERSION = '0.2'

# Evaluate the capability check once and derive both precision flags from it.
bf16_available = is_bf16_supported()

# LoRA / PEFT settings; unpacked into FastVisionModel.get_peft_model later on.
peft_hyperparameters = {
    # Layers
    'finetune_vision_layers': True,
    'finetune_language_layers': True,
    'finetune_attention_modules': True,
    'finetune_mlp_modules': True,
    # LoRA
    'r': 128,
    'lora_alpha': 64,
    'lora_dropout': 0.001,
    'bias': 'none',
    'random_state': defs.STATIC_RANDOM_STATE,
    'use_rslora': True,
    'loftq_config': None,
}

# Trainer settings; unpacked into SFTConfig later on.
sft_hyperparameters = {
    # Memory control
    'per_device_train_batch_size': 4,
    'gradient_accumulation_steps': 1,
    # Training control
    'learning_rate': 2e-4,
    'weight_decay': 0.01,
    'max_steps': 30,
    # Alternative to max_steps: 'num_train_epochs': 2.0,
    'lr_scheduler_type': 'linear',
    'warmup_ratio': 0.0,
    'optim': 'adamw_8bit',
    # Monitoring
    # A float below 1 is read as a ratio of the total training steps
    # (see the transformers TrainingArguments documentation).
    'logging_steps': 0.1,
    'report_to': 'tensorboard',
    'output_dir': 'outputs',
    # Randomness
    'seed': defs.STATIC_RANDOM_STATE,
    # Data types: exactly one of bf16 / fp16 is enabled.
    'bf16': bf16_available,
    'fp16': not bf16_available,
    # Dataset
    'remove_unused_columns': False,
    'dataset_text_field': '',
    'dataset_kwargs': {'skip_prepare_dataset': True},
    'dataset_num_proc': 4,
    # Context window
    'max_seq_length': 2048,
}

training_hyperparameters = Training(
    base_model_name=defs.BASE_MODEL_NAME,
    trained_model_name=defs.MODEL_NAME,
    quantization=False,
    prompt_type=defs.PromptType.REPORT,
    version=VERSION,
    size=11,
    peft_hyperparameters=peft_hyperparameters,
    sft_hyperparameters=sft_hyperparameters,
    # Placeholders: both are overwritten with measured values after training.
    used_memory=0.0,
    training_time=0.0,
)

# Persist the configuration before training starts, so the run is documented
# even if a later cell fails.
hyperparameters_path = join(
    defs.TRAINING_PATH,
    f'hyperparameters_{training_hyperparameters.version}.json',
)
with open(hyperparameters_path, 'w', encoding='utf-8') as hyperparameters_file:
    dump(
        training_hyperparameters.model_dump(),
        hyperparameters_file,
        indent=4,
        ensure_ascii=False,
    )

### Carregamento do dataset

In [None]:
# Training samples: every entry of the JSON list becomes a SimpleLesionData.
training_dataset_path = join(defs.DATA_PATH, 'stt_data', 'training_dataset.json')
with open(training_dataset_path, 'r', encoding='utf-8') as dataset_file:
    raw_samples = load(dataset_file)
training_dataset = [SimpleLesionData(**sample) for sample in raw_samples]

# Dataset-level analysis: the JSON object is expanded into the keyword
# arguments of SimpleDatasetAnalysis.
analysis_path = join(defs.DATA_PATH, 'training_dataset_analysis.json')
with open(analysis_path, 'r', encoding='utf-8') as analysis_file:
    raw_analysis = load(analysis_file)
training_dataset_analysis = SimpleDatasetAnalysis(**raw_analysis)

### Preparação das mensagens

In [None]:
# Initialised empty; this cell does not populate it.
validation_messages = []

# Build one training message per lesion sample, using the prompt type chosen
# in the hyperparameters and the dataset-level analysis. tqdm only wraps the
# iteration to display a progress bar.
training_messages = [
    create_training_message(
        training_hyperparameters.prompt_type,
        lesion_data,
        training_dataset_analysis,
    )
    for lesion_data in tqdm(training_dataset, desc='Criando mensagens de treinamento: ')
]

### Inicialização do Llama 3.2

In [None]:
# Load the base model and its tokenizer. 4-bit loading follows the
# `quantization` flag of the hyperparameters, and gradient checkpointing is
# requested in 'unsloth' mode.
base_model_name = training_hyperparameters.base_model_name
load_quantized = training_hyperparameters.quantization

model, tokenizer = FastVisionModel.from_pretrained(
    base_model_name,
    load_in_4bit=load_quantized,
    use_gradient_checkpointing='unsloth',
)

### Configuração de treinamento

In [None]:
# Wrap the base model with the PEFT settings stored in the hyperparameters.
peft_settings = training_hyperparameters.peft_hyperparameters
model = FastVisionModel.get_peft_model(model, **peft_settings)

# Switch the model to training mode before the trainer is built.
FastVisionModel.for_training(model)

# The collator and the SFT configuration are created in the same order in
# which they are passed to the trainer.
vision_collator = UnslothVisionDataCollator(model, tokenizer)
sft_config = SFTConfig(**training_hyperparameters.sft_hyperparameters)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    data_collator=vision_collator,
    train_dataset=training_messages,
    args=sft_config,
)

### Treinamento

In [None]:
# Run the fine-tuning loop. The returned stats expose a `metrics` mapping
# whose 'train_runtime' entry is read by the following cell.
trainer_stats = trainer.train()

In [None]:
# Peak memory reserved by the CUDA allocator, converted from bytes to GiB and
# rounded to three decimals.
bytes_per_gib = 1024 ** 3
used_memory = round(torch.cuda.max_memory_reserved() / bytes_per_gib, 3)

# Total training time reported by the trainer, used below as seconds.
train_runtime = trainer_stats.metrics['train_runtime']

print(f'Tempo de treinamento: {timedelta(seconds=train_runtime)}')
print(f'Memória máxima reservada: {used_memory} GB')

# Replace the placeholder values on the hyperparameter record with the
# measured ones.
training_hyperparameters.used_memory = used_memory
training_hyperparameters.training_time = train_runtime

# Rewrite the hyperparameter file so it also carries the measured values.
hyperparameters_path = join(
    defs.TRAINING_PATH,
    f'hyperparameters_{training_hyperparameters.version}.json',
)
with open(hyperparameters_path, 'w', encoding='utf-8') as hyperparameters_file:
    dump(
        training_hyperparameters.model_dump(),
        hyperparameters_file,
        indent=4,
        ensure_ascii=False,
    )

### Salvamento

In [None]:
# Name of the saved model: <model name>-<version>-<size>B, followed by
# optional suffixes for 4-bit quantization and the simple-classification
# prompt type.
trained_model_name = (
    f'{training_hyperparameters.trained_model_name}'
    f'-{training_hyperparameters.version}'
    f'-{training_hyperparameters.size}B'
)

if training_hyperparameters.quantization:
    trained_model_name += '-4bit'

if training_hyperparameters.prompt_type == defs.PromptType.SIMPLE_CLASSIFICATION:
    trained_model_name += '-SC'

# Save the fine-tuned model and its tokenizer side by side under the
# 'adapter_weights' results directory.
save_path = join(defs.RESULTS_PATH, 'adapter_weights', trained_model_name)

model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

# Load the registry of trained models. On the very first run the registry
# file does not exist yet; start from an empty registry instead of failing
# after the weights have already been written.
models_path = join(defs.TRAINING_PATH, 'models.json')

try:
    with open(models_path, 'r', encoding='utf-8') as file:
        registered_models = load(file)
except FileNotFoundError:
    registered_models = {}

# Round-trip every existing entry through defs.Model, as before, so that all
# entries are validated and written back in the same shape.
models = {name: defs.Model(**data) for name, data in registered_models.items()}

new_model = defs.Model(
    local=True,
    quantized=training_hyperparameters.quantization,
    prompt_type=training_hyperparameters.prompt_type,
    version=training_hyperparameters.version,
    size=training_hyperparameters.size
)

# An existing entry with the same name is replaced by the new one.
models[trained_model_name] = new_model

# Serialise into a separate dict rather than overwriting `models` in place,
# so `models` keeps holding defs.Model instances only.
serialized_models = {name: entry.model_dump() for name, entry in models.items()}

with open(models_path, 'w', encoding='utf-8') as file:
    dump(serialized_models, file, indent=4, ensure_ascii=False)