# Testes

### Autenticação

In [None]:
# NOTE(review): the module name 'authentcation' looks like a typo of
# 'authentication' -- confirm it matches the actual file name under scripts/.
from scripts.authentcation import authenticate_huggingface

# Run the project's Hugging Face authentication helper (presumably needed
# before the dataset and model below can be downloaded -- confirm).
authenticate_huggingface()

### Carregamento do dataset

In [None]:
from datasets import load_dataset

# Load only the 'test' split of the 'marmal88/skin_cancer' dataset; the
# evaluation samples used later in this notebook are drawn from it.
dataset = load_dataset('marmal88/skin_cancer', split='test')

### Carregamento do modelo

In [None]:
from unsloth import FastVisionModel
import torch

# Identifier of the checkpoint under test; it is also recorded in the test
# output metadata and used to derive the results file name.
MODEL_NAME = 'unsloth/Llama-3.2-11B-Vision-Instruct'

# Loading options kept together so they are easy to tweak between runs:
# 4-bit loading plus Unsloth's gradient checkpointing setting.
load_options = {
    'load_in_4bit': True,
    'use_gradient_checkpointing': 'unsloth',
}

# `tokenizer` is later called with both an image and text, so it acts as the
# model's input processor rather than a text-only tokenizer.
model, tokenizer = FastVisionModel.from_pretrained(MODEL_NAME, **load_options)

### Preparação do teste

In [None]:
from os import makedirs
from os.path import join, exists

from scripts.messages import add_inference_message
from scripts.data import generate_test_samples

# Directory where the JSON file with the test results is written.
DIRECTORY = '../analysis'

# Sample count passed to generate_test_samples and recorded in the output.
DATA_SIZE = 100

# Free-form prompt: the model may answer in any format.
OPEN_PROMPT = 'Classify the skin lesion in the image.'
# Constrained prompt: the model is asked to answer with a class name only.
CLASS_ONLY_PROMPT = OPEN_PROMPT + ' Say only the name of the disease and nothing else. The diseases to be classified are: ' + \
    'melanocytic Nevi, melanoma, benign keratosis-like lesions, ' + \
    'basal cell carcinoma, actinic keratoses, vascular lesions and dermatofibroma.'

# Prompt used for this run; switch to CLASS_ONLY_PROMPT for the constrained test.
prompt = OPEN_PROMPT

# Label stored in the output metadata and file name; stays '' for a custom prompt.
test_type = {OPEN_PROMPT: 'open', CLASS_ONLY_PROMPT: 'class_only'}.get(prompt, '')

messages = add_inference_message(prompt)
# presumably yields (image, disease) pairs -- that is how the test loop unpacks it.
samples = generate_test_samples(dataset, DATA_SIZE)

# Drop the hub organisation / local weights directory prefix from the model
# name. str.strip() must not be used here: it removes any of the given
# *characters* from both ends, which also ate the trailing 't' of 'Instruct'.
model_label = MODEL_NAME
for prefix in ('unsloth/', '../weights/'):
    if model_label.startswith(prefix):
        model_label = model_label[len(prefix):]

test_name = f'{model_label}_test_{DATA_SIZE}_{test_type}'
test_output = {'model': MODEL_NAME, 'data_size': DATA_SIZE, 'type': test_type, 'results': []}

# exist_ok avoids the separate existence check (and the race between the two calls).
makedirs(DIRECTORY, exist_ok=True)

### Testes

In [None]:
from tqdm.notebook import tqdm

# Put the model in Unsloth's inference mode before generating.
FastVisionModel.for_inference(model)

# The prompt is the same for every sample, so the chat template is rendered once.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

for image, disease in tqdm(samples, desc='Testing: '):
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors='pt',
    ).to('cuda')

    outputs = model.generate(
        **inputs,
        max_new_tokens=128,
        use_cache=True,
        temperature=0.1,
        min_p=0.1
    )

    # generate() returns the prompt tokens followed by the newly generated
    # ones. Decode only the new tokens instead of splitting the full text on
    # the word 'assistant', which truncated any answer containing that word
    # (e.g. "As an AI assistant, ...").
    prompt_length = inputs['input_ids'].shape[-1]
    assistant_message = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    ).strip()

    # Dataset labels use underscores; store them with spaces so they are
    # comparable with the model's natural-language answer.
    test_output['results'].append({'expected': disease.replace('_', ' '), 'actual': assistant_message})

### Salvamento dos testes

In [None]:
from json import dump

# Write the collected results as an indented JSON file named after the test
# inside the analysis directory.
output_path = join(DIRECTORY, test_name + '.json')

with open(output_path, mode='w') as results_file:
    dump(test_output, results_file, indent=4)