# Análise dos testes

### Imports

In [4]:
from os.path import join
from json import load
from collections import OrderedDict, Counter

from IPython.display import display, HTML

from sklearn.metrics import accuracy_score, confusion_matrix

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import scripts.configuration as config

### Configuração

In [2]:
# File name of the JSON results file; it is joined under RESULTS_PATH/tests when loaded.
TEST_NAME = 'Llama-3.2-11B-Vision-Instruct_test.json'
# Display name of the model, passed as the heading when the results are shown.
PLOT_TITLE = 'LLaMA-3.2-11B-Vision-Instruct (Quantizado)'

### Carregamento dos testes

In [5]:
# Parse the configured JSON results file into `test`.
with open(join(config.RESULTS_PATH, 'tests', TEST_NAME), 'r', encoding='utf-8') as file:
    test = load(file)

### Contagem das classes

In [11]:
def create_distribution_table(counter: Counter, total: int, title: str) -> pd.DataFrame:
    '''
    Cria um DataFrame formatado a partir dos dados de um contador.
    '''

    df = pd.DataFrame([
        {
            'Answer': answer if len(answer) < 50 else answer[:51] + '...',
            'Count': count,
            'Percentage': count / total * 100
        }
        for answer, count in counter.items()
    ])

    if total > 0:
        df = df.sort_values('Count', ascending=False)
        df['Percentage'] = df['Percentage'].map('{:.1f}%'.format)
    else:
        df['Percentage'] = '-'

    styled_df = df.style.set_caption(title).set_table_styles([
        {'selector': 'caption', 'props': [('font-weight', 'bold'), ('font-size', '120%')]},
        {'selector': 'th', 'props': [('text-align', 'left'), ('background-color', '#f0f0f0')]},
        {'selector': 'table', 'props': [('width', '100%')]},
    ]).hide(axis='index')

    return styled_df


def display_model_results(model_name: str, test_data: dict[str, list[dict[str, str]]]) -> None:
    '''
    Show the distribution of the model's answers on the test and training data.

    Renders a heading with the model name, followed by two side-by-side tables
    counting each distinct 'answer' value: one for the test results and one
    for the training results.

    Args:
        model_name: Text shown in the heading above the tables.
        test_data: Mapping with the keys 'results_on_test' and
            'results_on_training', each holding a list of records that carry
            an 'answer' entry.

    Raises:
        KeyError: If either results key, or an 'answer' entry, is missing.
    '''

    results_on_test = test_data['results_on_test']
    results_on_training = test_data['results_on_training']

    test_table_html = create_distribution_table(
        Counter(item['answer'] for item in results_on_test),
        len(results_on_test),
        'Model predictions distribution for test data',
    ).to_html()
    training_table_html = create_distribution_table(
        Counter(item['answer'] for item in results_on_training),
        len(results_on_training),
        'Model predictions distribution for training data',
    ).to_html()

    display(HTML(f'<h2>{model_name}</h2>'))
    # Two equal-width grid columns place the tables next to each other.
    display(HTML(f"""
    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
        <div>
            {test_table_html}
        </div>
        <div>
            {training_table_html}
        </div>
    </div>
    <br><hr><br>
    """))


# Render the answer-distribution tables for the loaded results.
display_model_results(PLOT_TITLE, test)

Answer,Count,Percentage
Lesões de pele que necessitam de avaliação presenci...,677,79.7%
Ceratose seborreica.,91,10.7%
Câncer de pele Melanoma.,13,1.5%
**Classificação da lesão de pele na imagem:** * ...,13,1.5%
Lesões de pele que necessitam de avaliação presenci...,12,1.4%
Câncer de pele Não Melanoma (CEC ou CBC).,12,1.4%
A lesão de pele na imagem é classificada como **Cer...,8,0.9%
As lesões de pele na imagem são classificadas como ...,7,0.8%
As lesões de pele na imagem são classificadas como ...,5,0.6%
As opções de classificação são: dict_keys(['Lesões ...,1,0.1%

Percentage


### Processamento dos testes

In [None]:
# Canonical class labels searched for in model answers and used as the axes of
# the confusion matrix.
# NOTE(review): 'melanocytic Nevi' has mixed capitalization; presumably it
# mirrors the spelling of the expected labels in the results file. Confirm
# before normalizing it.
DISEASE_CLASSES = [
    'melanocytic Nevi',
    'melanoma',
    'benign keratosis-like lesions',
    'basal cell carcinoma',
    'actinic keratoses',
    'vascular lesions',
    'dermatofibroma'
]


def normalize_disease(text: str) -> str:
    '''
    Map a free-form disease name onto its canonical class label.

    The input is lower-cased, stripped of surrounding whitespace and then of
    leading and trailing periods. The first known variant found as a substring
    decides the result; text containing no known variant is returned in its
    cleaned form.
    '''

    cleaned = text.lower().strip().strip('.')

    # Checked in order, so an earlier entry wins when several variants match.
    known_variants = (
        ('melanocytic nevi', 'melanocytic Nevi'),
        ('nevus', 'melanocytic Nevi'),
        ('nevi', 'melanocytic Nevi'),
        ('benign keratosis', 'benign keratosis-like lesions'),
        ('seborrheic keratosis', 'benign keratosis-like lesions'),
        ('basal cell', 'basal cell carcinoma'),
        ('actinic keratosis', 'actinic keratoses'),
        ('vascular', 'vascular lesions'),
    )

    return next(
        (canonical for fragment, canonical in known_variants if fragment in cleaned),
        cleaned,
    )


def classify_answer(text: str) -> str:
    '''
    Reduce a model answer to one canonical disease class, or flag it as unclear.

    The comparison ignores case. An answer counts as clear only when exactly
    one of the known class labels appears in it; answers naming none or
    several of them yield 'unclear answer'.
    '''

    lowered = text.lower()

    matches = [label for label in DISEASE_CLASSES if label.lower() in lowered]

    if len(matches) == 1:
        return matches[0]

    return 'unclear answer'


def process_and_display_results(
    model_name: str,
    test_data: dict[str, list[dict[str, str]]],
) -> list[tuple[str, str]]:
    '''
    Classify every model answer and display expected vs. classified counts.

    Renders a heading with the model name, followed by two side-by-side
    tables: the distribution of the expected classes and the distribution of
    the classes assigned to the model's answers.

    Args:
        model_name: Text shown in the heading above the tables.
        test_data: Mapping whose 'results' key holds a list of records, each
            with an 'expected' label and the model's 'actual' answer text.

    Returns:
        One (expected, classified) pair per record, in the original order.

    Raises:
        KeyError: If 'results', 'expected' or 'actual' is missing.
    '''

    results = test_data['results']
    total = len(results)

    # The raw answer text is only needed for classification, so the pairs are
    # built directly instead of going through intermediate records.
    pairs = [
        (result['expected'], classify_answer(result['actual']))
        for result in results
    ]

    expected_counter = Counter(expected for expected, _ in pairs)
    classified_counter = Counter(classified for _, classified in pairs)

    expected_table_html = create_distribution_table(
        expected_counter, total, 'Expected Classes'
    ).to_html()
    classified_table_html = create_distribution_table(
        classified_counter, total, 'Classified Predictions'
    ).to_html()

    display(HTML(f'<h2>{model_name}</h2>'))
    # Two equal-width grid columns place the tables next to each other.
    display(HTML(f"""
    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
        <div>
            {expected_table_html}
        </div>
        <div>
            {classified_table_html}
        </div>
    </div>
    <br><hr><br>
    """))

    return pairs


# Classified (expected, predicted) pairs, keyed by model name.
all_results = {}

# Only one results file is loaded (into `test`); the name `tests` was never
# defined, so iterating over it raised NameError. The loaded results are
# wrapped under the configured title to keep the per-model dictionary shape.
# NOTE(review): process_and_display_results reads test_data['results'];
# confirm that the loaded file provides that key.
for model_name, test_data in {PLOT_TITLE: test}.items():
    all_results[model_name] = process_and_display_results(model_name, test_data)

### Análise

In [None]:
def analyze_model_performance(pairs: list[tuple[str, str]], model_name: str) -> float:
    '''
    Compute the accuracy and plot the row-normalized confusion matrix.

    The figure is saved as '../plots/<model_name>.png' (the directory is
    created when missing) and then shown.

    Args:
        pairs: (expected, classified) label pairs. Every label must be one of
            DISEASE_CLASSES or 'unclear answer'.
        model_name: Used in the plot title and as the output file name.

    Returns:
        The fraction of pairs whose classified label equals the expected one.

    Raises:
        KeyError: If a label in `pairs` has no Portuguese translation.
    '''

    # Imported locally because the file only imports `join` from os.path.
    from os import makedirs

    class_pt_translations = {
        'melanocytic Nevi': 'Nevo melanocítico',
        'melanoma': 'Melanoma',
        'benign keratosis-like lesions': 'Lesões similares à queratose benigna',
        'basal cell carcinoma': 'Carcinoma basocelular',
        'actinic keratoses': 'Queratose actínica',
        'vascular lesions': 'Lesões vasculares',
        'dermatofibroma': 'Dermatofibroma',
        'unclear answer': 'Resposta incerta'
    }

    y_true = [expected for expected, _ in pairs]
    y_pred = [classified for _, classified in pairs]

    accuracy = accuracy_score(y_true, y_pred)

    labels = DISEASE_CLASSES + ['unclear answer']

    # The matrix is computed on the translated labels so that the axis order
    # matches the Portuguese tick labels.
    pt_labels = [class_pt_translations[label] for label in labels]
    pt_y_true = [class_pt_translations[label] for label in y_true]
    pt_y_pred = [class_pt_translations[label] for label in y_pred]

    # normalize='true' scales each row by the number of samples of that true class.
    cm = confusion_matrix(pt_y_true, pt_y_pred, labels=pt_labels, normalize='true')

    plt.figure(figsize=(12, 10))
    heat_map = sns.heatmap(cm,
                           annot=True,
                           fmt='.1%',
                           cmap='Blues',
                           xticklabels=pt_labels,
                           yticklabels=pt_labels,
                           vmin=0,
                           vmax=1)
    color_bar = heat_map.collections[0].colorbar
    color_bar.set_ticks([0, 0.25, 0.5, 0.75, 1])
    color_bar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
    plt.title(f'{model_name}\nAcurácia: {accuracy:.2%}')
    plt.xlabel('Previsto')
    plt.ylabel('Verdadeiro')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()

    # savefig does not create missing directories, so make sure the target exists.
    output_dir = '../plots'
    makedirs(output_dir, exist_ok=True)
    plt.savefig(f'{output_dir}/{model_name}.png', dpi=300)
    plt.show()

    return accuracy


# Plot the confusion matrix for each model and keep its accuracy, keyed by model name.
accuracies = {}
for model_name, pairs in all_results.items():
    accuracies[model_name] = analyze_model_performance(pairs, model_name)