In [1]:
import os
import numpy as np
from PIL import Image
from collections import defaultdict
import pandas as pd

In [2]:
# Class names supplied by the user. The position in this list determines the
# numeric ID assigned to each class below, so the order must not be changed.
classes = [
    "Skin",
    "Out of focus area",
    "Oral mucosa",
    "Enamel",
    "Tongue",
    "Lip",
    "Hard palate",
    "Specular reflection",
    "Attached gingiva",
    "Soft palate",
    "Hair",
    "Marginal gingiva",
    "Prosthetics",
    "Shadow/Noise",
    "Plastic",
    "Metal",
    "Gingivitis",
    "Attrition/Erosion",
    "Inflammation",
    "Pigmentation",
    "Calculus",
    "Initial caries",
    "Stain",
    "Fluorosis",
    "Microfracture",
    "Root",
    "Plaque",
    "Dentine caries",
    "Ulcer",
    "Leukoplakia",
    "Blood vessel",
    "Mole",
    "Malignant lesion",
    "Fibroma",
    "Makeup",
]

# Map every class name to its integer ID. IDs start at 1 because 0 is
# reserved for the background.
class_map = dict(zip(classes, range(1, len(classes) + 1)))

# Base directory that is searched for the mask files
base_dir = "/app/dataset"

In [3]:
# Accumulators keyed by the integer value found in the masks:
#   pixel_counts[v]   -> total number of pixels equal to v over all masks
#   image_presence[v] -> number of mask files in which v occurs at least once
pixel_counts = defaultdict(int)
image_presence = defaultdict(int)

# Walk base_dir recursively and process every file named "*_masks.png"
for root, _, files in os.walk(base_dir):
    for file in files:
        if not file.endswith("_masks.png"):
            continue

        mask_path = os.path.join(root, file)

        # Open through a context manager so the file handle is released
        # deterministically, even if decoding the image raises.
        with Image.open(mask_path) as mask_image:
            mask = np.array(mask_image)

        # np.unique flattens its input before counting.
        # NOTE(review): this assumes single-channel masks whose pixel values
        # are the class IDs; a multi-channel mask would be counted per
        # channel value -- confirm against the dataset.
        unique, counts = np.unique(mask, return_counts=True)

        # .tolist() turns numpy scalars into plain Python numbers, so the
        # dictionaries hold ordinary int keys/values instead of np.uint8 /
        # np.int64 objects.
        for val, count in zip(unique.tolist(), counts.tolist()):
            pixel_counts[val] += count
            # np.unique reports each value once per mask, so this counts the
            # current image exactly once for this value.
            image_presence[val] += 1

# Build the summary table in a single pass: the background row (ID 0) plus one
# row per entry of class_map. IDs that never appeared in any mask get 0 in
# both count columns. All records are collected first and the DataFrame is
# created once, instead of enlarging it afterwards with df.loc[len(df)].
rows = [
    {
        "Classe": "Fundo",
        "ID": 0,
        "Pixels": pixel_counts.get(0, 0),
        "Imagens com a classe": image_presence.get(0, 0),
    }
]
rows.extend(
    {
        "Classe": cls,
        "ID": cls_id,
        "Pixels": pixel_counts.get(cls_id, 0),
        "Imagens com a classe": image_presence.get(cls_id, 0),
    }
    for cls, cls_id in class_map.items()
)

df = pd.DataFrame(rows).sort_values("ID").reset_index(drop=True)

# Mask values that are neither the background (0) nor a known class ID have no
# row in the table. Report them instead of dropping them silently, since they
# mean the totals above do not cover every pixel.
unmapped_ids = sorted(
    int(value) for value in set(pixel_counts) - set(class_map.values()) - {0}
)
if unmapped_ids:
    print(
        "Warning: mask values without a class mapping "
        f"(omitted from the table): {unmapped_ids}"
    )

df


Unnamed: 0,Classe,ID,Pixels,Imagens com a classe
0,Fundo,0,33753123,100
1,Skin,1,9438329,68
2,Out of focus area,2,2635782,47
3,Oral mucosa,3,4612801,55
4,Enamel,4,2611152,72
5,Tongue,5,1692267,20
6,Lip,6,1937977,62
7,Hard palate,7,1138201,25
8,Specular reflection,8,231520,73
9,Attached gingiva,9,1189526,45


In [4]:
# Re-order the summary by total pixel count, rarest classes first, and renumber
# the rows from 0. Note that this rebinds `df`: from here on the table is
# ordered by "Pixels" rather than by "ID".
df = (
    df
    .sort_values(by="Pixels", ascending=True)
    .reset_index(drop=True)
)
df


Unnamed: 0,Classe,ID,Pixels,Imagens com a classe
0,Fibroma,34,0,0
1,Makeup,35,0,0
2,Malignant lesion,33,0,0
3,Ulcer,29,783,4
4,Blood vessel,31,804,4
5,Mole,32,1451,6
6,Microfracture,25,1974,4
7,Leukoplakia,30,2815,2
8,Dentine caries,28,4127,7
9,Plaque,27,4742,4
