In [None]:
# PONTO DE FOCO
# TRABALHAR AQUI - V1 USANDO AS IMAGENS MO

import os
import cv2
import numpy as np
import pandas as pd
from skimage.color import rgb2gray
from skimage.feature import hog
import matplotlib.pyplot as plt

# Directories
original_dir = "ft-DB"
mask_dir = "ground-truth-masks"
output_csv = "features/features.csv"

# Create the output folder if needed
os.makedirs("features", exist_ok=True)

# HOG params
hog_params = {
    "orientations": 9,
    "pixels_per_cell": (16, 16),
    "cells_per_block": (2, 2),
    "visualize": False,
    "feature_vector": True
}

# Folder -> class_id mapping.
# Hidden directories and tooling folders (.git, checkpoints, caches) are skipped.
exclude_dirs = {'.git', '.ipynb_checkpoints', '__pycache__'}
class_names = sorted([
    d for d in os.listdir(original_dir)
    if os.path.isdir(os.path.join(original_dir, d)) and not d.startswith('.') and d not in exclude_dirs
])
print(class_names)
class_id_map = {name: idx for idx, name in enumerate(class_names)}

color_hist_size = 48  # 16 bins * 3 channels
hog_size = None       # fixed by the first image that is processed successfully

# One row per image: filename, class_id, colour stats, colour histogram, HOG
data = []

for class_name in class_names:
    class_path = os.path.join(original_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the CSV row
    # order reproducible between runs and machines.
    for filename in sorted(os.listdir(class_path)):
        if not filename.endswith(".png"):
            continue

        filepath = os.path.join(class_path, filename)
        img = cv2.imread(filepath)  # NOTE: OpenCV loads colour images as BGR

        if img is None:
            print(f"Erro ao carregar: {filepath}")
            continue

        # Try to load the matching mask; fall back to "whole image" when the
        # mask file is missing or cannot be decoded.
        mask_path = os.path.join(mask_dir, class_name, filename)
        mask_bin = None
        if os.path.exists(mask_path):
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                print(f"Erro ao carregar máscara: {mask_path}")
            else:
                if mask.shape[:2] != img.shape[:2]:
                    # bitwise_and/calcHist require mask and image to have the
                    # same size; nearest-neighbour keeps the mask binary.
                    mask = cv2.resize(mask, (img.shape[1], img.shape[0]),
                                      interpolation=cv2.INTER_NEAREST)
                mask_bin = (mask > 0).astype(np.uint8)
        if mask_bin is None:
            mask_bin = np.ones(img.shape[:2], dtype=np.uint8)

        foreground = mask_bin.astype(bool)
        if not foreground.any():
            # An empty mask would yield NaN statistics; skip instead of
            # writing an unusable row.
            print(f"Máscara vazia, imagem ignorada: {filepath}")
            continue

        # Apply the mask (background pixels become black)
        masked_img = cv2.bitwise_and(img, img, mask=mask_bin)

        # --- Colour statistics over the foreground pixels only.
        # Channels are unpacked in BGR order so each value matches its name.
        pixels = img[foreground]  # shape (n_foreground_pixels, 3), BGR
        mean_b, mean_g, mean_r = pixels.mean(axis=0)
        std_b, std_g, std_r = pixels.std(axis=0)

        # --- Colour histogram (16 bins per channel), stored as R, G, B.
        # In a BGR image R is channel 2, G is channel 1 and B is channel 0.
        hist_r = cv2.calcHist([img], [2], mask_bin, [16], [0, 256]).flatten()
        hist_g = cv2.calcHist([img], [1], mask_bin, [16], [0, 256]).flatten()
        hist_b = cv2.calcHist([img], [0], mask_bin, [16], [0, 256]).flatten()

        # Normalise each histogram so it sums to 1
        hist_r /= hist_r.sum() if hist_r.sum() > 0 else 1
        hist_g /= hist_g.sum() if hist_g.sum() > 0 else 1
        hist_b /= hist_b.sum() if hist_b.sum() > 0 else 1

        color_hist = np.concatenate([hist_r, hist_g, hist_b])

        # --- HOG
        # rgb2gray expects RGB channel order, so convert from BGR first;
        # otherwise the red and blue luminance weights are swapped.
        gray = rgb2gray(cv2.cvtColor(masked_img, cv2.COLOR_BGR2RGB))
        hog_feats = hog(gray, **hog_params)

        # The HOG length depends on the image size. Rows of different length
        # cannot share one CSV header, so images that disagree with the first
        # processed image are reported and skipped.
        if hog_size is None:
            hog_size = len(hog_feats)
        elif len(hog_feats) != hog_size:
            print(f"Tamanho de imagem inconsistente, imagem ignorada: {filepath}")
            continue

        # --- Combine everything into one row
        class_id = class_id_map[class_name]
        features = [filename, class_id, mean_r, mean_g, mean_b, std_r, std_g, std_b] + \
                   color_hist.tolist() + hog_feats.tolist()

        data.append(features)

# Without at least one processed image the HOG width is unknown; fail with a
# clear message instead of a NameError (or a stale value left in the kernel).
if not data:
    raise RuntimeError(f"Nenhuma imagem processada em {original_dir}; nada a salvar.")

# Column headers
columns = ["filename", "class_id", "mean_r", "mean_g", "mean_b", "std_r", "std_g", "std_b"]
columns += [f"hist_color_{i}" for i in range(color_hist_size)]
columns += [f"hog_{i}" for i in range(hog_size)]

# Save to CSV
df = pd.DataFrame(data, columns=columns)
df.to_csv(output_csv, index=False)
print(f"✅ Features salvas em {output_csv}")


['apple', 'banana', 'grape', 'guava', 'lemon', 'morgote', 'orange', 'pear', 'persimmon', 'poukan']
✅ Features salvas em features/features.csv


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.color import rgb2gray
from skimage.feature import hog
import matplotlib.pyplot as plt

# Directories
original_dir = "ft-DB"
mask_dir = "dataset"
output_csv = "features/features.csv"

# Create the output folder if needed
os.makedirs("features", exist_ok=True)

# HOG params
hog_params = {
    "orientations": 9,
    "pixels_per_cell": (16, 16),
    "cells_per_block": (2, 2),
    "visualize": False,
    "feature_vector": True
}

# Folder -> class_id mapping (fixed list, so class ids do not depend on which
# folders happen to exist on disk).
# exclude_dirs is not used by this cell; it is kept only so the name stays defined.
exclude_dirs = {'.git', '.ipynb_checkpoints', '__pycache__'}
class_names = ['apple', 'banana', 'grape', 'guava', 'lemon', 'morgote', 'orange', 'pear', 'persimmon', 'poukan']

class_id_map = {name: idx for idx, name in enumerate(class_names)}

color_hist_size = 48  # 16 bins * 3 channels
hog_size = None       # fixed by the first image that is processed successfully

# One row per image: filename, class_id, colour stats, colour histogram, HOG
data = []
for class_name in class_names:
    class_path = os.path.join(original_dir, class_name)
    if not os.path.isdir(class_path):
        # The class list is hard-coded, so a folder may legitimately be absent.
        print(f"Diretório não encontrado: {class_path}")
        continue

    # os.listdir returns entries in arbitrary order; sorting keeps the CSV row
    # order reproducible between runs and machines.
    for filename in sorted(os.listdir(class_path)):
        if not filename.endswith(".png"):
            continue

        filepath = os.path.join(class_path, filename)
        img = cv2.imread(filepath)  # NOTE: OpenCV loads colour images as BGR

        if img is None:
            print(f"Erro ao carregar: {filepath}")
            continue

        # Try to load the matching mask; fall back to "whole image" when the
        # mask file is missing or cannot be decoded.
        mask_path = os.path.join(mask_dir, class_name, filename)
        mask_bin = None
        if os.path.exists(mask_path):
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                print(f"Erro ao carregar máscara: {mask_path}")
            else:
                if mask.shape[:2] != img.shape[:2]:
                    # bitwise_and/calcHist require mask and image to have the
                    # same size; nearest-neighbour keeps the mask binary.
                    mask = cv2.resize(mask, (img.shape[1], img.shape[0]),
                                      interpolation=cv2.INTER_NEAREST)
                mask_bin = (mask > 0).astype(np.uint8)
        if mask_bin is None:
            mask_bin = np.ones(img.shape[:2], dtype=np.uint8)

        foreground = mask_bin.astype(bool)
        if not foreground.any():
            # An empty mask would yield NaN statistics; skip instead of
            # writing an unusable row.
            print(f"Máscara vazia, imagem ignorada: {filepath}")
            continue

        # Apply the mask (background pixels become black)
        masked_img = cv2.bitwise_and(img, img, mask=mask_bin)

        # --- Colour statistics over the foreground pixels only.
        # Channels are unpacked in BGR order so each value matches its name.
        pixels = img[foreground]  # shape (n_foreground_pixels, 3), BGR
        mean_b, mean_g, mean_r = pixels.mean(axis=0)
        std_b, std_g, std_r = pixels.std(axis=0)

        # --- Colour histogram (16 bins per channel), stored as R, G, B.
        # In a BGR image R is channel 2, G is channel 1 and B is channel 0.
        hist_r = cv2.calcHist([img], [2], mask_bin, [16], [0, 256]).flatten()
        hist_g = cv2.calcHist([img], [1], mask_bin, [16], [0, 256]).flatten()
        hist_b = cv2.calcHist([img], [0], mask_bin, [16], [0, 256]).flatten()

        # Normalise each histogram so it sums to 1
        hist_r /= hist_r.sum() if hist_r.sum() > 0 else 1
        hist_g /= hist_g.sum() if hist_g.sum() > 0 else 1
        hist_b /= hist_b.sum() if hist_b.sum() > 0 else 1

        color_hist = np.concatenate([hist_r, hist_g, hist_b])

        # --- HOG
        # rgb2gray expects RGB channel order, so convert from BGR first;
        # otherwise the red and blue luminance weights are swapped.
        gray = rgb2gray(cv2.cvtColor(masked_img, cv2.COLOR_BGR2RGB))
        hog_feats = hog(gray, **hog_params)

        # The HOG length depends on the image size. Rows of different length
        # cannot share one CSV header, so images that disagree with the first
        # processed image are reported and skipped.
        if hog_size is None:
            hog_size = len(hog_feats)
        elif len(hog_feats) != hog_size:
            print(f"Tamanho de imagem inconsistente, imagem ignorada: {filepath}")
            continue

        # --- Combine everything into one row
        class_id = class_id_map[class_name]
        features = [filename, class_id, mean_r, mean_g, mean_b, std_r, std_g, std_b] + \
                   color_hist.tolist() + hog_feats.tolist()

        data.append(features)

# Without at least one processed image the HOG width is unknown; fail with a
# clear message instead of a NameError (or a stale value left in the kernel).
if not data:
    raise RuntimeError(f"Nenhuma imagem processada em {original_dir}; nada a salvar.")

# Column headers
columns = ["filename", "class_id", "mean_r", "mean_g", "mean_b", "std_r", "std_g", "std_b"]
columns += [f"hist_color_{i}" for i in range(color_hist_size)]
columns += [f"hog_{i}" for i in range(hog_size)]

# Save to CSV
df = pd.DataFrame(data, columns=columns)
df.to_csv(output_csv, index=False)
print(f"✅ Features salvas em {output_csv}")


aqui
✅ Features salvas em features/features.csv


In [23]:
import os
import cv2
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
from skimage.color import rgb2gray
from skimage.feature import hog

# --------------------------------------------------------
# MAIN CONFIGURATION
# --------------------------------------------------------

images_dir = "dataset/images"           # <- folder containing the images
xml_file = "dataset/annotations.xml"   # <- XML exported from CVAT
output_csv = "features/features.csv"

os.makedirs("features", exist_ok=True)

# HOG parameters
hog_params = {
    "orientations": 9,
    "pixels_per_cell": (16, 16),
    "cells_per_block": (2, 2),
    "visualize": False,
    "feature_vector": True
}

# FIXED ROI size: every crop is resized to this so that all HOG vectors have
# the same length. cv2.resize takes the size as (width, height).
ROI_SIZE = (128, 256)

# Class name -> class id
class_names = ['apple', 'banana', 'grape', 'guava', 'lemon',
               'morgote', 'orange', 'pear', 'persimmon', 'poukan']
class_id_map = {name: idx for idx, name in enumerate(class_names)}

# --------------------------------------------------------
# EXTRACTOR
# --------------------------------------------------------

# Parse the annotation XML
tree = ET.parse(xml_file)
root = tree.getroot()

# One row per annotated box: filename, class_id, colour stats, histogram, HOG
data = []
print("🚀 Iniciando extração de features...")

for image_tag in root.findall('image'):
    filename = image_tag.attrib['name']
    img_path = os.path.join(images_dir, filename)

    img = cv2.imread(img_path)  # NOTE: OpenCV loads colour images as BGR
    if img is None:
        print(f"❌ Não encontrado: {img_path}")
        continue

    img_h, img_w = img.shape[:2]

    for box in image_tag.findall('box'):
        label = box.attrib['label']
        if label not in class_id_map:
            print(f"⚠️ Classe desconhecida: {label}")
            continue

        # Bounding box, clamped to the image. Without clamping, a negative
        # coordinate would be read as a Python negative index and silently
        # crop the wrong region.
        xtl = min(max(int(float(box.attrib['xtl'])), 0), img_w)
        ytl = min(max(int(float(box.attrib['ytl'])), 0), img_h)
        xbr = min(max(int(float(box.attrib['xbr'])), 0), img_w)
        ybr = min(max(int(float(box.attrib['ybr'])), 0), img_h)

        roi = img[ytl:ybr, xtl:xbr]
        if roi.size == 0:
            print(f"⚠️ ROI vazia em {filename}")
            continue

        # Resize the ROI to the fixed size
        roi_resized = cv2.resize(roi, ROI_SIZE)

        # Per-channel statistics (BGR layout: R is index 2, G is 1, B is 0)
        mean_r = np.mean(roi_resized[:, :, 2])
        mean_g = np.mean(roi_resized[:, :, 1])
        mean_b = np.mean(roi_resized[:, :, 0])
        std_r = np.std(roi_resized[:, :, 2])
        std_g = np.std(roi_resized[:, :, 1])
        std_b = np.std(roi_resized[:, :, 0])

        # Colour histogram (16 bins per channel), stored as R, G, B
        hist_r = cv2.calcHist([roi_resized], [2], None, [16], [0, 256]).flatten()
        hist_g = cv2.calcHist([roi_resized], [1], None, [16], [0, 256]).flatten()
        hist_b = cv2.calcHist([roi_resized], [0], None, [16], [0, 256]).flatten()

        # Normalise each histogram so it sums to 1
        hist_r /= hist_r.sum() if hist_r.sum() > 0 else 1
        hist_g /= hist_g.sum() if hist_g.sum() > 0 else 1
        hist_b /= hist_b.sum() if hist_b.sum() > 0 else 1

        color_hist = np.concatenate([hist_r, hist_g, hist_b])

        # HOG
        # rgb2gray expects RGB channel order, so convert from BGR first;
        # otherwise the red and blue luminance weights are swapped.
        gray = rgb2gray(cv2.cvtColor(roi_resized, cv2.COLOR_BGR2RGB))
        hog_feats = hog(gray, **hog_params)

        # Combine everything into one row
        class_id = class_id_map[label]
        features = [filename, class_id, mean_r, mean_g, mean_b,
                    std_r, std_g, std_b] + color_hist.tolist() + hog_feats.tolist()

        data.append(features)

print(f"✅ Total de amostras extraídas: {len(data)}")

# --------------------------------------------------------
# BUILD AND SAVE THE CSV
# --------------------------------------------------------

# Without at least one sample the HOG width is unknown; fail with a clear
# message instead of a NameError (or a stale value left in the kernel).
if not data:
    raise RuntimeError(f"Nenhuma amostra extraída de {xml_file}; nada a salvar.")

# Every row has 8 leading fields followed by the colour histogram and the HOG
# vector; deriving the HOG width from a stored row avoids relying on the loop
# variable of the last iteration.
color_hist_size = 48  # 16 bins * 3 channels
hog_size = len(data[0]) - 8 - color_hist_size

columns = ["filename", "class_id",
           "mean_r", "mean_g", "mean_b",
           "std_r", "std_g", "std_b"]

columns += [f"hist_color_{i}" for i in range(color_hist_size)]
columns += [f"hog_{i}" for i in range(hog_size)]

df = pd.DataFrame(data, columns=columns)
df.to_csv(output_csv, index=False)

print(f"✅ Features salvas em: {output_csv}")

🚀 Iniciando extração de features...
✅ Total de amostras extraídas: 200
✅ Features salvas em: features/features.csv
