# Modelos Especialistas

**Objetivo**: Criar modelos especialistas que classificam em:
- **HEALTHY**: Planta saudável
- **UNHEALTHY**: Planta doente (qualquer doença)

In [9]:
# 1. CARREGAMENTO DE DADOS
from utils import *
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import pandas as pd

config = carregar_configuracoes()

def carregar_dataset(especie):
    """Carrega dataset agrupando todas as doenças"""
    print(f"📂 Carregando dataset de {especie}...")
    
    # Construir dataset_info
    dataset_info = {}
    for esp, info in config['especialistas'].items():
        for classe in info['classes']:
            dataset_info[classe] = {}
    
    healthy_images = []
    unhealthy_images = []
    
    # Processar cada classe da espécie
    for classe, info in dataset_info.items():
        # Remover underscores 
        classe_normalizada = classe.lower().replace('_', '')
        especie_normalizada = especie.lower().replace('_', '')
        
        if especie_normalizada in classe_normalizada:
            # Usar base_path como diretório base das imagens
            dir_path = os.path.join(config.get('processed_data_path', config['base_path']), classe)
            
            if not os.path.exists(dir_path):
                print(f"   ⚠️ Diretório não encontrado: {dir_path}")
                continue
                
            images_in_dir = []
            for img_name in os.listdir(dir_path):
                if img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                    images_in_dir.append(os.path.join(dir_path, img_name))
            
            # AGRUPAMENTO BINÁRIO
            if 'healthy' in classe.lower():
                healthy_images.extend(images_in_dir)
                print(f"   ✅ {classe}: {len(images_in_dir)} → HEALTHY")
            else:
                unhealthy_images.extend(images_in_dir)
                print(f"   🦠 {classe}: {len(images_in_dir)} → UNHEALTHY")
    
    # Combinar dados
    all_images = healthy_images + unhealthy_images
    all_labels = ['healthy'] * len(healthy_images) + ['unhealthy'] * len(unhealthy_images)
    
    # Proteção contra divisão por zero
    if len(all_images) == 0:
        print(f"   ❌ ERRO: Nenhuma imagem encontrada para {especie}!")
        print(f"   🔍 Verifique se as pastas existem e contêm imagens.")
        return None
    
    balance_ratio = len(healthy_images) / len(all_images) * 100
    print(f"   📊 Total: {len(all_images)} | Healthy: {len(healthy_images)} ({balance_ratio:.1f}%) | Unhealthy: {len(unhealthy_images)} ({100-balance_ratio:.1f}%)")
    
    # Dividindo em treino, validação e teste para todos os datasets
    X_temp, X_test, y_temp, y_test = train_test_split(
        all_images, all_labels, test_size=0.15, stratify=all_labels, random_state=42
    )
    
    # Dividindo em treino, validação e teste para cada dataset
    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=0.176, stratify=y_temp, random_state=42
    )
    
    return {
        'train': {'X': X_train, 'y': y_train},
        'val': {'X': X_val, 'y': y_val},
        'test': {'X': X_test, 'y': y_test},
        'info': {'balance_ratio': balance_ratio, 'total': len(all_images)}
    }

# Carregar datasets binários reais
print("=== CARREGANDO DATASETS BINÁRIOS CORRIGIDOS ===")
dataset_tomato = carregar_dataset('tomato')
print()
dataset_potato = carregar_dataset('potato')
print()
dataset_pepper = carregar_dataset('pepper_bell')

# Verificar se todos os datasets foram carregados com sucesso
datasets_validos = []
if dataset_tomato is not None:
    datasets_validos.append('Tomato')
if dataset_potato is not None:
    datasets_validos.append('Potato')    
if dataset_pepper is not None:
    datasets_validos.append('Pepper')

if len(datasets_validos) > 0:
    print(f"\n✅ DATASETS BINÁRIOS CARREGADOS: {', '.join(datasets_validos)}")
else:
    print("\n❌ ERRO: Nenhum dataset foi carregado com sucesso!")


=== CARREGANDO DATASETS BINÁRIOS CORRIGIDOS ===
📂 Carregando dataset de tomato...
   🦠 Tomato_Bacterial_spot: 2127 → UNHEALTHY
   🦠 Tomato_Early_blight: 1000 → UNHEALTHY
   🦠 Tomato_Late_blight: 1909 → UNHEALTHY
   🦠 Tomato_Leaf_Mold: 952 → UNHEALTHY
   🦠 Tomato_Septoria_leaf_spot: 1771 → UNHEALTHY
   🦠 Tomato_Spider_mites_Two_spotted_spider_mite: 1676 → UNHEALTHY
   🦠 Tomato__Target_Spot: 1404 → UNHEALTHY
   🦠 Tomato__Tomato_YellowLeaf__Curl_Virus: 3208 → UNHEALTHY
   🦠 Tomato__Tomato_mosaic_virus: 373 → UNHEALTHY
   ✅ Tomato_healthy: 1591 → HEALTHY
   📊 Total: 16011 | Healthy: 1591 (9.9%) | Unhealthy: 14420 (90.1%)

📂 Carregando dataset de potato...
   🦠 Potato___Early_blight: 1000 → UNHEALTHY
   🦠 Potato___Late_blight: 1000 → UNHEALTHY
   ✅ Potato___healthy: 152 → HEALTHY
   📊 Total: 2152 | Healthy: 152 (7.1%) | Unhealthy: 2000 (92.9%)

📂 Carregando dataset de pepper_bell...
   🦠 Pepper__bell___Bacterial_spot: 997 → UNHEALTHY
   ✅ Pepper__bell___healthy: 1478 → HEALTHY
   📊 Total: 2

In [None]:
# 2. ARQUITETURA E TREINAMENTO OTIMIZADO
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def criar_classificao_binaria(dataset, config):
    """Cria geradores otimizados para classificação binária"""
    
    # Data augmentation para generalização
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=30,
        width_shift_range=0.3,
        height_shift_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,
        zoom_range=0.3,
        brightness_range=[0.7, 1.3],
        fill_mode='nearest'
    )
    
    val_test_datagen = ImageDataGenerator(rescale=1./255)
    
    # DataFrames
    train_df = pd.DataFrame({'filename': dataset['train']['X'], 'class': dataset['train']['y']})
    val_df = pd.DataFrame({'filename': dataset['val']['X'], 'class': dataset['val']['y']})
    test_df = pd.DataFrame({'filename': dataset['test']['X'], 'class': dataset['test']['y']})
    
    # Geradores binários
    train_gen = train_datagen.flow_from_dataframe(
        train_df, x_col='filename', y_col='class',
        target_size=(config['img_height'], config['img_width']),
        batch_size=config['batch_size'],
        class_mode='binary', shuffle=True, seed=42
    )
    
    val_gen = val_test_datagen.flow_from_dataframe(
        val_df, x_col='filename', y_col='class',
        target_size=(config['img_height'], config['img_width']),
        batch_size=config['batch_size'],
        class_mode='binary', shuffle=False, seed=42
    )
    
    test_gen = val_test_datagen.flow_from_dataframe(
        test_df, x_col='filename', y_col='class',
        target_size=(config['img_height'], config['img_width']),
        batch_size=config['batch_size'],
        class_mode='binary', shuffle=False, seed=42
    )
    
    return train_gen, val_gen, test_gen

def criar_modelo(especie_nome):
    """Cria modelo de classificação binária"""
    base_model = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(224, 224, 3)
    )
    
    # Descongelar últimas camadas
    base_model.trainable = True
    for layer in base_model.layers[:-15]:
        layer.trainable = False
    
    # Arquitetura otimizada para classificação binária
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.001))(x)
    x = Dropout(0.6)(x)
    x = Dense(64, activation='relu', kernel_regularizer=l2(0.001))(x)
    x = Dropout(0.5)(x)
    
    # Saída binária com sigmoid
    predictions = Dense(1, activation='sigmoid', name=f'output_{especie_nome}')(x)
    
    modelo = Model(inputs=base_model.input, outputs=predictions)
    
    print(f"✅ Modelo binário {especie_nome}: {modelo.count_params():,} parâmetros")
    return modelo

def calcular_class_weights(dataset):
    """Calcula class weights balanceados"""
    healthy_count = sum(1 for label in dataset['train']['y'] if label == 'healthy')
    unhealthy_count = len(dataset['train']['y']) - healthy_count
    
    total = len(dataset['train']['y'])
    weight_healthy = total / (2 * healthy_count)
    weight_unhealthy = total / (2 * unhealthy_count)
    
    class_weights = {0: weight_healthy, 1: weight_unhealthy}  # 0=healthy, 1=unhealthy
    
    print(f"   Class weights: Healthy={weight_healthy:.3f}, Unhealthy={weight_unhealthy:.3f}")
    return class_weights

# Criar geradores
print("=== CRIANDO GERADORES BINÁRIOS OTIMIZADOS ===")
train_gen_tomato, val_gen_tomato, test_gen_tomato = criar_classificao_binaria(dataset_tomato, config)
train_gen_potato, val_gen_potato, test_gen_potato = criar_classificao_binaria(dataset_potato, config)
train_gen_pepper, val_gen_pepper, test_gen_pepper = criar_classificao_binaria(dataset_pepper, config)

print(f"✅ Geradores criados com class_mode='binary'")

# Criar modelos
print("\n=== CRIANDO MODELOS BINÁRIOS OTIMIZADOS ===")
modelo_tomato = criar_modelo('Tomato')
modelo_potato = criar_modelo('Potato')
modelo_pepper = criar_modelo('Pepper')

# Calcular class weights
print("\n=== CALCULANDO CLASS WEIGHTS ===")
cw_tomato = calcular_class_weights(dataset_tomato)
cw_potato = calcular_class_weights(dataset_potato)
cw_pepper = calcular_class_weights(dataset_pepper)


=== CRIANDO GERADORES BINÁRIOS OTIMIZADOS ===
Found 11213 validated image filenames belonging to 2 classes.
Found 2396 validated image filenames belonging to 2 classes.
Found 2402 validated image filenames belonging to 2 classes.
Found 1507 validated image filenames belonging to 2 classes.
Found 322 validated image filenames belonging to 2 classes.
Found 323 validated image filenames belonging to 2 classes.
Found 1732 validated image filenames belonging to 2 classes.
Found 371 validated image filenames belonging to 2 classes.
Found 372 validated image filenames belonging to 2 classes.
✅ Geradores criados com class_mode='binary'

=== CRIANDO MODELOS BINÁRIOS OTIMIZADOS ===


2025-07-03 19:31:07.976698: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-07-03 19:31:08.428191: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-07-03 19:31:08.428228: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-07-03 19:31:08.428232: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2348] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.
2025-07-03 19:31:08.432497: I external/lo

✅ Modelo binário Tomato: 24,136,961 parâmetros
✅ Modelo binário Potato: 24,136,961 parâmetros
✅ Modelo binário Pepper: 24,136,961 parâmetros

=== CALCULANDO CLASS WEIGHTS ===
   Class weights: Healthy=5.033, Unhealthy=0.555
   Class weights: Healthy=7.108, Unhealthy=0.538
   Class weights: Healthy=0.838, Unhealthy=1.241


In [None]:
# 3. TREINAMENTO OTIMIZADO COM CLASS WEIGHTS
def treinar_modelo_binario(modelo, especie, train_gen, val_gen, class_weights):
    """Treina modelo de classificação binária com class weights"""
    print(f"\n🚀 Treinando {especie}...")
    
    # Compilação
    modelo.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    
    # Callback
    callbacks = [
        EarlyStopping(
            monitor='val_accuracy',
            patience=15,
            restore_best_weights=True,
            min_delta=0.001
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.3,
            patience=6,
            min_lr=1e-8
        ),
        ModelCheckpoint(
            filepath=f'modelos_salvos/especialistas/modelo_binario_{especie.lower()}.h5',
            monitor='val_accuracy',
            save_best_only=True
        )
    ]
    
    # Treinamento
    history = modelo.fit(
        train_gen,
        epochs=40,
        validation_data=val_gen,
        class_weight=class_weights,
        callbacks=callbacks,
        verbose=1
    )
    
    final_accuracy = max(history.history['val_accuracy'])
    print(f"✅ {especie} concluído! Melhor accuracy: {final_accuracy:.4f}")
    
    return history

# Treinar todos os modelos
os.makedirs('modelos_salvos', exist_ok=True)

print("=== TREINAMENTO DOS MODELOS BINÁRIOS ===")
history_tomato = treinar_modelo_binario(modelo_tomato, 'Tomato', train_gen_tomato, val_gen_tomato, cw_tomato)
history_potato = treinar_modelo_binario(modelo_potato, 'Potato', train_gen_potato, val_gen_potato, cw_potato)
history_pepper = treinar_modelo_binario(modelo_pepper, 'Pepper', train_gen_pepper, val_gen_pepper, cw_pepper)

print("\n🎯 TODOS OS MODELOS TREINADOS COM SUCESSO!")


=== TREINAMENTO DOS MODELOS BINÁRIOS ===

🚀 Treinando Tomato...
Epoch 1/40


2025-07-03 19:31:13.520236: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
2025-07-03 19:31:15.321000: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f618592e7d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-07-03 19:31:15.321028: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 5070 Ti, Compute Capability 12.0
2025-07-03 19:31:15.334028: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1751581875.396423  111812 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




  saving_api.save_model(


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
✅ Tomato concluído! Melhor accuracy: 0.9508

🚀 Treinando Potato...
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
✅ Potato concluído! Melhor accuracy: 0.9286

🚀 Treinando Pepper...
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoc

In [12]:
# 4. AVALIAÇÃO DO MODELO
from sklearn.metrics import (
    classification_report, 
    accuracy_score, 
    confusion_matrix, 
    roc_auc_score, 
    recall_score, 
    precision_score, 
    f1_score
    )

def avaliar_modelo(modelo, especie, test_gen, dataset_test):
    """Avaliação completa do modelo de classificação binária"""
    print(f"\n📊 Avaliando {especie}...")
    
    test_gen.reset()
    
    # Predições
    predictions_prob = modelo.predict(test_gen, verbose=0)
    predictions_class = (predictions_prob > 0.5).astype(int).flatten()
    
    # Classes verdadeiras
    true_classes = [1 if label == 'unhealthy' else 0 for label in dataset_test['y']]
    
    # Métricas
    accuracy = accuracy_score(true_classes, predictions_class)
    auc_score = roc_auc_score(true_classes, predictions_prob)
    cm = confusion_matrix(true_classes, predictions_class)
    recall = recall_score(true_classes, predictions_class)
    precision = precision_score(true_classes, predictions_class)
    f1 = f1_score(true_classes, predictions_class)

    # Métricas médicas
    tn, fp, fn, tp = cm.ravel()
    
    print(f"   🎯 Accuracy: {accuracy:.4f}")
    print(f"   🎯 AUC-ROC: {auc_score:.4f}")
    print(f"   🎯 Recall: {recall:.4f}")
    print(f"   🎯 Precision: {precision:.4f}")
    print(f"   🎯 F1-Score: {f1:.4f}")

    
    # Matriz de confusão
    print(f"   Matriz: [[{tn:3d}, {fp:3d}], [{fn:3d}, {tp:3d}]]")
    
    # Relatório
    print("\n   Classification Report:")
    print(classification_report(true_classes, predictions_class, target_names=['Healthy', 'Unhealthy'], zero_division=0))
    
    return {
        'accuracy': accuracy,
        'auc_roc': auc_score,
        'confusion_matrix': cm,
        'recall': recall,
        'precision': precision,
        'f1': f1
    }

# Avaliar todos os modelos
print("=== AVALIAÇÃO FINAL DOS MODELOS BINÁRIOS ===")
resultados_tomato = avaliar_modelo(modelo_tomato, 'Tomato', test_gen_tomato, dataset_tomato['test'])
resultados_potato = avaliar_modelo(modelo_potato, 'Potato', test_gen_potato, dataset_potato['test'])
resultados_pepper = avaliar_modelo(modelo_pepper, 'Pepper', test_gen_pepper, dataset_pepper['test'])

# Comparação final
print(f"\n=== COMPARAÇÃO FINAL ===")
resultados = [
    ('Tomato', resultados_tomato),
    ('Potato', resultados_potato), 
    ('Pepper', resultados_pepper)
]

for especie, resultado in resultados:
    qualidade = "🟢 EXCELENTE" if resultado['accuracy'] > 0.9 else "🟡 BOA" if resultado['accuracy'] > 0.7 else "🔴 INSUFICIENTE"
    print(f"   {especie}: {resultado['accuracy']:.4f} - {qualidade}")


=== AVALIAÇÃO FINAL DOS MODELOS BINÁRIOS ===

📊 Avaliando Tomato...
   🎯 Accuracy: 0.9409
   🎯 AUC-ROC: 0.9501
   🎯 Recall: 0.9847
   🎯 Precision: 0.9513
   🎯 F1-Score: 0.9677
   Matriz: [[130, 109], [ 33, 2130]]

   Classification Report:
              precision    recall  f1-score   support

     Healthy       0.80      0.54      0.65       239
   Unhealthy       0.95      0.98      0.97      2163

    accuracy                           0.94      2402
   macro avg       0.87      0.76      0.81      2402
weighted avg       0.94      0.94      0.94      2402


📊 Avaliando Potato...
   🎯 Accuracy: 0.9288
   🎯 AUC-ROC: 0.4975
   🎯 Recall: 1.0000
   🎯 Precision: 0.9288
   🎯 F1-Score: 0.9631
   Matriz: [[  0,  23], [  0, 300]]

   Classification Report:
              precision    recall  f1-score   support

     Healthy       0.00      0.00      0.00        23
   Unhealthy       0.93      1.00      0.96       300

    accuracy                           0.93       323
   macro avg       0.

In [None]:
# Salvar modelos finais
print("SALVANDO MODELOS OTIMIZADOS")
modelo_tomato.save('modelos_salvos/especialistas/especialista_tomato_binario_final.h5')
modelo_potato.save('modelos_salvos/especialistas/especialista_potato_binario_final.h5')
modelo_pepper.save('modelos_salvos/especialistas/especialista_pepper_binario_final.h5')