In [1]:
# LOAD PREPROCESSED DATA

import joblib
import numpy as np

# Banner announcing the data-loading step.
print("=" * 80, "CARREGANDO DADOS PR√â-PROCESSADOS", "=" * 80, sep="\n")

# Load the bundle holding the already-preprocessed train/test splits.
# NOTE(review): joblib.load unpickles arbitrary objects; only load files
# that come from a trusted source.
data_package = joblib.load("diabetes_model/preprocessed_data.joblib")

# Pull the four splits out of the bundle, each stored under its own key.
X_train, X_test, y_train, y_test = (
    data_package[split_key]
    for split_key in ("X_train", "X_test", "y_train", "y_test")
)

# Report the shape of every split as a quick sanity check.
print(
    "‚úÖ Dados carregados:",
    f"   X_train: {X_train.shape}",
    f"   X_test: {X_test.shape}",
    f"   y_train: {y_train.shape}",
    f"   y_test: {y_test.shape}",
    sep="\n",
)

print("\n‚úÖ PRONTO PARA ALGORITMO GEN√âTICO!")

CARREGANDO DADOS PR√â-PROCESSADOS
‚úÖ Dados carregados:
   X_train: (95996, 18)
   X_test: (24000, 18)
   y_train: (95996,)
   y_test: (24000,)

‚úÖ PRONTO PARA ALGORITMO GEN√âTICO!


In [2]:
# QUICK TEST CONFIGURATION - OPTIMIZED GENETIC ALGORITHM

import random
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, f1_score
from sklearn.linear_model import LogisticRegression
from deap import base, creator, tools, algorithms

# Banner for the configuration cell: rule, title, rule.
print("=" * 80, "CONFIGURA√á√ÉO OTIMIZADA - TESTE R√ÅPIDO", "=" * 80, sep="\n")

# 1. INDIVIDUAL FACTORY (genes are created with their final types)
def create_individual():
    """Sample one random genome for the logistic-regression search.

    Returns:
        list: ``[C, penalty_type, solver_type, class_weight_type, max_iter]``.
        ``C`` is a float drawn uniformly from [0.01, 100.0]; the other four
        genes are integers drawn from [0, 2], [0, 3], [0, 1] and [500, 3000]
        respectively (bounds inclusive).
    """
    # The draws happen in gene order so a fixed seed yields the same genome.
    c_value = random.uniform(0.01, 100.0)
    penalty_code = random.randint(0, 2)
    solver_code = random.randint(0, 3)
    weight_code = random.randint(0, 1)
    iteration_cap = random.randint(500, 3000)
    return [c_value, penalty_code, solver_code, weight_code, iteration_cap]

# 2. FITNESS FUNCTION (genes arrive already typed, so no clamping is done here)
def evaluate_individual(individual):
    """Score one genome by the cross-validated F1 of the model it encodes.

    Args:
        individual: Sequence ``[C, penalty_type, solver_type,
            class_weight_type, max_iter]``. The three ``*_type`` genes are
            integer codes looked up in the mapping tables below.

    Returns:
        tuple: One-element fitness tuple. It holds the mean F1 over a 2-fold
        cross-validation on the module-level ``X_train`` / ``y_train``, or
        ``-1000`` when the penalty/solver pairing is rejected up front or when
        building/scoring the model raises an ordinary exception.

    Raises:
        KeyError: If a ``*_type`` gene is not a key of its mapping table.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught, so a
    long evolutionary run can still be stopped from the keyboard.
    """
    import warnings  # local import keeps this definition self-contained

    invalid_fitness = (-1000,)

    C, penalty_type, solver_type, class_weight_type, max_iter = individual

    # Translate the integer codes into scikit-learn argument values.
    penalty_map = {0: 'l2', 1: 'l1', 2: 'elasticnet'}
    solver_map = {0: 'lbfgs', 1: 'liblinear', 2: 'saga', 3: 'newton-cg'}
    class_weight_map = {0: None, 1: 'balanced'}

    penalty = penalty_map[penalty_type]
    solver = solver_map[solver_type]
    class_weight = class_weight_map[class_weight_type]

    # Reject penalty/solver pairings that are not allowed, without fitting.
    if penalty == 'l1' and solver not in ['liblinear', 'saga']:
        return invalid_fitness
    if penalty == 'elasticnet' and solver != 'saga':
        return invalid_fitness

    # NOTE(review): 'elasticnet' is built without an l1_ratio; scikit-learn may
    # reject that, in which case every elasticnet genome ends in the except
    # branch below. Confirm, and consider adding an l1_ratio gene.
    try:
        # No estimator-level n_jobs: cross_val_score already fans the folds
        # out over all cores, and LogisticRegression only uses n_jobs to
        # parallelise over classes, which does nothing for a binary target.
        model = LogisticRegression(
            C=C,
            penalty=penalty,
            solver=solver,
            class_weight=class_weight,
            max_iter=int(max_iter),
            random_state=42,
        )

        # Quick test setting: only 2 folds.
        scorer = make_scorer(f1_score)
        scores = cross_val_score(model, X_train, y_train, cv=2, scoring=scorer, n_jobs=-1)

        return (scores.mean(),)

    except Exception as exc:
        # Penalise the genome, but say why instead of failing silently.
        warnings.warn(
            f"Falha ao avaliar {list(individual)!r}: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return invalid_fitness

# 3. HYBRID CROSSOVER (integer genes keep their integer type)
def hybrid_crossover(ind1, ind2):
    """Recombine two parents into two new individuals.

    Gene 0 (``C``, float) is blended with a single random mixing weight and
    then limited to [0.01, 100.0]. Genes 1-4 (integers) are exchanged between
    the two children independently, each with probability 0.5.

    Args:
        ind1: First parent genome.
        ind2: Second parent genome.

    Returns:
        tuple: ``(child1, child2)``, two fresh ``creator.Individual`` objects
        built from copies of the parents; the parents are not modified.
    """
    lower_c, upper_c = 0.01, 100.0
    blend_alpha = 0.5

    offspring_a = creator.Individual(ind1[:])
    offspring_b = creator.Individual(ind2[:])

    # Float gene: one mixing weight in [-alpha, 1 + alpha) shared by both children.
    weight = (1 + 2 * blend_alpha) * random.random() - blend_alpha
    blended_a = (1 - weight) * ind1[0] + weight * ind2[0]
    blended_b = (1 - weight) * ind2[0] + weight * ind1[0]

    # The blend can overshoot either parent, so pull C back into range.
    offspring_a[0] = max(lower_c, min(upper_c, blended_a))
    offspring_b[0] = max(lower_c, min(upper_c, blended_b))

    # Integer genes: coin flip per position decides whether the children swap.
    for gene_idx in (1, 2, 3, 4):
        swap_here = random.random() < 0.5
        if swap_here:
            offspring_a[gene_idx] = ind2[gene_idx]
            offspring_b[gene_idx] = ind1[gene_idx]

    return offspring_a, offspring_b

# 4. MUTATION (handles the mixed float/integer genome)
def hybrid_mutate(individual):
    """Mutate a genome in place, gene by gene, each with probability 0.3.

    Gene 0 (``C``, float) is multiplied by a random factor from [0.5, 2.0] and
    limited to [0.01, 100.0]. Genes 1-4 (integers) are resampled from their
    full ranges: [0, 2], [0, 3], [0, 1] and [500, 3000].

    Args:
        individual: Genome to mutate; it is modified in place.

    Returns:
        tuple: One-element tuple holding the same ``individual`` object.
    """
    gene_rate = 0.3

    # Float gene: multiplicative step, then pull back into range.
    if random.random() < gene_rate:
        scaled = individual[0] * random.uniform(0.5, 2.0)
        individual[0] = max(0.01, min(100.0, scaled))

    # Integer genes: (position, low, high), resampled independently.
    integer_bounds = ((1, 0, 2), (2, 0, 3), (3, 0, 1), (4, 500, 3000))
    for position, low, high in integer_bounds:
        if random.random() < gene_rate:
            individual[position] = random.randint(low, high)

    return (individual,)

# 5. DEAP SETUP
# Drop classes left over from a previous run of this cell so that
# creator.create below starts from a clean slate.
if hasattr(creator, "FitnessMax"):
    delattr(creator, "FitnessMax")
if hasattr(creator, "Individual"):
    delattr(creator, "Individual")

# Single-objective maximisation; individuals are lists carrying that fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# Wire the custom operators into the toolbox.
toolbox = base.Toolbox()
toolbox.register("individual", tools.initIterate, creator.Individual, create_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evaluate_individual)
toolbox.register("mate", hybrid_crossover)      # hybrid crossover
toolbox.register("mutate", hybrid_mutate)       # hybrid mutation
toolbox.register("select", tools.selTournament, tournsize=3)

# Summary of what was configured.
print(
    "\n‚úÖ Configura√ß√£o completa!",
    "   ‚Ä¢ create_individual: Tipos corretos ‚úì",
    "   ‚Ä¢ evaluate_individual: Sem clamping ‚úì",
    "   ‚Ä¢ hybrid_crossover: Preserva inteiros ‚úì",
    "   ‚Ä¢ hybrid_mutate: Otimizado ‚úì",
    sep="\n",
)

CONFIGURA√á√ÉO OTIMIZADA - TESTE R√ÅPIDO

‚úÖ Configura√ß√£o completa!
   ‚Ä¢ create_individual: Tipos corretos ‚úì
   ‚Ä¢ evaluate_individual: Sem clamping ‚úì
   ‚Ä¢ hybrid_crossover: Preserva inteiros ‚úì
   ‚Ä¢ hybrid_mutate: Otimizado ‚úì


In [3]:
# QUICK RUN - TEST (5-10 minutes)

# Section banner; the leading newline separates it from earlier output.
print("\n" + "=" * 80, "EXECUTANDO TESTE R√ÅPIDO", "=" * 80, sep="\n")

# Seed both the stdlib and the NumPy generators for a repeatable run.
random.seed(42)
np.random.seed(42)

# QUICK-RUN SETTINGS
# Population and generation count are reduced from 30 and 20 to shorten the run.
POPULATION_SIZE, NGEN = 15, 10
# Crossover and mutation probabilities passed to the evolutionary loop.
CXPB, MUTPB = 0.7, 0.3

print(f"\nüìä Par√¢metros do teste r√°pido:")
# Echo the settings of this quick run, followed by the time estimate.
print(
    f"   ‚Ä¢ Popula√ß√£o: {POPULATION_SIZE} (reduzida)",
    f"   ‚Ä¢ Gera√ß√µes: {NGEN} (reduzida)",
    "   ‚Ä¢ Cross-validation: 2 folds (reduzida)",
    f"   ‚Ä¢ Crossover: {CXPB}",
    f"   ‚Ä¢ Muta√ß√£o: {MUTPB}",
    "\n‚è±Ô∏è  Tempo estimado: 5-10 minutos",
    sep="\n",
)

# Initial random population.
population = toolbox.population(n=POPULATION_SIZE)

# Per-generation statistics computed over the individuals' fitness tuples.
stats = tools.Statistics(key=lambda member: member.fitness.values)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)

# Keep the five best individuals seen during the run.
hof = tools.HallOfFame(5)

print("\nüöÄ Iniciando evolu√ß√£o...")
print("-" * 80)

# RUN: evolve the population; per-generation statistics are printed
# (verbose=True) and the best individuals are collected in `hof`.
population, logbook = algorithms.eaSimple(
    population=population,
    toolbox=toolbox,
    cxpb=CXPB,
    mutpb=MUTPB,
    ngen=NGEN,
    stats=stats,
    halloffame=hof,
    verbose=True,
)

# Closing banner.
print("\n" + "=" * 80, "‚úÖ TESTE CONCLU√çDO!", "=" * 80, sep="\n")


EXECUTANDO TESTE R√ÅPIDO

üìä Par√¢metros do teste r√°pido:
   ‚Ä¢ Popula√ß√£o: 15 (reduzida)
   ‚Ä¢ Gera√ß√µes: 10 (reduzida)
   ‚Ä¢ Cross-validation: 2 folds (reduzida)
   ‚Ä¢ Crossover: 0.7
   ‚Ä¢ Muta√ß√£o: 0.3

‚è±Ô∏è  Tempo estimado: 5-10 minutos

üöÄ Iniciando evolu√ß√£o...
--------------------------------------------------------------------------------




gen	nevals	avg     	std    	min  	max     
0  	15    	-199.287	400.356	-1000	0.891196




1  	12    	0.887258	0.0091233	0.863998	0.890967




2  	15    	-266.013	442.611  	-1000   	0.890983




3  	12    	-65.8364	249.666  	-1000   	0.891056




4  	10    	-65.8364	249.666  	-1000   	0.891056




5  	12    	-65.835 	249.666  	-1000   	0.891056




6  	11    	0.891032	5.03379e-05	0.8909  	0.891056




7  	15    	0.891055	3.72437e-05	0.89098 	0.891168




8  	13    	0.89105 	7.5871e-05 	0.890815	0.891168




9  	7     	0.891094	7.65238e-05	0.890951	0.891168




10 	12    	0.891152	4.25207e-05	0.891032	0.891168

‚úÖ TESTE CONCLU√çDO!


In [7]:
# COMPUTE METRICS FOR THE OPTIMIZED MODEL

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

print("Calculando m√©tricas do modelo otimizado...")

# Best genome kept by the Hall of Fame, unpacked into its five genes.
best_individual = hof[0]
(
    C_best,
    penalty_type,
    solver_type,
    class_weight_type,
    max_iter_best,
) = best_individual

# Integer code -> scikit-learn argument value (same tables as the fitness function).
penalty_map = dict(enumerate(('l2', 'l1', 'elasticnet')))
solver_map = dict(enumerate(('lbfgs', 'liblinear', 'saga', 'newton-cg')))
class_weight_map = dict(enumerate((None, 'balanced')))

penalty_best = penalty_map[penalty_type]
solver_best = solver_map[solver_type]
class_weight_best = class_weight_map[class_weight_type]

# Refit on the full training split with the winning configuration,
# then predict the held-out test split.
best_model = LogisticRegression(
    C=C_best,
    penalty=penalty_best,
    solver=solver_best,
    class_weight=class_weight_best,
    max_iter=int(max_iter_best),
    random_state=42,
)
best_model.fit(X_train, y_train)
y_pred_ga = best_model.predict(X_test)

# Test-set metrics, expressed as percentages.
accuracy_ga, precision_ga, recall_ga, f1_ga = (
    metric_fn(y_test, y_pred_ga) * 100
    for metric_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print(
    "‚úÖ M√©tricas calculadas:",
    f"   Accuracy: {accuracy_ga:.2f}%",
    f"   Precision: {precision_ga:.2f}%",
    f"   Recall: {recall_ga:.2f}%",
    f"   F1 Score: {f1_ga:.2f}%",
    sep="\n",
)

Calculando m√©tricas do modelo otimizado...




‚úÖ M√©tricas calculadas:
   Accuracy: 89.46%
   Precision: 90.81%
   Recall: 87.82%
   F1 Score: 89.29%


In [8]:
# COMPARISON: ORIGINAL vs OPTIMIZED

import pandas as pd

print("=" * 80, "COMPARA√á√ÉO: MODELO ORIGINAL vs OTIMIZADO", "=" * 80, sep="\n")

# Load the previously saved baseline model and predict the test split.
# NOTE(review): joblib.load unpickles arbitrary objects; only load files
# that come from a trusted source.
model_package = joblib.load("diabetes_model/diabetes_model.joblib")
original_model = model_package['model']
y_pred_original = original_model.predict(X_test)

# Baseline test-set metrics, expressed as percentages.
accuracy_original, precision_original, recall_original, f1_original = (
    metric_fn(y_test, y_pred_original) * 100
    for metric_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# Side-by-side table; the last column is optimized minus original.
comparison_df = pd.DataFrame({
    'M√©trica': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Original': [accuracy_original, precision_original, recall_original, f1_original],
    'Otimizado (AG)': [accuracy_ga, precision_ga, recall_ga, f1_ga],
    'Melhoria': [
        tuned - baseline
        for tuned, baseline in zip(
            (accuracy_ga, precision_ga, recall_ga, f1_ga),
            (accuracy_original, precision_original, recall_original, f1_original),
        )
    ],
})

print("\n")
print(comparison_df.round(2).to_string(index=False))

# F1 gain in percentage points; used by the analysis that follows.
improvement = f1_ga - f1_original

print("\n" + "=" * 80, "AN√ÅLISE DA MELHORIA", "=" * 80, sep="\n")

print(f"\nüìä F1 Score:")
# F1 of both models and the absolute gain in percentage points.
print(f"   Original: {f1_original:.2f}%")
print(f"   Otimizado: {f1_ga:.2f}%")
print(f"   Melhoria: {improvement:+.2f} pontos percentuais")

# The relative gain is undefined when the baseline F1 is 0; report that
# instead of letting the division raise ZeroDivisionError.
if f1_original:
    print(f"   Melhoria relativa: {(improvement/f1_original*100):+.2f}%")
else:
    print("   Melhoria relativa: n/d (F1 original = 0)")

# Recommendation by size of the gain: above 0.5 points, above 0, otherwise.
# NOTE(review): the gain is measured on a single test split, so a very small
# positive value may be within noise; confirm before acting on it.
if improvement > 0.5:
    print("\n‚úÖ EXCELENTE! Melhoria significativa!")
    print("   Recomenda√ß√£o: Salvar e usar este modelo")
elif improvement > 0:
    print("\n‚úÖ BOM! Pequena melhoria")
    print("   Recomenda√ß√£o: Considere rodar vers√£o completa (20 gera√ß√µes, pop=30)")
else:
    print("\n‚ö†Ô∏è  Sem melhoria ou piorou")
    print("   Recomenda√ß√£o: Manter modelo original")

COMPARA√á√ÉO: MODELO ORIGINAL vs OTIMIZADO


  M√©trica  Original  Otimizado (AG)  Melhoria
 Accuracy     89.46           89.46      0.00
Precision     90.85           90.81     -0.05
   Recall     87.76           87.82      0.06
 F1 Score     89.28           89.29      0.01

AN√ÅLISE DA MELHORIA

üìä F1 Score:
   Original: 89.28%
   Otimizado: 89.29%
   Melhoria: +0.01 pontos percentuais
   Melhoria relativa: +0.01%

‚úÖ BOM! Pequena melhoria
   Recomenda√ß√£o: Considere rodar vers√£o completa (20 gera√ß√µes, pop=30)
