# AutoModelizer
---

La idea de este proyecto es encontrar el modelo de CNN que mejor se adapte al dataset correspondiente, usando para ello algoritmos evolutivos. Este tipo de soluciones se conoce como neuroevolución.

A continuación, un ejemplo básico de cómo funciona este tipo de algoritmos:

In [1]:
import numpy as np

def fitness(x):
    """Return the fitness of ``x``, defined here as its square."""
    return pow(x, 2)

# Ten starting candidates drawn uniformly from the interval [-10, 10).
population_size = 10
population = np.random.uniform(low=-10, high=10, size=population_size)

In [2]:
def select_parent_tournament(population, scores, k=3):
    """Pick one parent by tournament selection.

    Draws ``k`` indices uniformly at random (repeats allowed) and returns
    the member of ``population`` whose entry in ``scores`` is the highest
    among the drawn ones.
    """
    contenders = np.random.randint(len(population), size=k)
    winner = contenders[np.argmax(scores[contenders])]
    return population[winner]

def crossover(p1, p2):
    """Return the arithmetic mean of the two parents as their child."""
    return (p1 + p2) / 2

def mutate(x):
    """Randomly perturb ``x``.

    With probability 0.1 a value drawn uniformly from [-1, 1) is added to
    ``x``; otherwise ``x`` is returned as received.
    """
    mutation_chance = 0.1
    if np.random.rand() >= mutation_chance:
        return x
    x += np.random.uniform(-1, 1)
    return x

In [3]:
n_generations = 200

for generation in range(n_generations):
    # Score the current population; the tournaments below select on these.
    scores = np.array([fitness(x) for x in population])
    new_population = []
    for _ in range(population_size):
        parent1 = select_parent_tournament(population, scores)
        parent2 = select_parent_tournament(population, scores)
        child = crossover(parent1, parent2)
        child = mutate(child)
        new_population.append(child)
    population = np.array(new_population)
    # `scores` belongs to the generation that was just used for breeding,
    # and `child` is simply the last child produced in this generation.
    best_score = np.max(scores)
    print(f"Generación {generation}, x = {child} Mejor puntuación {best_score}")

# After the loop `scores` still describes the parents of the final
# population, so its argmax is not a valid pointer into `population`.
# Re-score the final generation before picking the best solution.
scores = np.array([fitness(x) for x in population])
best_solution = population[np.argmax(scores)]
print(f"Mejor solución: x = {best_solution}, f(x) = {fitness(best_solution)}")


Generación 0, x = -1.7112917028591368 Mejor puntuación 78.39434126885519
Generación 1, x = -5.014652561095836 Mejor puntuación 76.0335745846789
Generación 2, x = -8.64029803762594 Mejor puntuación 82.49949126710509
Generación 3, x = -8.754018786721723 Mejor puntuación 82.49949126710509
Generación 4, x = -8.258127067953343 Mejor puntuación 82.49949126710509
Generación 5, x = -9.08292305742513 Mejor puntuación 82.49949126710509
Generación 6, x = -9.539998522795793 Mejor puntuación 101.63080797885924
Generación 7, x = -9.71242100034397 Mejor puntuación 109.78006862553974
Generación 8, x = -10.148756340462278 Mejor puntuación 105.23416257778825
Generación 9, x = -10.148756340462278 Mejor puntuación 105.23416257778825
Generación 10, x = -10.258370366573253 Mejor puntuación 105.23416257778825
Generación 11, x = -10.258370366573253 Mejor puntuación 105.23416257778825
Generación 12, x = -10.258370366573253 Mejor puntuación 105.23416257778825
Generación 13, x = -11.201866966932329 Mejor puntuac

Preparamos el dataset de prueba

In [4]:
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert images to tensors and normalise them with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST: the train split is used for training and the train=False split
# for validation. Both are stored under ./data.
train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
val_dataset = datasets.MNIST('./data', train=False, transform=transform, download=True)

# Only the training batches are shuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


  from .autonotebook import tqdm as notebook_tqdm


Comprobamos si tenemos acceso a la GPU


In [5]:
# Report whether PyTorch can see a CUDA device.
print(
    "CUDA (GPU) está disponible en tu sistema."
    if torch.cuda.is_available()
    else "CUDA (GPU) no está disponible en tu sistema."
)

CUDA (GPU) está disponible en tu sistema.


Generamos un par de arquitecturas de prueba

In [6]:
class Net(nn.Module):
    """Small CNN with two convolutions and two linear layers.

    ``forward`` returns log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(in_features=320, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, x):
        # First stage: convolution, 2x2 max-pool, ReLU.
        features = F.relu(F.max_pool2d(self.conv1(x), 2))
        # Second stage: convolution, channel dropout, 2x2 max-pool, ReLU.
        features = self.conv2_drop(self.conv2(features))
        features = F.relu(F.max_pool2d(features, 2))
        # 320 = 20 channels * 4 * 4, the feature size for 28x28 inputs.
        flat = features.view(-1, 320)
        hidden = F.relu(self.fc1(flat))
        # Functional dropout is only active while the module is training.
        hidden = F.dropout(hidden, training=self.training)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)
    


Construimos una función que sea capaz de crear modelos a partir de vectores que representen la arquitectura de la red.
De este modo, el algoritmo evolutivo puede ir adaptando y cambiando la red fácilmente.

In [7]:
import torch

def build_cnn_from_individual(individual, input_shape=(1, 28, 28), num_classes=10):
    """Build a CNN classifier from an individual's architecture description.

    Each convolutional stage is ``Conv2d(padding=1) -> ReLU -> MaxPool2d(2, 2)``.
    The stages are followed by ``Flatten`` and a single ``Linear`` layer.

    Args:
        individual: Mapping with the keys ``'num_conv_layers'`` (int),
            ``'filters'`` (output channels per stage) and ``'filter_sizes'``
            (kernel size per stage). Both lists are indexed up to
            ``num_conv_layers``.
        input_shape: ``(channels, height, width)`` of one input sample.
            Defaults to the MNIST shape ``(1, 28, 28)``.
        num_classes: Number of outputs of the final linear layer.
            Defaults to 10 (MNIST).

    Returns:
        An ``nn.Sequential`` model.

    Raises:
        IndexError: If ``'filters'`` or ``'filter_sizes'`` has fewer entries
            than ``'num_conv_layers'``.
    """
    layers = []
    in_channels = input_shape[0]

    for i in range(individual['num_conv_layers']):
        out_channels = individual['filters'][i]
        kernel_size = individual['filter_sizes'][i]

        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=1))
        layers.append(nn.ReLU())
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        in_channels = out_channels

    # Push one dummy sample through the convolutional stack to find out how
    # many features reach the linear layer. No gradients are needed for this
    # probe, so autograd is switched off.
    with torch.no_grad():
        probe = nn.Sequential(*layers)(torch.zeros(1, *input_shape))
    flat_features = probe.numel()

    layers.append(nn.Flatten())
    layers.append(nn.Linear(flat_features, num_classes))
    return nn.Sequential(*layers)


# Search-space bounds used when generating candidate architectures.
population_size = 10
min_conv_layers, max_conv_layers = 1, 3
min_filters, max_filters = 16, 128
filter_sizes = [3, 5]




Definimos la población inicial

In [8]:
import random
from tqdm import tqdm

def generate_individual(min_conv_layers, max_conv_layers, min_filters, max_filters, filter_sizes, lr_min, lr_max):
    """Create one random individual (architecture plus learning rate).

    The number of convolutional layers is drawn from
    ``[min_conv_layers, max_conv_layers]`` and the learning rate uniformly
    between ``lr_min`` and ``lr_max``. For each layer, a filter count is
    drawn from ``[min_filters, max_filters]`` and a kernel size is picked
    from ``filter_sizes``.

    Returns:
        A dict with the keys ``'num_conv_layers'``, ``'filters'``,
        ``'filter_sizes'`` and ``'learning_rate'``.
    """
    depth = random.randint(min_conv_layers, max_conv_layers)
    learning_rate = random.uniform(lr_min, lr_max)

    filters = []
    kernel_sizes = []
    for _ in range(depth):
        filters.append(random.randint(min_filters, max_filters))
        kernel_sizes.append(random.choice(filter_sizes))

    # Add more parameters here as needed, e.g. fully connected layers.

    return {
        'num_conv_layers': depth,
        'filters': filters,
        'filter_sizes': kernel_sizes,
        'learning_rate': learning_rate,
    }

def initialize_population(pop_size, min_conv_layers, max_conv_layers, min_filters, max_filters, filter_sizes, lr_min, lr_max):
    """Return a list of ``pop_size`` individuals from ``generate_individual``.

    All remaining arguments are forwarded unchanged to
    ``generate_individual``.
    """
    bounds = (min_conv_layers, max_conv_layers, min_filters, max_filters, filter_sizes, lr_min, lr_max)
    return [generate_individual(*bounds) for _ in range(pop_size)]


In [9]:

# Adjust these values to suit your needs.
population_size = 10
min_conv_layers, max_conv_layers = 1, 3
min_filters, max_filters = 16, 128
filter_sizes = [3, 5]
lr_min, lr_max = 0.0001, 0.01

population = initialize_population(
    pop_size=population_size,
    min_conv_layers=min_conv_layers,
    max_conv_layers=max_conv_layers,
    min_filters=min_filters,
    max_filters=max_filters,
    filter_sizes=filter_sizes,
    lr_min=lr_min,
    lr_max=lr_max,
)

# NOTE(review): the evaluation call visible in this notebook does not
# forward `epochs` -- confirm where this value is meant to be used.
epochs = 10

population


[{'num_conv_layers': 2,
  'filters': [106, 114],
  'filter_sizes': [3, 3],
  'learning_rate': 0.006497994066288489},
 {'num_conv_layers': 2,
  'filters': [121, 81],
  'filter_sizes': [3, 5],
  'learning_rate': 0.0041891796859719835},
 {'num_conv_layers': 3,
  'filters': [30, 31, 46],
  'filter_sizes': [3, 5, 3],
  'learning_rate': 0.00024720079790209633},
 {'num_conv_layers': 2,
  'filters': [34, 109],
  'filter_sizes': [5, 5],
  'learning_rate': 0.007874192181446684},
 {'num_conv_layers': 3,
  'filters': [118, 32, 20],
  'filter_sizes': [3, 3, 3],
  'learning_rate': 0.008895097326435177},
 {'num_conv_layers': 2,
  'filters': [18, 99],
  'filter_sizes': [5, 5],
  'learning_rate': 0.005630703484325661},
 {'num_conv_layers': 3,
  'filters': [41, 95, 66],
  'filter_sizes': [3, 3, 5],
  'learning_rate': 0.00962754585611119},
 {'num_conv_layers': 1,
  'filters': [82],
  'filter_sizes': [3],
  'learning_rate': 0.004547750730214854},
 {'num_conv_layers': 1,
  'filters': [112],
  'filter_sizes

Definimos el entrenamiento y el testeo

In [10]:
from tqdm import tqdm
import torch.optim as optim
import torch.nn as nn

def evaluate_individual(individual, train_loader, val_loader, device='cuda', epochs=5):
    """Train the CNN described by ``individual`` and return its accuracy.

    The model is built with ``build_cnn_from_individual`` and trained with
    Adam and cross-entropy loss. Accuracy is then measured on ``val_loader``.

    Args:
        individual: Architecture description; ``'learning_rate'`` is read
            here and the rest is consumed by ``build_cnn_from_individual``.
        train_loader: Iterable of ``(data, targets)`` training batches; must
            support ``len()`` for the progress bar.
        val_loader: Iterable of ``(data, targets)`` validation batches.
        device: Device the model and batches are moved to.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The fraction of validation samples classified correctly (the
        individual's fitness), or ``0.0`` if ``val_loader`` yields no samples.
    """
    # Build the model described by the individual.
    model = build_cnn_from_individual(individual).to(device)

    # Optimiser and loss function.
    optimizer = torch.optim.Adam(model.parameters(), lr=individual['learning_rate'])
    criterion = nn.CrossEntropyLoss()

    # Training
    for epoch in range(epochs):
        model.train()
        # The context manager closes the bar even if a batch raises.
        with tqdm(total=len(train_loader), desc=f'Epoch {epoch+1}/{epochs}', unit='batch') as progress_bar:
            for data, targets in train_loader:
                data, targets = data.to(device), targets.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, targets)
                loss.backward()
                optimizer.step()

                # Show the loss of the latest batch and advance by one batch.
                progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})
                progress_bar.update()

    # Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in val_loader:
            data, targets = data.to(device), targets.to(device)
            predicted = model(data).argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # An empty validation set would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total


Generamos la mutación

In [11]:
def mutate_individual(individual):
    """Mutate an individual by randomly changing its hyperparameters.

    With probability 0.1 each, the learning rate and the number of
    convolutional layers are redrawn. ``'filters'`` and ``'filter_sizes'``
    are then resized to match ``'num_conv_layers'``: surplus entries are
    dropped and missing ones are filled with random values.

    Args:
        individual: Dict with the keys ``'num_conv_layers'``, ``'filters'``,
            ``'filter_sizes'`` and ``'learning_rate'``.

    Returns:
        The same dict object, updated in place. The two per-layer lists are
        replaced by new list objects rather than modified.
    """
    mutation_rate = 0.1  # Probability of mutating each feature
    lr_range = (0.0001, 0.01)
    # Same 1..3 range used when individuals are generated. With 28x28 inputs,
    # padding=1 and one 2x2 pool per layer, a fourth 5x5 layer would receive
    # a feature map smaller than its kernel.
    conv_range = (1, 3)
    filter_range = (16, 128)
    kernel_choices = (3, 5)

    if random.random() < mutation_rate:
        # Mutate the learning rate
        individual['learning_rate'] = random.uniform(*lr_range)

    if random.random() < mutation_rate:
        # Mutate the number of layers (a single integer, not a sequence)
        individual['num_conv_layers'] = random.randint(*conv_range)

    # Keep the per-layer lists consistent with the layer count. Slicing
    # builds new lists, so lists shared with another individual are untouched.
    num_layers = individual['num_conv_layers']
    filters = list(individual['filters'][:num_layers])
    kernel_sizes = list(individual['filter_sizes'][:num_layers])
    while len(filters) < num_layers:
        filters.append(random.randint(*filter_range))
    while len(kernel_sizes) < num_layers:
        kernel_sizes.append(random.choice(kernel_choices))
    individual['filters'] = filters
    individual['filter_sizes'] = kernel_sizes

    return individual

Y el cruce entre individuos

In [12]:
def crossover(parent1, parent2):
    """Perform a uniform crossover between two individuals.

    The architecture genes (``'num_conv_layers'``, ``'filters'`` and
    ``'filter_sizes'``) are inherited together from one randomly chosen
    parent, so the per-layer lists always match the layer count. Every other
    key is taken from either parent with probability 0.5. Values are copied,
    so the child never shares list objects with its parents.

    Args:
        parent1 (dict): The first parent individual.
        parent2 (dict): The second parent individual.

    Returns:
        dict: A new child individual with the keys of ``parent1``.
    """
    import copy

    linked_keys = ('num_conv_layers', 'filters', 'filter_sizes')
    # One draw decides the donor of the whole architecture group.
    architecture_donor = parent1 if random.random() < 0.5 else parent2

    child = {}
    for key in parent1:
        if key in linked_keys:
            donor = architecture_donor
        else:
            donor = parent1 if random.random() < 0.5 else parent2
        child[key] = copy.deepcopy(donor[key])

    return child

In [13]:
def evaluate_population(population, train_loader, val_loader, device, epochs=5):
    """Evaluate every individual and return the fitness scores in order.

    Each individual is printed and then passed to ``evaluate_individual``.

    Args:
        population: Iterable of individuals.
        train_loader: Training batches, forwarded to ``evaluate_individual``.
        val_loader: Validation batches, forwarded to ``evaluate_individual``.
        device: Device forwarded to ``evaluate_individual``.
        epochs: Training epochs per individual. Defaults to 5, the same
            default as ``evaluate_individual``.

    Returns:
        A list with one fitness value per individual, in population order.
    """
    fitness_scores = []

    for individual in population:
        print(individual)
        score = evaluate_individual(individual, train_loader, val_loader, device, epochs=epochs)
        fitness_scores.append(score)

    return fitness_scores




Recapitulando lo realizado hasta ahora: hemos creado las posibles mutaciones sobre las arquitecturas, los posibles cruces y la evaluación de los modelos.


Queda realizar:
- Selección para la reproducción: en principio por torneo, aunque también puede hacerse por ruleta
- Creación de la nueva generación usando las funciones de mutación y cruces
- Criterios de parada
- Registro de análisis

In [14]:
# Assumes `population` has already been initialised.
fitness_scores = evaluate_population(population, train_loader, val_loader, device)

# Pair each individual with its score, ordered from highest to lowest fitness.
population_with_scores = sorted(
    zip(population, fitness_scores),
    key=lambda pair: pair[1],
    reverse=True,
)

# Print the results.
for i, (individual, score) in enumerate(population_with_scores):
    print(f"Individuo {individual}: Aptitud = {score}")

{'num_conv_layers': 2, 'filters': [106, 114], 'filter_sizes': [3, 3], 'learning_rate': 0.006497994066288489}


Epoch 1/5: 100%|██████████| 938/938 [00:14<00:00, 62.77batch/s, training_loss=0.063]
Epoch 2/5: 100%|██████████| 938/938 [00:14<00:00, 63.42batch/s, training_loss=0.161]
Epoch 3/5: 100%|██████████| 938/938 [00:14<00:00, 65.13batch/s, training_loss=0.011]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 62.57batch/s, training_loss=0.007]
Epoch 5/5: 100%|██████████| 938/938 [00:14<00:00, 62.91batch/s, training_loss=0.092]


{'num_conv_layers': 2, 'filters': [121, 81], 'filter_sizes': [3, 5], 'learning_rate': 0.0041891796859719835}


Epoch 1/5: 100%|██████████| 938/938 [00:15<00:00, 62.02batch/s, training_loss=0.097]
Epoch 2/5: 100%|██████████| 938/938 [00:14<00:00, 63.91batch/s, training_loss=0.009]
Epoch 3/5: 100%|██████████| 938/938 [00:15<00:00, 60.75batch/s, training_loss=0.009]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 65.02batch/s, training_loss=0.201]
Epoch 5/5: 100%|██████████| 938/938 [00:14<00:00, 64.18batch/s, training_loss=0.006]


{'num_conv_layers': 3, 'filters': [30, 31, 46], 'filter_sizes': [3, 5, 3], 'learning_rate': 0.00024720079790209633}


Epoch 1/5: 100%|██████████| 938/938 [00:12<00:00, 72.62batch/s, training_loss=0.072]
Epoch 2/5: 100%|██████████| 938/938 [00:13<00:00, 68.12batch/s, training_loss=0.012]
Epoch 3/5: 100%|██████████| 938/938 [00:14<00:00, 63.55batch/s, training_loss=0.026]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 63.77batch/s, training_loss=0.027]
Epoch 5/5: 100%|██████████| 938/938 [00:14<00:00, 64.72batch/s, training_loss=0.119]


{'num_conv_layers': 2, 'filters': [34, 109], 'filter_sizes': [5, 5], 'learning_rate': 0.007874192181446684}


Epoch 1/5: 100%|██████████| 938/938 [00:13<00:00, 67.29batch/s, training_loss=0.001]
Epoch 2/5: 100%|██████████| 938/938 [00:14<00:00, 63.14batch/s, training_loss=0.000]
Epoch 3/5: 100%|██████████| 938/938 [00:14<00:00, 63.10batch/s, training_loss=0.029]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 63.03batch/s, training_loss=0.148]
Epoch 5/5: 100%|██████████| 938/938 [00:14<00:00, 62.92batch/s, training_loss=0.054]


{'num_conv_layers': 3, 'filters': [118, 32, 20], 'filter_sizes': [3, 3, 3], 'learning_rate': 0.008895097326435177}


Epoch 1/5: 100%|██████████| 938/938 [00:13<00:00, 68.59batch/s, training_loss=0.058]
Epoch 2/5: 100%|██████████| 938/938 [00:14<00:00, 66.76batch/s, training_loss=0.132]
Epoch 3/5: 100%|██████████| 938/938 [00:13<00:00, 68.81batch/s, training_loss=0.012]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 62.70batch/s, training_loss=0.063]
Epoch 5/5: 100%|██████████| 938/938 [00:14<00:00, 62.80batch/s, training_loss=0.032]


{'num_conv_layers': 2, 'filters': [18, 99], 'filter_sizes': [5, 5], 'learning_rate': 0.005630703484325661}


Epoch 1/5: 100%|██████████| 938/938 [00:14<00:00, 63.24batch/s, training_loss=0.009]
Epoch 2/5: 100%|██████████| 938/938 [00:13<00:00, 68.01batch/s, training_loss=0.066]
Epoch 3/5: 100%|██████████| 938/938 [00:13<00:00, 71.99batch/s, training_loss=0.015]
Epoch 4/5: 100%|██████████| 938/938 [00:12<00:00, 76.16batch/s, training_loss=0.073]
Epoch 5/5: 100%|██████████| 938/938 [00:13<00:00, 69.20batch/s, training_loss=0.016]


{'num_conv_layers': 3, 'filters': [41, 95, 66], 'filter_sizes': [3, 3, 5], 'learning_rate': 0.00962754585611119}


Epoch 1/5: 100%|██████████| 938/938 [00:14<00:00, 64.04batch/s, training_loss=0.142]
Epoch 2/5: 100%|██████████| 938/938 [00:14<00:00, 63.23batch/s, training_loss=0.164]
Epoch 3/5: 100%|██████████| 938/938 [00:15<00:00, 62.38batch/s, training_loss=0.016]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 63.90batch/s, training_loss=0.019]
Epoch 5/5: 100%|██████████| 938/938 [00:13<00:00, 69.05batch/s, training_loss=0.012]


{'num_conv_layers': 1, 'filters': [82], 'filter_sizes': [3], 'learning_rate': 0.004547750730214854}


Epoch 1/5: 100%|██████████| 938/938 [00:13<00:00, 70.07batch/s, training_loss=0.092]
Epoch 2/5: 100%|██████████| 938/938 [00:14<00:00, 65.43batch/s, training_loss=0.028]
Epoch 3/5: 100%|██████████| 938/938 [00:14<00:00, 65.16batch/s, training_loss=0.007]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 65.35batch/s, training_loss=0.047]
Epoch 5/5: 100%|██████████| 938/938 [00:14<00:00, 65.33batch/s, training_loss=0.106]


{'num_conv_layers': 1, 'filters': [112], 'filter_sizes': [5], 'learning_rate': 0.007126761598648627}


Epoch 1/5: 100%|██████████| 938/938 [00:12<00:00, 75.66batch/s, training_loss=0.044]
Epoch 2/5: 100%|██████████| 938/938 [00:14<00:00, 63.89batch/s, training_loss=0.063]
Epoch 3/5: 100%|██████████| 938/938 [00:12<00:00, 75.27batch/s, training_loss=0.005]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 64.12batch/s, training_loss=0.071]
Epoch 5/5: 100%|██████████| 938/938 [00:14<00:00, 64.13batch/s, training_loss=0.000]


{'num_conv_layers': 3, 'filters': [29, 77, 103], 'filter_sizes': [5, 3, 5], 'learning_rate': 0.002196719327628927}


Epoch 1/5: 100%|██████████| 938/938 [00:14<00:00, 62.97batch/s, training_loss=0.008]
Epoch 2/5: 100%|██████████| 938/938 [00:12<00:00, 73.33batch/s, training_loss=0.005]
Epoch 3/5: 100%|██████████| 938/938 [00:12<00:00, 73.26batch/s, training_loss=0.056]
Epoch 4/5: 100%|██████████| 938/938 [00:14<00:00, 64.78batch/s, training_loss=0.000]
Epoch 5/5: 100%|██████████| 938/938 [00:14<00:00, 64.95batch/s, training_loss=0.012]


Individuo 0: Aptitud = 0.9909
Individuo 1: Aptitud = 0.9887
Individuo 2: Aptitud = 0.9856
Individuo 3: Aptitud = 0.9845
Individuo 4: Aptitud = 0.9839
Individuo 5: Aptitud = 0.9825
Individuo 6: Aptitud = 0.9824
Individuo 7: Aptitud = 0.9809
Individuo 8: Aptitud = 0.9808
Individuo 9: Aptitud = 0.9763


In [15]:
# Show every individual next to its fitness, in the stored ranking order.
for i, entry in enumerate(population_with_scores):
    individual, score = entry
    print(f"Individuo {individual}: Aptitud = {score}")

Individuo {'num_conv_layers': 3, 'filters': [29, 77, 103], 'filter_sizes': [5, 3, 5], 'learning_rate': 0.002196719327628927}: Aptitud = 0.9909
Individuo {'num_conv_layers': 3, 'filters': [30, 31, 46], 'filter_sizes': [3, 5, 3], 'learning_rate': 0.00024720079790209633}: Aptitud = 0.9887
Individuo {'num_conv_layers': 2, 'filters': [106, 114], 'filter_sizes': [3, 3], 'learning_rate': 0.006497994066288489}: Aptitud = 0.9856
Individuo {'num_conv_layers': 3, 'filters': [41, 95, 66], 'filter_sizes': [3, 3, 5], 'learning_rate': 0.00962754585611119}: Aptitud = 0.9845
Individuo {'num_conv_layers': 2, 'filters': [121, 81], 'filter_sizes': [3, 5], 'learning_rate': 0.0041891796859719835}: Aptitud = 0.9839
Individuo {'num_conv_layers': 2, 'filters': [34, 109], 'filter_sizes': [5, 5], 'learning_rate': 0.007874192181446684}: Aptitud = 0.9825
Individuo {'num_conv_layers': 1, 'filters': [112], 'filter_sizes': [5], 'learning_rate': 0.007126761598648627}: Aptitud = 0.9824
Individuo {'num_conv_layers': 2, 