In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
import cma
import numpy as np

# Define a simple neural network (e.g., a 2-layer fully connected network)
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define the fitness function for CMA-ES (this will be the loss of the NN on the training set)
def fitness_function(weights, model, dataloader, criterion):
    # Unflatten the weights to match the model's architecture
    idx = 0
    for param in model.parameters():
        num_params = param.numel()
        param.data = torch.tensor(weights[idx: idx + num_params].reshape(param.shape), dtype=torch.float32)
        idx += num_params
    
    # Calculate the loss (fitness) on the validation data
    model.eval()
    total_loss = 0
    for data, target in dataloader:
        output = model(data)
        loss = criterion(output, target)
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

# Function to flatten model weights to a 1D vector (required by CMA-ES)
def flatten_weights(model):
    return np.concatenate([param.detach().cpu().numpy().flatten() for param in model.parameters()])

# Main function to apply CMA-ES optimization on PyTorch model weights
def optimize_nn_with_cma(model, dataloader, input_size, hidden_size, output_size, max_generations=10, population_size=20):
    # Define the loss function
    criterion = nn.CrossEntropyLoss()

    # Initialize CMA-ES parameters
    initial_weights = flatten_weights(model)  # Flatten the initial model weights
    sigma = 0.1  # Initial step size for CMA-ES
    
    # Apply CMA-ES
    es = cma.CMAEvolutionStrategy(initial_weights, sigma, {'popsize': population_size})

    # Optimization loop
    for generation in range(max_generations):
        # Sample a new population of weights
        weights_population = es.ask()
        
        # Evaluate fitness for each candidate solution
        fitness_values = []
        for weights in weights_population:
            fitness = fitness_function(weights, model, dataloader, criterion)
            fitness_values.append(fitness)
        
        # Tell CMA-ES about the fitness of the solutions
        es.tell(weights_population, fitness_values)
        
        # Output the best solution
        best_fitness = min(fitness_values)
        best_solution = weights_population[np.argmin(fitness_values)]
        print(f'Generation {generation + 1}/{max_generations} - Best Fitness: {best_fitness}')
        
        # Check if the stopping criterion is met (for example, early stopping based on fitness)
        if best_fitness < 0.1:  # Example threshold
            print(f"Optimization converged at generation {generation + 1}")
            break

    # Return the best model with optimized weights
    best_weights = es.result.xbest
    idx = 0
    for param in model.parameters():
        num_params = param.numel()
        param.data = torch.tensor(best_weights[idx: idx + num_params].reshape(param.shape), dtype=torch.float32)
        idx += num_params
    
    return model

# Example: training data (use a simple dataset for demonstration)
from torch.utils.data import DataLoader, TensorDataset

torch.manual_seed(42)
X = torch.randn(100, 10)  # 100 samples, 10 features
y = torch.randint(0, 2, (100,))  # Binary classification (0 or 1)

# Create a DataLoader for the dataset
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize the model
input_size = 10
hidden_size = 64
output_size = 2  # Binary classification
model = SimpleNN(input_size, hidden_size, output_size)

# Optimize the model with CMA-ES
optimized_model = optimize_nn_with_cma(model, dataloader, input_size, hidden_size, output_size, max_generations=20, population_size=50)


(25_w,50)-aCMA-ES (mu_w=14.0,w_1=14%) in dimension 834 (seed=765570, Fri Dec 20 15:11:15 2024)
Generation 1/20 - Best Fitness: 0.6615164130926132
Generation 2/20 - Best Fitness: 0.6134639084339142
Generation 3/20 - Best Fitness: 0.5786204263567924
Generation 4/20 - Best Fitness: 0.576466754078865
Generation 5/20 - Best Fitness: 0.5773473531007767
Generation 6/20 - Best Fitness: 0.5842030569911003
Generation 7/20 - Best Fitness: 0.5943636372685432
Generation 8/20 - Best Fitness: 0.5711185559630394
Generation 9/20 - Best Fitness: 0.5828753709793091
Generation 10/20 - Best Fitness: 0.5479419082403183
Generation 11/20 - Best Fitness: 0.5477472394704819
Generation 12/20 - Best Fitness: 0.5302662998437881
Generation 13/20 - Best Fitness: 0.5288611426949501
Generation 14/20 - Best Fitness: 0.5548508763313293
Generation 15/20 - Best Fitness: 0.5456753373146057
Generation 16/20 - Best Fitness: 0.5511695593595505
Generation 17/20 - Best Fitness: 0.5288901291787624
Generation 18/20 - Best Fitness

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define the same neural network architecture
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Particle Swarm Optimization implementation
class PSO:
    def __init__(self, n_particles, dim, bounds=None):
        self.n_particles = n_particles
        self.dim = dim
        self.bounds = bounds
        
        # Initialize particle positions and velocities
        self.positions = np.random.randn(n_particles, dim)
        self.velocities = np.random.randn(n_particles, dim) * 0.1
        
        # Initialize best positions and fitness
        self.pbest_positions = self.positions.copy()
        self.pbest_scores = np.full(n_particles, np.inf)
        self.gbest_position = None
        self.gbest_score = np.inf
        
        # PSO parameters
        self.w = 0.729  # Inertia weight
        self.c1 = 2.05  # Cognitive parameter
        self.c2 = 2.05  # Social parameter

    def optimize(self, fitness_func, max_iter):
        for iteration in range(max_iter):
            # Evaluate fitness for all particles
            for i in range(self.n_particles):
                fitness = fitness_func(self.positions[i])
                
                # Update personal best
                if fitness < self.pbest_scores[i]:
                    self.pbest_scores[i] = fitness
                    self.pbest_positions[i] = self.positions[i].copy()
                
                # Update global best
                if fitness < self.gbest_score:
                    self.gbest_score = fitness
                    self.gbest_position = self.positions[i].copy()
            
            # Update velocities and positions
            r1, r2 = np.random.rand(2)
            self.velocities = (self.w * self.velocities + 
                             self.c1 * r1 * (self.pbest_positions - self.positions) +
                             self.c2 * r2 * (self.gbest_position - self.positions))
            
            self.positions += self.velocities
            
            # Optional: Apply bounds if specified
            if self.bounds is not None:
                self.positions = np.clip(self.positions, self.bounds[0], self.bounds[1])
            
            if iteration % 5 == 0:
                print(f'Iteration {iteration}/{max_iter} - Best Fitness: {self.gbest_score}')
            
            # Early stopping condition
            if self.gbest_score < 0.1:
                print(f"Optimization converged at iteration {iteration}")
                break
        
        return self.gbest_position, self.gbest_score

# Fitness function for neural network
def fitness_function(weights, model, dataloader, criterion):
    # Unflatten the weights to match the model's architecture
    idx = 0
    for param in model.parameters():
        num_params = param.numel()
        param.data = torch.tensor(weights[idx:idx + num_params].reshape(param.shape), dtype=torch.float32)
        idx += num_params
    
    # Calculate the loss (fitness) on the validation data
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for data, target in dataloader:
            output = model(data)
            loss = criterion(output, target)
            total_loss += loss.item()
    
    return total_loss / len(dataloader)

# Function to flatten model weights
def flatten_weights(model):
    return np.concatenate([param.detach().cpu().numpy().flatten() for param in model.parameters()])

# Main function to apply PSO optimization on PyTorch model weights
def optimize_nn_with_pso(model, dataloader, input_size, hidden_size, output_size, max_iterations=100, n_particles=50):
    criterion = nn.CrossEntropyLoss()
    
    # Get the total number of parameters in the model
    n_params = sum(p.numel() for p in model.parameters())
    
    # Initialize PSO
    pso = PSO(n_particles=n_particles, dim=n_params)
    
    # Create a wrapper for the fitness function
    def fitness_wrapper(weights):
        return fitness_function(weights, model, dataloader, criterion)
    
    # Run PSO optimization
    best_weights, best_fitness = pso.optimize(fitness_wrapper, max_iterations)
    
    # Update the model with the best weights found
    idx = 0
    for param in model.parameters():
        num_params = param.numel()
        param.data = torch.tensor(best_weights[idx:idx + num_params].reshape(param.shape), dtype=torch.float32)
        idx += num_params
    
    return model

# Example usage
if __name__ == "__main__":
    # Create synthetic dataset
    X = torch.randn(100, 10)  # 100 samples, 10 features
    y = torch.randint(0, 2, (100,))  # Binary classification
    
    # Create DataLoader
    dataset = torch.utils.data.TensorDataset(X, y)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
    
    # Initialize model
    input_size = 10
    hidden_size = 64
    output_size = 2
    model = SimpleNN(input_size, hidden_size, output_size)
    
    # Optimize the model with PSO
    optimized_model = optimize_nn_with_pso(
        model, 
        dataloader,
        input_size,
        hidden_size,
        output_size,
        max_iterations=100,
        n_particles=50
    )

Iteration 0/100 - Best Fitness: 4.818201780319214
Iteration 5/100 - Best Fitness: 1.510480074211955
Iteration 10/100 - Best Fitness: 1.0236693285405636
Iteration 15/100 - Best Fitness: 1.0236693285405636
Iteration 20/100 - Best Fitness: 0.8858274891972542
Iteration 25/100 - Best Fitness: 0.8579841293394566
Iteration 30/100 - Best Fitness: 0.8579841293394566
Iteration 35/100 - Best Fitness: 0.8579841293394566
Iteration 40/100 - Best Fitness: 0.7846528347581625
Iteration 45/100 - Best Fitness: 0.7846528347581625
Iteration 50/100 - Best Fitness: 0.784250439144671
Iteration 55/100 - Best Fitness: 0.784250439144671
Iteration 60/100 - Best Fitness: 0.7826134851202369
Iteration 65/100 - Best Fitness: 0.7629430741071701
Iteration 70/100 - Best Fitness: 0.7038721889257431
Iteration 75/100 - Best Fitness: 0.6391522977501154
Iteration 80/100 - Best Fitness: 0.6341567635536194
Iteration 85/100 - Best Fitness: 0.6341567635536194
Iteration 90/100 - Best Fitness: 0.6341567635536194
Iteration 95/100 -

In [37]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

# Define a simple neural network (same as before)
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Function to calculate model loss (fitness)
def calculate_loss(weights, model, dataloader, criterion):
    # Unflatten the weights to match the model's architecture
    idx = 0
    for param in model.parameters():
        num_params = param.numel()
        param.data = torch.tensor(weights[idx: idx + num_params].reshape(param.shape), dtype=torch.float32)
        idx += num_params
    
    # Calculate the loss on the validation data
    model.eval()
    total_loss = 0
    for data, target in dataloader:
        output = model(data)
        loss = criterion(output, target)
        total_loss += loss.item()
    
    return total_loss / len(dataloader)

# Function to get flattened weights
def get_flat_weights(model):
    return np.concatenate([param.detach().cpu().numpy().flatten() for param in model.parameters()])

# Function to generate neighbor solution
def generate_neighbor(weights, temperature):
    # Scale perturbation based on temperature
    perturbation = np.random.normal(0, temperature, size=weights.shape)
    return weights + perturbation

# Main function to apply Simulated Annealing optimization
def optimize_nn_with_sa(model, dataloader, input_size, hidden_size, output_size, 
                       initial_temp=1.0, final_temp=0.01, cooling_rate=0.95,
                       iterations_per_temp=20, max_iterations=1000):
    
    criterion = nn.CrossEntropyLoss()
    
    # Initialize current solution
    current_weights = get_flat_weights(model)
    current_loss = calculate_loss(current_weights, model, dataloader, criterion)
    
    # Keep track of the best solution
    best_weights = current_weights.copy()
    best_loss = current_loss
    
    # Initialize temperature
    temperature = initial_temp
    iteration = 0
    
    while temperature > final_temp and iteration < max_iterations:
        for _ in range(iterations_per_temp):
            # Generate neighbor solution
            neighbor_weights = generate_neighbor(current_weights, temperature)
            neighbor_loss = calculate_loss(neighbor_weights, model, dataloader, criterion)
            
            # Calculate acceptance probability
            delta_loss = neighbor_loss - current_loss
            acceptance_prob = np.exp(-delta_loss / temperature)
            
            # Accept or reject the neighbor solution
            if delta_loss < 0 or np.random.random() < acceptance_prob:
                current_weights = neighbor_weights
                current_loss = neighbor_loss
                
                # Update best solution if necessary
                if current_loss < best_loss:
                    best_weights = current_weights.copy()
                    best_loss = current_loss
                    print(f'Iteration {iteration}, Temperature: {temperature:.6f}, New Best Loss: {best_loss:.6f}')
            
            # Early stopping if loss is good enough
            if best_loss < 0.1:
                print(f"Optimization converged at iteration {iteration}")
                break
            
        iteration += 1
        # Cool down the temperature
        temperature *= cooling_rate
        print(f'Temperature decreased to {temperature:.6f}, Current best loss: {best_loss:.6f}')
    
    # Set the best weights to the model
    idx = 0
    for param in model.parameters():
        num_params = param.numel()
        param.data = torch.tensor(best_weights[idx: idx + num_params].reshape(param.shape), dtype=torch.float32)
        idx += num_params
    
    return model

# Example usage with synthetic dataset
if __name__ == "__main__":
    # Create synthetic dataset
    X = torch.randn(100, 10)  # 100 samples, 10 features
    y = torch.randint(0, 2, (100,))  # Binary classification
    
    # Create DataLoader
    dataset = TensorDataset(X, y)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
    
    # Initialize model
    input_size = 10
    hidden_size = 64
    output_size = 2
    model = SimpleNN(input_size, hidden_size, output_size)
    
    # Optimize using Simulated Annealing
    optimized_model = optimize_nn_with_sa(
        model, 
        dataloader,
        input_size,
        hidden_size,
        output_size,
        initial_temp=0.3,
        final_temp=0.01,
        cooling_rate=0.99,
        iterations_per_temp=20,
        max_iterations=1000
    )

Temperature decreased to 0.297000, Current best loss: 0.697569
Temperature decreased to 0.294030, Current best loss: 0.697569
Temperature decreased to 0.291090, Current best loss: 0.697569
Temperature decreased to 0.288179, Current best loss: 0.697569
Temperature decreased to 0.285297, Current best loss: 0.697569
Temperature decreased to 0.282444, Current best loss: 0.697569
Temperature decreased to 0.279620, Current best loss: 0.697569
Temperature decreased to 0.276823, Current best loss: 0.697569
Temperature decreased to 0.274055, Current best loss: 0.697569
Temperature decreased to 0.271315, Current best loss: 0.697569
Temperature decreased to 0.268601, Current best loss: 0.697569
Temperature decreased to 0.265915, Current best loss: 0.697569
Temperature decreased to 0.263256, Current best loss: 0.697569
Temperature decreased to 0.260624, Current best loss: 0.697569
Temperature decreased to 0.258018, Current best loss: 0.697569
Temperature decreased to 0.255437, Current best loss: 0

Epoch 1/10, Batch 1/16, Layer 1 Error: 12.338958740234375, Layer 2 Error: 1.5570337772369385, Output Layer Error: 1.2001296281814575
Epoch 1/10, Batch 11/16, Layer 1 Error: 11.755592346191406, Layer 2 Error: 1.5227323770523071, Output Layer Error: 1.3459597826004028
Epoch 1 Accuracy: 9.20%
Epoch 2/10, Batch 1/16, Layer 1 Error: 11.973122596740723, Layer 2 Error: 1.5104399919509888, Output Layer Error: 1.3485537767410278
Epoch 2/10, Batch 11/16, Layer 1 Error: 11.890755653381348, Layer 2 Error: 1.5272572040557861, Output Layer Error: 1.2575950622558594
Epoch 2 Accuracy: 9.20%
Epoch 3/10, Batch 1/16, Layer 1 Error: 11.53770637512207, Layer 2 Error: 1.5421624183654785, Output Layer Error: 1.2790231704711914
Epoch 3/10, Batch 11/16, Layer 1 Error: 11.588460922241211, Layer 2 Error: 1.4892184734344482, Output Layer Error: 1.2089152336120605
Epoch 3 Accuracy: 9.20%
Epoch 4/10, Batch 1/16, Layer 1 Error: 11.133646965026855, Layer 2 Error: 1.4296085834503174, Output Layer Error: 1.293880701065