# Wine DATASET

In [3]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# --- Data preparation: load, split, scale, and build DataLoaders ---

# Load the Wine data set (feature matrix and integer class labels).
data = load_wine()
X = data.data
y = data.target

# Hold out 40% of the samples as the test set, stratified by class.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, stratify=y, test_size=0.40, random_state=42
)

# Split the remaining 60% in half (test_size=0.5): 30% train and 30%
# validation of the full data set, again stratified by class.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, stratify=y_train_val, test_size=0.5, random_state=42
)

# Standardize features with statistics estimated on the training split only,
# so that nothing about the validation/test samples leaks into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors (float32 features, int64 class indices).
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Build the datasets and DataLoaders; only the training loader is shuffled.
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


# DEAW

In [4]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from itertools import product

# MLP for Wine dataset (13-16-3 architecture)
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear (raw scores out).

    All weights and biases are initialized uniformly in [-1, 1]. The
    parameters can be read and written as one flat vector laid out as
    ``fc1.weight``, ``fc1.bias``, ``fc2.weight``, ``fc2.bias``.
    """

    def __init__(self, input_size=13, hidden_size=16, output_size=3):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        # Initialize weights in [-1, 1]
        for param in self._ordered_params():
            nn.init.uniform_(param, -1, 1)

    def _ordered_params(self):
        """Return the parameters in the order used by the flat weight vector."""
        return (self.fc1.weight, self.fc1.bias, self.fc2.weight, self.fc2.bias)

    def forward(self, x):
        """Return the output-layer scores for a batch ``x``."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)  # No activation on output
        return x

    def get_weights(self):
        """Return all weights and biases concatenated into one 1-D tensor."""
        return torch.cat([param.flatten() for param in self._ordered_params()])

    def set_weights(self, weights):
        """Load all weights and biases from a flat vector.

        The values are copied into the existing parameters, so the model keeps
        its own dtype and device and never shares storage with ``weights``;
        later in-place edits of the vector cannot change the model.

        Raises:
            ValueError: if ``weights`` does not hold exactly one value per
                model parameter element.
        """
        expected = sum(param.numel() for param in self._ordered_params())
        if weights.numel() != expected:
            raise ValueError(
                f"Expected {expected} weights, got {weights.numel()}"
            )
        flat = weights.detach().reshape(-1)
        offset = 0
        with torch.no_grad():
            for param in self._ordered_params():
                count = param.numel()
                param.copy_(flat[offset:offset + count].reshape(param.shape))
                offset += count

# Classification accuracy over a data loader
def compute_accuracy(model, data_loader, device):
    """Return the fraction of samples that ``model`` labels correctly.

    The model is switched to eval mode and gradients are disabled. Each batch
    from ``data_loader`` is moved to ``device``; the predicted class is the
    arg-max over dimension 1 of the model output.
    """
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for batch_x, batch_y in data_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            guesses = model(batch_x).argmax(dim=1)
            hits += guesses.eq(batch_y).sum().item()
            seen += batch_y.size(0)
    return hits / seen

# Cross-entropy fitness of a flat weight vector
def fitness(weights, model, train_loader, device):
    """Return the mean cross-entropy of ``model`` under ``weights``.

    ``weights`` is moved to ``device`` and loaded via ``model.set_weights``.
    Every batch's mean loss is weighted by its batch size and the total is
    divided by ``len(train_loader.dataset)``.
    """
    model.set_weights(weights.to(device))
    model.eval()
    criterion = nn.CrossEntropyLoss()
    weighted_sum = 0
    with torch.no_grad():
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            batch_mean = criterion(model(batch_x), batch_y).item()
            weighted_sum += batch_mean * batch_x.size(0)
    return weighted_sum / len(train_loader.dataset)

# DEAW Algorithm
def train_deaw(model, train_loader, device, NP=50, F=0.5, CR=0.9, max_generations=100, initial_lower=-1.0, initial_upper=1.0):
    """Train ``model`` with differential evolution using expanding weight bounds.

    Each individual is a flat weight vector scored by ``fitness`` (lower is
    better). Mutation is ``a + F * (b - c)`` with three distinct individuals
    other than the target; binomial crossover uses rate ``CR``. Whenever a
    mutant component leaves its per-weight bound, that bound is multiplied by
    3 and the component is set to the new bound.

    Args:
        model: network exposing ``parameters()`` and ``set_weights``.
        train_loader: loader passed through to ``fitness``.
        device: device passed through to ``fitness``.
        NP: population size; must be at least 4.
        F: differential weight.
        CR: crossover rate.
        max_generations: number of generations to run.
        initial_lower: lower end of the initial sampling range and bounds.
        initial_upper: upper end of the initial sampling range and bounds.

    Returns:
        ``model`` with the best weight vector found loaded into it.

    Raises:
        ValueError: if ``NP`` is smaller than 4.
    """
    if NP < 4:
        raise ValueError("NP must be at least 4 to draw three distinct donors")

    num_weights = sum(p.numel() for p in model.parameters())
    lower_bounds = np.full(num_weights, initial_lower)
    upper_bounds = np.full(num_weights, initial_upper)
    population = np.random.uniform(initial_lower, initial_upper, (NP, num_weights))

    def evaluate(vector):
        return fitness(torch.tensor(vector, dtype=torch.float32), model, train_loader, device)

    fitnesses = np.array([evaluate(p) for p in population])

    for _ in range(max_generations):
        for i in range(NP):
            candidates = [j for j in range(NP) if j != i]
            a, b, c = np.random.choice(candidates, 3, replace=False)
            v = population[a] + F * (population[b] - population[c])

            # Widen every violated bound by a factor of 3 and place the mutant
            # component on the widened bound.
            # NOTE(review): multiplying by 3 only widens a negative lower /
            # positive upper bound; confirm other initial ranges are not used.
            below = v < lower_bounds
            above = ~below & (v > upper_bounds)
            lower_bounds[below] *= 3
            upper_bounds[above] *= 3
            v[below] = lower_bounds[below]
            v[above] = upper_bounds[above]

            # Binomial crossover; index j_rand always comes from the mutant.
            j_rand = np.random.randint(0, num_weights)
            take_mutant = np.random.rand(num_weights) < CR
            take_mutant[j_rand] = True
            u = np.where(take_mutant, v, population[i])

            # Greedy selection: keep the trial only if it is strictly better.
            loss_u = evaluate(u)
            if loss_u < fitnesses[i]:
                population[i] = u
                fitnesses[i] = loss_u

    best_idx = np.argmin(fitnesses)
    best_weights = torch.tensor(population[best_idx], dtype=torch.float32)
    model.set_weights(best_weights.to(device))
    return model

# Grid search for hyperparameter optimization using validation set
def grid_search_deaw(train_loader, val_loader, device, param_grid=None):
    """Grid-search DEAW hyperparameters, selecting by validation accuracy.

    A fresh 13-16-3 ``MLP`` is trained with ``train_deaw`` for every
    combination of ``NP``, ``F``, ``CR`` and ``max_generations``.

    Args:
        train_loader: loader used for training.
        val_loader: loader used for model selection.
        device: device for models and batches.
        param_grid: optional dict with list values under the keys ``'NP'``,
            ``'F'``, ``'CR'`` and ``'max_generations'``; the built-in grid is
            used when omitted.

    Returns:
        ``(best_model, best_params, best_accuracy)``. The first evaluated
        model is always retained, so ``best_model`` is ``None`` only when the
        grid is empty.
    """
    if param_grid is None:
        # Default hyperparameter grid
        param_grid = {
            'NP': [30, 50, 70],
            'F': [0.3, 0.5, 0.8],
            'CR': [0.7, 0.9, 1.0],
            'max_generations': [50, 100, 150]
        }

    best_accuracy = 0.0
    best_params = None
    best_model = None

    # Iterate over all combinations of hyperparameters
    for NP, F, CR, max_generations in product(param_grid['NP'], param_grid['F'], param_grid['CR'], param_grid['max_generations']):
        print(f"Testing NP={NP}, F={F}, CR={CR}, max_generations={max_generations}")
        model = MLP(input_size=13, hidden_size=16, output_size=3).to(device)
        model = train_deaw(model, train_loader, device, NP=NP, F=F, CR=CR, max_generations=max_generations)
        val_accuracy = compute_accuracy(model, val_loader, device)
        print(f"Validation accuracy: {val_accuracy:.6f}")

        # Accept the first candidate unconditionally so that a run in which
        # every configuration scores 0.0 still returns a usable model.
        if best_model is None or val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_params = {'NP': NP, 'F': F, 'CR': CR, 'max_generations': max_generations}
            best_model = model

    print(f"\nBest hyperparameters: {best_params}")
    print(f"Best validation accuracy: {best_accuracy:.6f}")
    return best_model, best_params, best_accuracy


# Select the compute device: CUDA when torch reports it as available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Run the DEAW hyperparameter grid search; the validation loader drives
# model selection.
best_model, best_params, best_val_accuracy = grid_search_deaw(train_loader, val_loader, device)

# Report the selected model's accuracy on the train, validation, and test
# loaders (values are fractions, printed with six decimals).
train_accuracy = compute_accuracy(best_model, train_loader, device)
val_accuracy = compute_accuracy(best_model, val_loader, device)
test_accuracy = compute_accuracy(best_model, test_loader, device)
print(f"Final training accuracy: {train_accuracy:.6f}")
print(f"Final validation accuracy: {val_accuracy:.6f}")
print(f"Final test accuracy: {test_accuracy:.6f}")

Testing NP=30, F=0.3, CR=0.7, max_generations=50
Validation accuracy: 0.962264
Testing NP=30, F=0.3, CR=0.7, max_generations=100
Validation accuracy: 0.849057
Testing NP=30, F=0.3, CR=0.7, max_generations=150
Validation accuracy: 0.981132
Testing NP=30, F=0.3, CR=0.9, max_generations=50
Validation accuracy: 0.962264
Testing NP=30, F=0.3, CR=0.9, max_generations=100
Validation accuracy: 0.867925
Testing NP=30, F=0.3, CR=0.9, max_generations=150
Validation accuracy: 0.924528
Testing NP=30, F=0.3, CR=1.0, max_generations=50
Validation accuracy: 0.773585
Testing NP=30, F=0.3, CR=1.0, max_generations=100
Validation accuracy: 0.735849
Testing NP=30, F=0.3, CR=1.0, max_generations=150
Validation accuracy: 0.660377
Testing NP=30, F=0.5, CR=0.7, max_generations=50
Validation accuracy: 0.924528
Testing NP=30, F=0.5, CR=0.7, max_generations=100
Validation accuracy: 0.886792
Testing NP=30, F=0.5, CR=0.7, max_generations=150
Validation accuracy: 0.943396
Testing NP=30, F=0.5, CR=0.9, max_generation

In [None]:

def evaluate_model(model, test_loader, device):
    """Return the classification accuracy of ``model`` in percent.

    Runs in eval mode with gradients disabled; each batch is moved to
    ``device`` and the predicted class is the index of the largest output
    along dimension 1.
    """
    model.eval()
    n_seen = 0
    n_right = 0
    with torch.no_grad():
        for batch, truth in test_loader:
            batch = batch.to(device)
            truth = truth.to(device)
            scores = model(batch)
            guess = torch.max(scores.data, 1)[1]
            n_seen += truth.size(0)
            n_right += (guess == truth).sum().item()
    return 100 * n_right / n_seen

# Rebuild the test loader and report the test accuracy of the model selected
# by the DEAW grid search. `best_model` is used because no module-level
# `model` has been bound at this point in the file.
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
accuracy = evaluate_model(best_model, test_loader, device)
print(f"Dokładność na zbiorze testowym: {accuracy:.2f}%")

Dokładność na zbiorze testowym: 95.24%


In [None]:
def train_de(model, train_loader, device, NP=50, F=0.5, CR=0.9, max_generations=100, initial_lower=-1.0, initial_upper=1.0):
    """Train ``model`` with differential evolution under fixed weight bounds.

    Each individual is a flat weight vector scored by ``fitness`` (lower is
    better). Mutation is ``a + F * (b - c)`` with three distinct individuals
    other than the target; the mutant is clipped to
    ``[initial_lower, initial_upper]`` and combined with the target by
    binomial crossover at rate ``CR``.

    Args:
        model: network exposing ``parameters()`` and, through ``fitness``,
            ``set_weights``.
        train_loader: loader passed through to ``fitness``.
        device: device passed through to ``fitness``.
        NP: population size; must be at least 4.
        F: differential weight.
        CR: crossover rate.
        max_generations: number of generations to run.
        initial_lower: lower weight bound and initial sampling limit.
        initial_upper: upper weight bound and initial sampling limit.

    Returns:
        ``model`` with the best weight vector found loaded in place.

    Raises:
        ValueError: if ``NP`` is smaller than 4.
    """
    if NP < 4:
        raise ValueError("NP must be at least 4 to draw three distinct donors")

    num_weights = sum(p.numel() for p in model.parameters())
    population = np.random.uniform(initial_lower, initial_upper, (NP, num_weights))

    def evaluate(vector):
        return fitness(torch.tensor(vector, dtype=torch.float32), model, train_loader, device)

    fitnesses = np.array([evaluate(p) for p in population])

    for _ in range(max_generations):
        for i in range(NP):
            candidates = [j for j in range(NP) if j != i]
            a, b, c = np.random.choice(candidates, 3, replace=False)
            v = population[a] + F * (population[b] - population[c])

            # Fixed constraint: keep every mutant component inside the range.
            v = np.clip(v, initial_lower, initial_upper)

            # Binomial crossover; index j_rand always comes from the mutant.
            j_rand = np.random.randint(0, num_weights)
            take_mutant = np.random.rand(num_weights) < CR
            take_mutant[j_rand] = True
            u = np.where(take_mutant, v, population[i])

            # Greedy selection: keep the trial only if it is strictly better.
            loss_u = evaluate(u)
            if loss_u < fitnesses[i]:
                population[i] = u
                fitnesses[i] = loss_u

    # Load the best individual into the model, parameter by parameter in the
    # order given by model.parameters().
    best_idx = np.argmin(fitnesses)
    best_weights = torch.tensor(population[best_idx], dtype=torch.float32)
    idx = 0
    for param in model.parameters():
        numel = param.numel()
        param.data = best_weights[idx:idx+numel].view(param.size()).to(device)
        idx += numel
    return model


In [None]:
# Train a fresh MLP with plain DE (default hyperparameters); train_de writes
# the best weights into model_de in place.
model_de = MLP().to(device)
train_de(model_de, train_loader, device)
# evaluate_model returns the accuracy in percent.
accuracy_de = evaluate_model(model_de, test_loader, device)
print(f"[DE] Dokładność na zbiorze testowym: {accuracy_de:.2f}%")


[DE] Dokładność na zbiorze testowym: 69.44%


# Adam

In [None]:
from sklearn.metrics import accuracy_score
from torch.optim import Adam
# Evaluation
def evaluate(model, X_test, y_test, device):
    """Print and return the accuracy of ``model`` on ``(X_test, y_test)``.

    The whole feature tensor is moved to ``device`` and scored in one forward
    pass; predictions and labels are brought back to the CPU before being
    compared with ``accuracy_score``, so ``y_test`` may live on any device.

    Returns:
        The accuracy value produced by ``accuracy_score``.
    """
    model.eval()
    with torch.no_grad():
        outputs = model(X_test.to(device))
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
    acc = accuracy_score(y_test.detach().cpu().numpy(), preds)
    print(f"Test Accuracy: {acc:.4f}")
    return acc

# Training with Adam: gradient-based baseline on a fresh MLP, using
# cross-entropy loss and a learning rate of 0.01 for 50 epochs.
model_adam = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model_adam.parameters(), lr=0.01)
n_epochs = 50

print("Training with Adam...")
for epoch in range(n_epochs):
    model_adam.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model_adam(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Report accuracy on the full test tensors.
evaluate(model_adam, X_test, y_test, device)

Training with Adam...
Test Accuracy: 0.9206


# EDEADAM

In [None]:
import torch
import torch.nn as nn
import numpy as np
from scipy.stats import cauchy
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# MLP for Wine dataset (13-16-3 architecture)
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear (raw scores out).

    All weights and biases are initialized uniformly in [-1, 1]. The
    parameters can be read and written as one flat vector laid out as
    ``fc1.weight``, ``fc1.bias``, ``fc2.weight``, ``fc2.bias``.
    """

    def __init__(self, input_size=13, hidden_size=16, output_size=3):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        # Initialize weights in [-1, 1]
        for param in self._ordered_params():
            nn.init.uniform_(param, -1, 1)

    def _ordered_params(self):
        """Return the parameters in the order used by the flat weight vector."""
        return (self.fc1.weight, self.fc1.bias, self.fc2.weight, self.fc2.bias)

    def forward(self, x):
        """Return the output-layer scores for a batch ``x``."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)  # No activation on output for cross-entropy
        return x

    def get_weights(self):
        """Return all weights and biases concatenated into one 1-D tensor."""
        return torch.cat([param.flatten() for param in self._ordered_params()])

    def set_weights(self, weights):
        """Load all weights and biases from a flat vector.

        The values are copied into the existing parameters, so the model keeps
        its own dtype and device and never shares storage with ``weights``;
        later in-place edits of the vector cannot change the model.

        Raises:
            ValueError: if ``weights`` does not hold exactly one value per
                model parameter element.
        """
        expected = sum(param.numel() for param in self._ordered_params())
        if weights.numel() != expected:
            raise ValueError(
                f"Expected {expected} weights, got {weights.numel()}"
            )
        flat = weights.detach().reshape(-1)
        offset = 0
        with torch.no_grad():
            for param in self._ordered_params():
                count = param.numel()
                param.copy_(flat[offset:offset + count].reshape(param.shape))
                offset += count

# Compute cross-entropy loss (used for optimization)
def compute_loss(model, inputs, targets):
    """Return the cross-entropy of ``model`` on ``(inputs, targets)`` as a float.

    Only the scalar value is returned, so the forward pass runs under
    ``torch.no_grad()`` to avoid building an autograd graph.
    """
    loss_fn = nn.CrossEntropyLoss()
    with torch.no_grad():
        return loss_fn(model(inputs), targets).item()

# Accuracy on a full tensor batch (targets are class indices)
def compute_accuracy(model, inputs, targets):
    """Return the fraction of rows in ``inputs`` whose arg-max output equals ``targets``.

    The forward pass runs with gradients disabled and the result is a Python
    float.
    """
    with torch.no_grad():
        guesses = model(inputs).argmax(dim=1)
        hits = guesses.eq(targets).float().sum()
        ratio = hits / inputs.size(0)
    return ratio.item()

# Population-based Adam (P-Adam)
class PAdam:
    """Applies one Adam-style gradient update to every individual of a population.

    Each individual is a flat weight vector with its own first (``m``) and
    second (``n``) moment estimates. The loss is cross-entropy.
    """

    def __init__(self, population, alpha=0.1, gamma1=0.9, gamma2=0.99, gamma3=0.999, tau=1e-7):
        self.population = population
        self.alpha = alpha      # step size
        self.gamma1 = gamma1    # decay of the first moment
        self.gamma2 = gamma2    # decay applied to the previous second moment
        self.gamma3 = gamma3    # weight/bias-correction base of the second moment
        self.tau = tau          # added to the denominator for numerical stability
        self.m = [torch.zeros_like(ind) for ind in population]  # First moment
        self.n = [torch.zeros_like(ind) for ind in population]  # Second moment

    def step(self, model, inputs, targets, t):
        """Update every individual once and return the new fitness values.

        Args:
            model: network exposing ``set_weights``, ``fc1`` and ``fc2``.
            inputs: input batch passed to the model.
            targets: class-index targets for the cross-entropy loss.
            t: 1-based iteration counter used for bias correction.

        Returns:
            A list of floats; entry ``i`` is the loss of ``self.population[i]``
            after the update, so values and individuals stay index-aligned.

        Raises:
            ValueError: if ``t`` is smaller than 1 (the bias correction would
                divide by zero).
        """
        if t < 1:
            raise ValueError("t must be >= 1 for bias correction")

        loss_fn = nn.CrossEntropyLoss()
        new_population = []
        fitnesses = []
        for i, ind in enumerate(self.population):
            # Gradient of the loss at the current individual.
            model.set_weights(ind)
            model.zero_grad()  # drop gradients left over from earlier use
            loss = loss_fn(model(inputs), targets)
            loss.backward()
            grads = torch.cat([
                model.fc1.weight.grad.flatten(),
                model.fc1.bias.grad.flatten(),
                model.fc2.weight.grad.flatten(),
                model.fc2.bias.grad.flatten()
            ])
            model.zero_grad()

            # Update moments
            # NOTE(review): the second moment decays with gamma2 but weights
            # the new gradient by (1 - gamma3) and is bias-corrected with
            # gamma3; kept as written — confirm against the intended P-Adam
            # definition.
            m_i = self.gamma1 * self.m[i] + (1 - self.gamma1) * grads
            n_i = self.gamma2 * self.n[i] + (1 - self.gamma3) * (grads ** 2)
            self.m[i] = m_i
            self.n[i] = n_i

            # Bias correction
            m_hat = m_i / (1 - self.gamma1 ** t)
            n_hat = n_i / (1 - self.gamma3 ** t)

            # Update parameters
            new_ind = ind - self.alpha * m_hat / (torch.sqrt(n_hat) + self.tau)
            new_population.append(new_ind)

            # Score the updated individual so the returned fitness matches the
            # population stored below.
            model.set_weights(new_ind)
            with torch.no_grad():
                fitnesses.append(loss_fn(model(inputs), targets).item())

        self.population = new_population
        return fitnesses

# Modified CoBiDE (M-CoBiDE)
class MCoBiDE:
    """Differential evolution with per-individual F/CR and eigen-space crossover.

    Individuals are flat weight tensors scored with ``compute_loss`` (lower is
    better). With probability ``pb`` the crossover is carried out in the
    eigenbasis of the covariance of the best ``ps`` share of the population.
    """

    def __init__(self, population, pb=0.5, ps=0.4):
        self.population = population
        self.pb = pb
        self.ps = ps
        self.rng = np.random.default_rng()
        self.F = [self._sample_F() for _ in population]
        self.CR = [self._sample_CR() for _ in population]

    def _sample_F(self):
        """Draw a scale factor from Cauchy(0.65, 0.1) or Cauchy(1.0, 0.1), chosen 50/50."""
        # NOTE(review): the sample is not bounded here, so F may be negative
        # or very large — confirm whether truncation/resampling is intended.
        r = self.rng.random()
        if r < 0.5:
            return cauchy.rvs(loc=0.65, scale=0.1, random_state=self.rng)
        else:
            return cauchy.rvs(loc=1.0, scale=0.1, random_state=self.rng)

    def _sample_CR(self):
        """Draw a crossover rate from Cauchy(0.1, 0.1) or Cauchy(0.95, 0.1), clipped to [0, 1]."""
        r = self.rng.random()
        if r < 0.5:
            cr = cauchy.rvs(loc=0.1, scale=0.1, random_state=self.rng)
        else:
            cr = cauchy.rvs(loc=0.95, scale=0.1, random_state=self.rng)
        return np.clip(cr, 0, 1)

    def _evaluate(self, model, individual, inputs, targets):
        """Load ``individual`` into ``model`` and return its loss."""
        model.set_weights(individual)
        return compute_loss(model, inputs, targets)

    def _eigenbasis(self, fitnesses):
        """Return the eigenvectors of the covariance of the best individuals."""
        pop_size = len(self.population)
        # At least two samples are needed for a defined covariance.
        top_count = min(pop_size, max(2, int(self.ps * pop_size)))
        top_indices = np.argsort(fitnesses)[:top_count]
        top_pop = torch.stack([self.population[i] for i in top_indices])
        cov = torch.cov(top_pop.T)
        # Add perturbation for stability
        cov = cov + 1e-6 * torch.eye(cov.shape[0], dtype=cov.dtype, device=cov.device)
        _, eigvecs = torch.linalg.eigh(cov)
        return eigvecs

    def _crossover(self, target, mutant, cr):
        """Binomial crossover; one randomly chosen component always comes from ``mutant``."""
        dim = len(target)
        j_rand = self.rng.integers(0, dim)
        take_mutant = self.rng.random(dim) <= cr
        take_mutant[j_rand] = True
        mask = torch.from_numpy(take_mutant).to(target.device)
        return torch.where(mask, mutant, target)

    def step(self, model, inputs, targets):
        """Run one generation and return the fitness of the resulting population.

        Every individual is first loaded into ``model`` and scored. A trial
        replaces its target only when strictly better, and the returned list
        is updated accordingly, so entry ``i`` is the loss of
        ``self.population[i]`` after the step.
        """
        fitnesses = [self._evaluate(model, ind, inputs, targets) for ind in self.population]
        best_idx = int(np.argmin(fitnesses))
        P = self._eigenbasis(fitnesses)
        pop_size = len(self.population)
        new_population = []

        for i, (ind, F_i, CR_i) in enumerate(zip(self.population, self.F, self.CR)):
            F_i = float(F_i)  # plain float keeps the tensor arithmetic below in torch
            others = [j for j in range(pop_size) if j != i]
            r1, r2 = self.rng.choice(others, 2, replace=False)
            v_i = ind + F_i * (self.population[best_idx] - ind) + F_i * (self.population[r1] - self.population[r2])

            if self.rng.random() >= self.pb:
                # Crossover in the original coordinate system.
                u_i = self._crossover(ind, v_i, CR_i)
            else:
                # Crossover in the eigen coordinate system, then map back.
                u_i = P @ self._crossover(P.T @ ind, P.T @ v_i, CR_i)

            u_fitness = self._evaluate(model, u_i, inputs, targets)
            if u_fitness < fitnesses[i]:
                new_population.append(u_i)
                fitnesses[i] = u_fitness
                # NOTE(review): F/CR are resampled after a successful trial
                # and kept after a failed one; kept as written — confirm this
                # is the intended direction.
                self.F[i] = self._sample_F()
                self.CR[i] = self._sample_CR()
            else:
                new_population.append(ind)

        self.population = new_population
        return fitnesses

# EDEAdam Algorithm
class EDEAdam:
    """Hybrid optimizer running P-Adam and M-CoBiDE on two halves of a population.

    The population of flat weight vectors is drawn uniformly from [-1, 1).
    The first ``pop_size // 2`` individuals are driven by ``PAdam``, the rest
    by ``MCoBiDE``; every ``exchange_interval`` iterations the best individual
    of each half may replace the worst individual of the other half.
    """

    def __init__(self, model, pop_size=50, max_evals=25000, exchange_interval=5):
        if pop_size < 1:
            # An empty population would never advance the evaluation counter.
            raise ValueError("pop_size must be positive")
        self.model = model
        self.pop_size = pop_size
        self.max_evals = max_evals
        self.exchange_interval = exchange_interval
        self.dim = sum(p.numel() for p in model.parameters())
        # Verify dimension
        expected_dim = model.input_size * model.hidden_size + model.hidden_size + model.hidden_size * model.output_size + model.output_size
        assert self.dim == expected_dim, f"Dimension mismatch: got {self.dim}, expected {expected_dim}"
        # Initialize population
        self.population = [torch.rand(self.dim) * 2 - 1 for _ in range(pop_size)]
        self.sub_pop1 = self.population[:pop_size//2]
        self.sub_pop2 = self.population[pop_size//2:]
        self.p_adam = PAdam(self.sub_pop1)
        self.m_cobide = MCoBiDE(self.sub_pop2)

    def run(self, inputs, targets):
        """Optimize until ``max_evals`` is reached and load the best individual.

        Each iteration adds the size of both sub-populations to the evaluation
        counter. Fitness lists returned by the two optimizers are assumed to
        be index-aligned with their ``population`` attribute.

        Returns:
            ``(best_fitness, eval_count)``: the lowest fitness observed and
            the final value of the evaluation counter.
        """
        t = 1
        eval_count = 0
        best_fitness = float('inf')
        best_individual = None

        while eval_count < self.max_evals:
            fitness1 = self.p_adam.step(self.model, inputs, targets, t)
            fitness2 = self.m_cobide.step(self.model, inputs, targets)

            # Both optimizers rebind their `population` on every step, so
            # re-attach the sub-population views to the current lists;
            # otherwise they would keep pointing at the initial individuals.
            self.sub_pop1 = self.p_adam.population
            self.sub_pop2 = self.m_cobide.population
            eval_count += len(self.sub_pop1) + len(self.sub_pop2)

            best_idx1, worst_idx1 = np.argmin(fitness1), np.argmax(fitness1)
            best_idx2, worst_idx2 = np.argmin(fitness2), np.argmax(fitness2)
            gen_best1 = fitness1[best_idx1]
            gen_best2 = fitness2[best_idx2]

            if min(gen_best1, gen_best2) < best_fitness:
                best_fitness = min(gen_best1, gen_best2)
                winner = self.sub_pop1[best_idx1] if gen_best1 < gen_best2 else self.sub_pop2[best_idx2]
                # Snapshot, so later migration cannot alter the recorded best.
                best_individual = winner.detach().clone()

            if t % self.exchange_interval == 0:
                # Copy both migrants before writing so that neither exchange
                # sees the result of the other.
                migrant_from_1 = self.sub_pop1[best_idx1].clone()
                migrant_from_2 = self.sub_pop2[best_idx2].clone()
                if gen_best1 < fitness2[worst_idx2]:
                    self.sub_pop2[worst_idx2] = migrant_from_1
                if gen_best2 < fitness1[worst_idx1]:
                    self.sub_pop1[worst_idx1] = migrant_from_2

            t += 1

        # No iteration runs when max_evals <= 0; leave the model untouched then.
        if best_individual is not None:
            self.model.set_weights(best_individual)
        return best_fitness, eval_count

# Fetch the Wine data; labels stay integer class indices (torch.long below)
data = load_wine()
X, y = data.data, data.target

# Stratified split: 40% goes to the test set, the remaining pool is halved
# into training and validation parts
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

# Standardize with statistics taken from the training part only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val, X_test = scaler.transform(X_val), scaler.transform(X_test)

# Wrap every array in a tensor (float32 features, int64 labels)
X_train, X_val, X_test = [
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_val, X_test)
]
y_train, y_val, y_test = [
    torch.tensor(labels, dtype=torch.long)
    for labels in (y_train, y_val, y_test)
]

# Define hyperparameter grid
pop_size_values = [25, 50, 100, 250]
max_evals_values = [500, 1000]
exchange_interval_values = [5, 10, 25, 50, 100]

# Grid search: train one model per configuration and keep the one with the
# highest validation accuracy.
best_val_accuracy = 0
best_params = {}
best_model_state = None
best_fitness = float('inf')
best_eval_count = 0

for pop_size in pop_size_values:
    for max_evals in max_evals_values:
        for exchange_interval in exchange_interval_values:
            print(f"Testing: pop_size={pop_size}, max_evals={max_evals}, exchange_interval={exchange_interval}")
            model = MLP(input_size=13, hidden_size=16, output_size=3)
            ede_adam = EDEAdam(model, pop_size=pop_size, max_evals=max_evals, exchange_interval=exchange_interval)
            # Named `run_fitness` so the module-level name `fitness` is not
            # rebound to a float.
            run_fitness, eval_count = ede_adam.run(X_train, y_train)
            val_accuracy = compute_accuracy(model, X_val, y_val)
            print(f"Validation accuracy: {val_accuracy:.6f}")
            # Always keep the first configuration so that a state and a
            # parameter set exist even if no accuracy exceeds the initial 0.
            if best_model_state is None or val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy
                best_params = {
                    'pop_size': pop_size,
                    'max_evals': max_evals,
                    'exchange_interval': exchange_interval
                }
                # Clone the tensors: state_dict() entries reference the live
                # parameters of `model`.
                best_model_state = {
                    key: value.detach().clone()
                    for key, value in model.state_dict().items()
                }
                best_fitness = run_fitness
                best_eval_count = eval_count

# Load best model state into a fresh network of the same architecture
model = MLP(input_size=13, hidden_size=16, output_size=3)
model.load_state_dict(best_model_state)

# Evaluate on train, validation, and test sets (accuracies are fractions)
model.eval()
train_accuracy = compute_accuracy(model, X_train, y_train)
val_accuracy = compute_accuracy(model, X_val, y_val)
test_accuracy = compute_accuracy(model, X_test, y_test)

# Print results: selected hyperparameters, the fitness and evaluation count
# recorded for that configuration, and the three accuracies
print("\nBest Hyperparameters:")
print(f"pop_size: {best_params['pop_size']}")
print(f"max_evals: {best_params['max_evals']}")
print(f"exchange_interval: {best_params['exchange_interval']}")
print(f"Best training cross-entropy loss: {best_fitness:.6f}")
print(f"Training accuracy: {train_accuracy:.6f}")
print(f"Validation accuracy: {val_accuracy:.6f}")
print(f"Test accuracy: {test_accuracy:.6f}")
print(f"Total evaluations: {best_eval_count}")

Testing: pop_size=25, max_evals=500, exchange_interval=5
Validation accuracy: 0.094340
Testing: pop_size=25, max_evals=500, exchange_interval=10
Validation accuracy: 0.716981
Testing: pop_size=25, max_evals=500, exchange_interval=25
Validation accuracy: 0.169811
Testing: pop_size=25, max_evals=500, exchange_interval=50
Validation accuracy: 0.433962
Testing: pop_size=25, max_evals=500, exchange_interval=100
Validation accuracy: 0.584906
Testing: pop_size=25, max_evals=1000, exchange_interval=5
Validation accuracy: 0.452830
Testing: pop_size=25, max_evals=1000, exchange_interval=10
Validation accuracy: 0.339623
Testing: pop_size=25, max_evals=1000, exchange_interval=25
Validation accuracy: 0.150943
Testing: pop_size=25, max_evals=1000, exchange_interval=50
Validation accuracy: 0.509434
Testing: pop_size=25, max_evals=1000, exchange_interval=100
Validation accuracy: 0.207547
Testing: pop_size=50, max_evals=500, exchange_interval=5
Validation accuracy: 0.509434
Testing: pop_size=50, max_ev

In [None]:
import torch
import torch.nn as nn
import numpy as np
from scipy.stats import cauchy
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# MLP for Wine dataset (13-16-3 architecture)
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear (raw scores out).

    All weights and biases are initialized uniformly in [-1, 1]. The
    parameters can be read and written as one flat vector laid out as
    ``fc1.weight``, ``fc1.bias``, ``fc2.weight``, ``fc2.bias``.
    """

    def __init__(self, input_size=13, hidden_size=16, output_size=3):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        # Initialize weights in [-1, 1]
        for param in self._ordered_params():
            nn.init.uniform_(param, -1, 1)

    def _ordered_params(self):
        """Return the parameters in the order used by the flat weight vector."""
        return (self.fc1.weight, self.fc1.bias, self.fc2.weight, self.fc2.bias)

    def forward(self, x):
        """Return the output-layer scores for a batch ``x``."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)  # No activation on output for MSE
        return x

    def get_weights(self):
        """Return all weights and biases concatenated into one 1-D tensor."""
        return torch.cat([param.flatten() for param in self._ordered_params()])

    def set_weights(self, weights):
        """Load all weights and biases from a flat vector.

        The values are copied into the existing parameters, so the model keeps
        its own dtype and device and never shares storage with ``weights``;
        later in-place edits of the vector cannot change the model.

        Raises:
            ValueError: if ``weights`` does not hold exactly one value per
                model parameter element.
        """
        expected = sum(param.numel() for param in self._ordered_params())
        if weights.numel() != expected:
            raise ValueError(
                f"Expected {expected} weights, got {weights.numel()}"
            )
        flat = weights.detach().reshape(-1)
        offset = 0
        with torch.no_grad():
            for param in self._ordered_params():
                count = param.numel()
                param.copy_(flat[offset:offset + count].reshape(param.shape))
                offset += count

# Compute MSE loss (used for optimization)
def compute_mse(model, inputs, targets):
    """Return the mean squared error between ``model(inputs)`` and ``targets`` as a float.

    Only the scalar value is returned, so the forward pass runs under
    ``torch.no_grad()`` to avoid building an autograd graph.
    """
    with torch.no_grad():
        residual = model(inputs) - targets
        return torch.mean(residual ** 2).item()

# Accuracy on a full tensor batch (targets are class indices)
def compute_accuracy(model, inputs, targets):
    """Return the fraction of rows in ``inputs`` whose arg-max output equals ``targets``.

    The forward pass runs with gradients disabled and the result is a Python
    float.
    """
    with torch.no_grad():
        guesses = model(inputs).argmax(dim=1)
        hits = guesses.eq(targets).float().sum()
        ratio = hits / inputs.size(0)
    return ratio.item()

# Population-based Adam (P-Adam)
class PAdam:
    """Applies one Adam-style gradient update to every individual of a population.

    Each individual is a flat weight vector with its own first (``m``) and
    second (``n``) moment estimates. The loss is the mean squared error
    between the model output and the targets.
    """

    def __init__(self, population, alpha=0.1, gamma1=0.9, gamma2=0.99, gamma3=0.999, tau=1e-7):
        self.population = population
        self.alpha = alpha      # step size
        self.gamma1 = gamma1    # decay of the first moment
        self.gamma2 = gamma2    # decay applied to the previous second moment
        self.gamma3 = gamma3    # weight/bias-correction base of the second moment
        self.tau = tau          # added to the denominator for numerical stability
        self.m = [torch.zeros_like(ind) for ind in population]  # First moment
        self.n = [torch.zeros_like(ind) for ind in population]  # Second moment

    def step(self, model, inputs, targets, t):
        """Update every individual once and return the new fitness values.

        Args:
            model: network exposing ``set_weights``, ``fc1`` and ``fc2``.
            inputs: input batch passed to the model.
            targets: tensor subtracted from the model output in the loss.
            t: 1-based iteration counter used for bias correction.

        Returns:
            A list of floats; entry ``i`` is the MSE of ``self.population[i]``
            after the update, so values and individuals stay index-aligned.

        Raises:
            ValueError: if ``t`` is smaller than 1 (the bias correction would
                divide by zero).
        """
        if t < 1:
            raise ValueError("t must be >= 1 for bias correction")

        new_population = []
        fitnesses = []
        for i, ind in enumerate(self.population):
            # Gradient of the loss at the current individual.
            model.set_weights(ind)
            model.zero_grad()  # drop gradients left over from earlier use
            loss = torch.mean((model(inputs) - targets) ** 2)
            loss.backward()
            grads = torch.cat([
                model.fc1.weight.grad.flatten(),
                model.fc1.bias.grad.flatten(),
                model.fc2.weight.grad.flatten(),
                model.fc2.bias.grad.flatten()
            ])
            model.zero_grad()

            # Update moments
            # NOTE(review): the second moment decays with gamma2 but weights
            # the new gradient by (1 - gamma3) and is bias-corrected with
            # gamma3; kept as written — confirm against the intended P-Adam
            # definition.
            m_i = self.gamma1 * self.m[i] + (1 - self.gamma1) * grads
            n_i = self.gamma2 * self.n[i] + (1 - self.gamma3) * (grads ** 2)
            self.m[i] = m_i
            self.n[i] = n_i

            # Bias correction
            m_hat = m_i / (1 - self.gamma1 ** t)
            n_hat = n_i / (1 - self.gamma3 ** t)

            # Update parameters
            new_ind = ind - self.alpha * m_hat / (torch.sqrt(n_hat) + self.tau)
            new_population.append(new_ind)

            # Score the updated individual so the returned fitness matches the
            # population stored below.
            model.set_weights(new_ind)
            with torch.no_grad():
                fitnesses.append(torch.mean((model(inputs) - targets) ** 2).item())

        self.population = new_population
        return fitnesses

# Modified CoBiDE (M-CoBiDE)
class MCoBiDE:
    """Differential evolution with per-individual F/CR and eigen-space crossover.

    Individuals are flat weight tensors scored with ``compute_mse`` (lower is
    better). With probability ``pb`` the crossover is carried out in the
    eigenbasis of the covariance of the best ``ps`` share of the population.
    """

    def __init__(self, population, pb=0.5, ps=0.4):
        self.population = population
        self.pb = pb
        self.ps = ps
        self.rng = np.random.default_rng()
        self.F = [self._sample_F() for _ in population]
        self.CR = [self._sample_CR() for _ in population]

    def _sample_F(self):
        """Draw a scale factor from Cauchy(0.65, 0.1) or Cauchy(1.0, 0.1), chosen 50/50."""
        # NOTE(review): the sample is not bounded here, so F may be negative
        # or very large — confirm whether truncation/resampling is intended.
        r = self.rng.random()
        if r < 0.5:
            return cauchy.rvs(loc=0.65, scale=0.1, random_state=self.rng)
        else:
            return cauchy.rvs(loc=1.0, scale=0.1, random_state=self.rng)

    def _sample_CR(self):
        """Draw a crossover rate from Cauchy(0.1, 0.1) or Cauchy(0.95, 0.1), clipped to [0, 1]."""
        r = self.rng.random()
        if r < 0.5:
            cr = cauchy.rvs(loc=0.1, scale=0.1, random_state=self.rng)
        else:
            cr = cauchy.rvs(loc=0.95, scale=0.1, random_state=self.rng)
        return np.clip(cr, 0, 1)

    def _evaluate(self, model, individual, inputs, targets):
        """Load ``individual`` into ``model`` and return its MSE."""
        model.set_weights(individual)
        return compute_mse(model, inputs, targets)

    def _eigenbasis(self, fitnesses):
        """Return the eigenvectors of the covariance of the best individuals."""
        pop_size = len(self.population)
        # At least two samples are needed for a defined covariance.
        top_count = min(pop_size, max(2, int(self.ps * pop_size)))
        top_indices = np.argsort(fitnesses)[:top_count]
        top_pop = torch.stack([self.population[i] for i in top_indices])
        cov = torch.cov(top_pop.T)
        # Add perturbation for stability
        cov = cov + 1e-6 * torch.eye(cov.shape[0], dtype=cov.dtype, device=cov.device)
        _, eigvecs = torch.linalg.eigh(cov)
        return eigvecs

    def _crossover(self, target, mutant, cr):
        """Binomial crossover; one randomly chosen component always comes from ``mutant``."""
        dim = len(target)
        j_rand = self.rng.integers(0, dim)
        take_mutant = self.rng.random(dim) <= cr
        take_mutant[j_rand] = True
        mask = torch.from_numpy(take_mutant).to(target.device)
        return torch.where(mask, mutant, target)

    def step(self, model, inputs, targets):
        """Run one generation and return the fitness of the resulting population.

        Every individual is first loaded into ``model`` and scored. A trial
        replaces its target only when strictly better, and the returned list
        is updated accordingly, so entry ``i`` is the MSE of
        ``self.population[i]`` after the step.
        """
        fitnesses = [self._evaluate(model, ind, inputs, targets) for ind in self.population]
        best_idx = int(np.argmin(fitnesses))
        P = self._eigenbasis(fitnesses)
        pop_size = len(self.population)
        new_population = []

        for i, (ind, F_i, CR_i) in enumerate(zip(self.population, self.F, self.CR)):
            F_i = float(F_i)  # plain float keeps the tensor arithmetic below in torch
            others = [j for j in range(pop_size) if j != i]
            r1, r2 = self.rng.choice(others, 2, replace=False)
            v_i = ind + F_i * (self.population[best_idx] - ind) + F_i * (self.population[r1] - self.population[r2])

            if self.rng.random() >= self.pb:
                # Crossover in the original coordinate system.
                u_i = self._crossover(ind, v_i, CR_i)
            else:
                # Crossover in the eigen coordinate system, then map back.
                u_i = P @ self._crossover(P.T @ ind, P.T @ v_i, CR_i)

            u_fitness = self._evaluate(model, u_i, inputs, targets)
            if u_fitness < fitnesses[i]:
                new_population.append(u_i)
                fitnesses[i] = u_fitness
                # NOTE(review): F/CR are resampled after a successful trial
                # and kept after a failed one; kept as written — confirm this
                # is the intended direction.
                self.F[i] = self._sample_F()
                self.CR[i] = self._sample_CR()
            else:
                new_population.append(ind)

        self.population = new_population
        return fitnesses

# EDEAdam Algorithm
class EDEAdam:
    """Hybrid optimizer that evolves two sub-populations side by side.

    The initial random population is split in half: the first half is handed
    to ``PAdam`` and the second half to ``MCoBiDE``. Every
    ``exchange_interval`` generations the best individual of one half
    overwrites the worst individual of the other half, provided it is better.
    """

    def __init__(self, model, pop_size=50, max_evals=25000, exchange_interval=5):
        """Create the population and the two sub-optimizers.

        Args:
            model: Network to optimize. Must expose ``parameters()``,
                ``set_weights`` and the attributes ``input_size``,
                ``hidden_size`` and ``output_size``.
            pop_size: Total number of individuals (split in two halves).
            max_evals: Evaluation budget that terminates ``run``.
            exchange_interval: Number of generations between exchanges.
        """
        self.model = model
        self.pop_size = pop_size
        self.max_evals = max_evals
        self.exchange_interval = exchange_interval
        self.dim = sum(p.numel() for p in model.parameters())
        # Sanity check: flat vector length must match a single-hidden-layer MLP
        expected_dim = model.input_size * model.hidden_size + model.hidden_size + model.hidden_size * model.output_size + model.output_size
        assert self.dim == expected_dim, f"Dimension mismatch: got {self.dim}, expected {expected_dim}"
        # Initialize population uniformly in [-1, 1)
        self.population = [torch.rand(self.dim) * 2 - 1 for _ in range(pop_size)]
        self.sub_pop1 = self.population[:pop_size//2]
        self.sub_pop2 = self.population[pop_size//2:]
        self.p_adam = PAdam(self.sub_pop1)
        self.m_cobide = MCoBiDE(self.sub_pop2)

    def _sync_sub_populations(self):
        """Re-bind ``sub_pop1``/``sub_pop2`` to the optimizers' live populations.

        An optimizer's ``step`` may rebind its ``population`` attribute to a
        brand-new list, which would leave the lists captured at construction
        stale. Assumes the individuals are exposed as ``.population``; if the
        attribute is missing the previously held list is kept.
        """
        self.sub_pop1 = getattr(self.p_adam, "population", self.sub_pop1)
        self.sub_pop2 = getattr(self.m_cobide, "population", self.sub_pop2)

    def run(self, inputs, targets):
        """Optimize the model until the evaluation budget is exhausted.

        Args:
            inputs: Training inputs forwarded to both optimizers' ``step``.
            targets: Training targets forwarded to both optimizers' ``step``.

        Returns:
            Tuple ``(best_fitness, eval_count)``. ``best_fitness`` is the
            lowest fitness reported by either optimizer (``inf`` when no
            generation ran). The best individual found is loaded into
            ``self.model`` before returning.
        """
        t = 1
        eval_count = 0
        best_fitness = float('inf')
        best_individual = None

        while eval_count < self.max_evals:
            fitness1 = self.p_adam.step(self.model, inputs, targets, t)
            fitness2 = self.m_cobide.step(self.model, inputs, targets)
            # Pick up the populations the optimizers hold after this step.
            self._sync_sub_populations()
            # NOTE(review): budgets one evaluation per individual per
            # generation; confirm against the evaluations done inside step().
            eval_count += len(self.sub_pop1) + len(self.sub_pop2)

            best_idx1, worst_idx1 = np.argmin(fitness1), np.argmax(fitness1)
            best_idx2, worst_idx2 = np.argmin(fitness2), np.argmax(fitness2)
            best_fit1, best_fit2 = fitness1[best_idx1], fitness2[best_idx2]

            if min(best_fit1, best_fit2) < best_fitness:
                if best_fit1 < best_fit2:
                    best_fitness, winner = best_fit1, self.sub_pop1[best_idx1]
                else:
                    best_fitness, winner = best_fit2, self.sub_pop2[best_idx2]
                # Snapshot so later generations or exchanges cannot alter it.
                best_individual = winner.detach().clone()

            if t % self.exchange_interval == 0:
                # Best of one half replaces the worst of the other, if better.
                if best_fit1 < fitness2[worst_idx2]:
                    self.sub_pop2[worst_idx2] = self.sub_pop1[best_idx1].clone()
                if best_fit2 < fitness1[worst_idx1]:
                    self.sub_pop1[worst_idx1] = self.sub_pop2[best_idx2].clone()

            t += 1

        # With a non-positive budget no generation runs and there is nothing to load.
        if best_individual is not None:
            self.model.set_weights(best_individual)
        return best_fitness, eval_count

# Fetch the Wine data; integer class labels are kept for accuracy computation
data = load_wine()
X, y = data.data, data.target

# Stratified hold-out: 25% for test, then 20% of the remainder for validation
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.2, stratify=y_temp, random_state=42
)

# Standardize with statistics estimated on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val, X_test = scaler.transform(X_val), scaler.transform(X_test)

# One-hot targets (3 classes) for the MSE objective: row k of the identity
# matrix is the encoding of class k
y_train_onehot = np.eye(3)[y_train]
y_val_onehot = np.eye(3)[y_val]
y_test_onehot = np.eye(3)[y_test]

# Features and one-hot targets become float32 tensors, class indices int64
X_train, X_val, X_test = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, X_val, X_test)
)
y_train_onehot, y_val_onehot, y_test_onehot = (
    torch.tensor(encoded, dtype=torch.float32)
    for encoded in (y_train_onehot, y_val_onehot, y_test_onehot)
)
y_train, y_val, y_test = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_val, y_test)
)

# Define hyperparameter grid
pop_size_values = [50, 100, 200]
max_evals_values = [1000, 2500, 5000]
exchange_interval_values = [5, 10, 20]

# Grid search: keep the configuration with the highest validation accuracy.
# Start below any attainable accuracy so the first configuration is always
# recorded, even if it scores 0.
best_val_accuracy = float('-inf')
best_params = {}
best_model_state = None
best_fitness = float('inf')
best_eval_count = 0

for pop_size in pop_size_values:
    for max_evals in max_evals_values:
        for exchange_interval in exchange_interval_values:
            print(f"Testing: pop_size={pop_size}, max_evals={max_evals}, exchange_interval={exchange_interval}")
            # Fresh model per configuration so runs do not influence each other
            model = MLP(input_size=13, hidden_size=16, output_size=3)
            ede_adam = EDEAdam(model, pop_size=pop_size, max_evals=max_evals, exchange_interval=exchange_interval)
            fitness, eval_count = ede_adam.run(X_train, y_train_onehot)
            val_accuracy = compute_accuracy(model, X_val, y_val)
            print(f"Validation accuracy: {val_accuracy:.6f}")
            if val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy
                best_params = {
                    'pop_size': pop_size,
                    'max_evals': max_evals,
                    'exchange_interval': exchange_interval
                }
                # state_dict() hands out references to the live parameters;
                # clone them so the snapshot cannot change afterwards.
                best_model_state = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }
                best_fitness = fitness
                best_eval_count = eval_count

# Fail with a clear message instead of passing None to load_state_dict
if best_model_state is None:
    raise RuntimeError("Grid search did not record any model state")

# Load best model state
model = MLP(input_size=13, hidden_size=16, output_size=3)
model.load_state_dict(best_model_state)

# Evaluate on train, validation, and test sets
model.eval()
train_accuracy = compute_accuracy(model, X_train, y_train)
val_accuracy = compute_accuracy(model, X_val, y_val)
test_accuracy = compute_accuracy(model, X_test, y_test)

# Print results
print("\nBest Hyperparameters:")
print(f"pop_size: {best_params['pop_size']}")
print(f"max_evals: {best_params['max_evals']}")
print(f"exchange_interval: {best_params['exchange_interval']}")
print(f"Best training MSE: {best_fitness:.6f}")
print(f"Training accuracy: {train_accuracy:.6f}")
print(f"Validation accuracy: {val_accuracy:.6f}")
print(f"Test accuracy: {test_accuracy:.6f}")
print(f"Total evaluations: {best_eval_count}")

Testing: pop_size=50, max_evals=1000, exchange_interval=5
Validation accuracy: 0.555556
Testing: pop_size=50, max_evals=1000, exchange_interval=10
Validation accuracy: 0.555556
Testing: pop_size=50, max_evals=1000, exchange_interval=20
Validation accuracy: 0.259259
Testing: pop_size=50, max_evals=2500, exchange_interval=5
Validation accuracy: 0.296296
Testing: pop_size=50, max_evals=2500, exchange_interval=10


KeyboardInterrupt: 