# DEAW

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Załaduj zbiór danych Iris
iris = load_wine()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

class MLP(nn.Module):
    def __init__(self, input_size=13, hidden_size=16, output_size=3):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Funkcja oceny (fitness)
def fitness(weights, model, train_loader, device):
    idx = 0
    for param in model.parameters():
        numel = param.numel()
        param.data = weights[idx:idx+numel].view(param.size()).to(device)
        idx += numel
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            total_loss += loss.item() * inputs.size(0)
    return total_loss / len(train_loader.dataset)

# Algorytm DEAW
def train_deaw(model, train_loader, device, NP=50, F=0.5, CR=0.9, max_generations=100, initial_lower=-1.0, initial_upper=1.0):
    num_weights = sum(p.numel() for p in model.parameters())
    lower_bounds = np.full(num_weights, initial_lower)
    upper_bounds = np.full(num_weights, initial_upper)
    population = np.random.uniform(initial_lower, initial_upper, (NP, num_weights))
    fitnesses = np.array([fitness(torch.tensor(p, dtype=torch.float32), model, train_loader, device) for p in population])

    for generation in range(max_generations):
        for i in range(NP):
            candidates = [j for j in range(NP) if j != i]
            a, b, c = np.random.choice(candidates, 3, replace=False)
            v = population[a] + F * (population[b] - population[c])

            for j in range(num_weights):
                if v[j] < lower_bounds[j]:
                    lower_bounds[j] *= 3
                    v[j] = lower_bounds[j]
                elif v[j] > upper_bounds[j]:
                    upper_bounds[j] *= 3
                    v[j] = upper_bounds[j]

            u = np.copy(population[i])
            j_rand = np.random.randint(0, num_weights)
            for j in range(num_weights):
                if np.random.rand() < CR or j == j_rand:
                    u[j] = v[j]

            u_tensor = torch.tensor(u, dtype=torch.float32)
            loss_u = fitness(u_tensor, model, train_loader, device)

            if loss_u < fitnesses[i]:
                population[i] = u.copy()
                fitnesses[i] = loss_u

    best_idx = np.argmin(fitnesses)
    best_weights = torch.tensor(population[best_idx], dtype=torch.float32)
    idx = 0
    for param in model.parameters():
        numel = param.numel()
        param.data = best_weights[idx:idx+numel].view(param.size()).to(device)
        idx += numel

# Użycie
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLP().to(device)
train_deaw(model, train_loader, device)

In [11]:
def evaluate_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
accuracy = evaluate_model(model, test_loader, device)
print(f"Dokładność na zbiorze testowym: {accuracy:.2f}%")

Dokładność na zbiorze testowym: 66.67%


In [8]:
def train_de(model, train_loader, device, NP=50, F=0.5, CR=0.9, max_generations=100, initial_lower=-1.0, initial_upper=1.0):
    num_weights = sum(p.numel() for p in model.parameters())
    population = np.random.uniform(initial_lower, initial_upper, (NP, num_weights))
    fitnesses = np.array([fitness(torch.tensor(p, dtype=torch.float32), model, train_loader, device) for p in population])

    for generation in range(max_generations):
        for i in range(NP):
            candidates = [j for j in range(NP) if j != i]
            a, b, c = np.random.choice(candidates, 3, replace=False)
            v = population[a] + F * (population[b] - population[c])

            # Stałe ograniczenie: ograniczamy wartości do zakresu
            v = np.clip(v, initial_lower, initial_upper)

            u = np.copy(population[i])
            j_rand = np.random.randint(0, num_weights)
            for j in range(num_weights):
                if np.random.rand() < CR or j == j_rand:
                    u[j] = v[j]

            u_tensor = torch.tensor(u, dtype=torch.float32)
            loss_u = fitness(u_tensor, model, train_loader, device)

            if loss_u < fitnesses[i]:
                population[i] = u.copy()
                fitnesses[i] = loss_u

    best_idx = np.argmin(fitnesses)
    best_weights = torch.tensor(population[best_idx], dtype=torch.float32)
    idx = 0
    for param in model.parameters():
        numel = param.numel()
        param.data = best_weights[idx:idx+numel].view(param.size()).to(device)
        idx += numel


In [9]:
model_de = MLP().to(device)
train_de(model_de, train_loader, device)
accuracy_de = evaluate_model(model_de, test_loader, device)
print(f"[DE] Dokładność na zbiorze testowym: {accuracy_de:.2f}%")


[DE] Dokładność na zbiorze testowym: 66.67%


# EDEADAM

In [24]:
import torch
import torch.nn as nn
import numpy as np
from scipy.stats import cauchy
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + torch.exp(-x))

# FFNN for Iris dataset (4-9-3 architecture)
class FFNN(nn.Module):
    def __init__(self, input_size=4, hidden_size=9, output_size=3):
        super(FFNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.layer1 = nn.Linear(input_size, hidden_size)  # Includes bias
        self.layer2 = nn.Linear(hidden_size, output_size)  # Includes bias
        # Initialize weights in [-1, 1]
        nn.init.uniform_(self.layer1.weight, -1, 1)
        nn.init.uniform_(self.layer2.weight, -1, 1)
        nn.init.uniform_(self.layer1.bias, -1, 1)
        nn.init.uniform_(self.layer2.bias, -1, 1)

    def forward(self, x):
        x = sigmoid(self.layer1(x))  # Bias included in layer1
        x = self.layer2(x)  # Bias included in layer2, no sigmoid on output for MSE
        return x

    def get_weights(self):
        # Flatten weights and biases into a vector
        return torch.cat([
            self.layer1.weight.flatten(),
            self.layer1.bias.flatten(),
            self.layer2.weight.flatten(),
            self.layer2.bias.flatten()
        ])

    def set_weights(self, weights):
        # Set weights from a flat vector
        idx = 0
        w1_size = self.input_size * self.hidden_size
        self.layer1.weight.data = weights[idx:idx+w1_size].reshape(self.hidden_size, self.input_size)
        idx += w1_size
        self.layer1.bias.data = weights[idx:idx+self.hidden_size]
        idx += self.hidden_size
        w2_size = self.hidden_size * self.output_size
        self.layer2.weight.data = weights[idx:idx+w2_size].reshape(self.output_size, self.hidden_size)
        idx += w2_size
        self.layer2.bias.data = weights[idx:idx+self.output_size]

# Compute MSE loss
def compute_mse(model, inputs, targets):
    outputs = model(inputs)
    mse = torch.mean((outputs - targets) ** 2)
    return mse.item()

# Population-based Adam (P-Adam)
class PAdam:
    def __init__(self, population, alpha=0.1, gamma1=0.9, gamma2=0.99, gamma3=0.999, tau=1e-7):
        self.population = population
        self.alpha = alpha
        self.gamma1 = gamma1
        self.gamma2 = gamma2
        self.gamma3 = gamma3
        self.tau = tau
        self.m = [torch.zeros_like(ind) for ind in population]  # First moment
        self.n = [torch.zeros_like(ind) for ind in population]  # Second moment

    def step(self, model, inputs, targets, t):
        new_population = []
        fitnesses = []
        for i, (ind, m_i, n_i) in enumerate(zip(self.population, self.m, self.n)):
            model.set_weights(ind)
            outputs = model(inputs)
            loss = torch.mean((outputs - targets) ** 2)
            loss.backward()

            # Compute gradients
            grads = torch.cat([
                model.layer1.weight.grad.flatten(),
                model.layer1.bias.grad.flatten(),
                model.layer2.weight.grad.flatten(),
                model.layer2.bias.grad.flatten()
            ])

            # Update moments
            m_i = self.gamma1 * m_i + (1 - self.gamma1) * grads
            n_i = self.gamma2 * n_i + (1 - self.gamma3) * (grads ** 2)

            # Bias correction
            m_hat = m_i / (1 - self.gamma1 ** t)
            n_hat = n_i / (1 - self.gamma3 ** t)

            # Update parameters
            new_ind = ind - self.alpha * m_hat / (torch.sqrt(n_hat) + self.tau)
            new_population.append(new_ind)
            fitnesses.append(compute_mse(model, inputs, targets))

            # Zero gradients
            model.zero_grad()

            self.m[i] = m_i
            self.n[i] = n_i

        self.population = new_population
        return fitnesses

# Modified CoBiDE (M-CoBiDE)
class MCoBiDE:
    def __init__(self, population, pb=0.5, ps=0.4):
        self.population = population
        self.pb = pb
        self.ps = ps
        self.rng = np.random.default_rng()
        self.F = [self._sample_F() for _ in population]
        self.CR = [self._sample_CR() for _ in population]

    def _sample_F(self):
        r = self.rng.random()
        if r < 0.5:
            return cauchy.rvs(loc=0.65, scale=0.1, random_state=self.rng)
        else:
            return cauchy.rvs(loc=1.0, scale=0.1, random_state=self.rng)

    def _sample_CR(self):
        r = self.rng.random()
        if r < 0.5:
            cr = cauchy.rvs(loc=0.1, scale=0.1, random_state=self.rng)
        else:
            cr = cauchy.rvs(loc=0.95, scale=0.1, random_state=self.rng)
        return np.clip(cr, 0, 1)

    def step(self, model, inputs, targets):
        fitnesses = [compute_mse(model, inputs, targets) for ind in self.population]
        best_idx = np.argmin(fitnesses)
        new_population = []

        # Compute covariance matrix for top ps proportion
        top_indices = np.argsort(fitnesses)[:int(self.ps * len(self.population))]
        top_pop = torch.stack([self.population[i] for i in top_indices])
        cov = torch.cov(top_pop.T)
        cov += 1e-6 * torch.eye(cov.shape[0])  # Add perturbation for stability
        eigvals, eigvecs = torch.linalg.eigh(cov)
        P = eigvecs

        for i, (ind, F_i, CR_i) in enumerate(zip(self.population, self.F, self.CR)):
            r1, r2 = self.rng.choice([j for j in range(len(self.population)) if j != i], 2, replace=False)
            v_i = ind + F_i * (self.population[best_idx] - ind) + F_i * (self.population[r1] - self.population[r2])

            r3 = self.rng.random()
            if r3 >= self.pb:
                u_i = ind.clone()
                j_rand = self.rng.integers(0, len(ind))
                for j in range(len(ind)):
                    if self.rng.random() <= CR_i or j == j_rand:
                        u_i[j] = v_i[j]
            else:
                x_prime = P.T @ ind
                v_prime = P.T @ v_i
                u_prime = x_prime.clone()
                j_rand = self.rng.integers(0, len(ind))
                for j in range(len(ind)):
                    if self.rng.random() <= CR_i or j == j_rand:
                        u_prime[j] = v_prime[j]
                u_i = P @ u_prime

            model.set_weights(u_i)
            u_fitness = compute_mse(model, inputs, targets)
            if u_fitness < fitnesses[i]:
                new_population.append(u_i)
                self.F[i] = self._sample_F()
                self.CR[i] = self._sample_CR()
            else:
                new_population.append(ind)

        self.population = new_population
        return fitnesses

# EDEAdam Algorithm
class EDEAdam:
    def __init__(self, model, pop_size=50, max_evals=25000, exchange_interval=5):
        self.model = model
        self.pop_size = pop_size
        self.max_evals = max_evals
        self.exchange_interval = exchange_interval
        self.dim = sum(p.numel() for p in model.parameters())
        # Verify dimension
        expected_dim = model.input_size * model.hidden_size + model.hidden_size + model.hidden_size * model.output_size + model.output_size
        assert self.dim == expected_dim, f"Dimension mismatch: got {self.dim}, expected {expected_dim}"
        # Initialize population
        self.population = [torch.rand(self.dim) * 2 - 1 for _ in range(pop_size)]
        self.sub_pop1 = self.population[:pop_size//2]
        self.sub_pop2 = self.population[pop_size//2:]
        self.p_adam = PAdam(self.sub_pop1)
        self.m_cobide = MCoBiDE(self.sub_pop2)

    def run(self, inputs, targets):
        t = 1
        eval_count = 0
        best_fitness = float('inf')
        best_individual = None

        while eval_count < self.max_evals:
            fitness1 = self.p_adam.step(self.model, inputs, targets, t)
            fitness2 = self.m_cobide.step(self.model, inputs, targets)
            eval_count += len(self.sub_pop1) + len(self.sub_pop2)

            best_idx1, worst_idx1 = np.argmin(fitness1), np.argmax(fitness1)
            best_idx2, worst_idx2 = np.argmin(fitness2), np.argmax(fitness2)

            if min(fitness1 + fitness2) < best_fitness:
                best_fitness = min(fitness1 + fitness2)
                best_individual = self.sub_pop1[best_idx1] if fitness1[best_idx1] < fitness2[best_idx2] else self.sub_pop2[best_idx2]

            if t % self.exchange_interval == 0:
                if fitness1[best_idx1] < fitness2[worst_idx2]:
                    self.sub_pop2[worst_idx2] = self.sub_pop1[best_idx1].clone()
                if fitness2[best_idx2] < fitness1[worst_idx1]:
                    self.sub_pop1[worst_idx1] = self.sub_pop2[best_idx2].clone()

            t += 1

        self.model.set_weights(best_individual)
        return best_fitness, eval_count



Final Fitness (MSE): 0.007978
Training Accuracy: 42.86%
Test Accuracy: 28.89%
Evaluations Used: 25000


In [None]:
    iris = load_iris()
    X = torch.tensor(StandardScaler().fit_transform(iris.data), dtype=torch.float32)
    y = torch.tensor(np.eye(3)[iris.target], dtype=torch.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y,stratify = y, test_size=0.3, random_state=42)

    torch.manual_seed(42)
    np.random.seed(42)

    model = FFNN()
    ede_adam = EDEAdam(model, pop_size=5000, max_evals=250000)

    final_fitness, evals = ede_adam.run(X_train, y_train)

    model.eval()
    with torch.no_grad():
        train_pred = model(X_train)
        train_acc = (train_pred.argmax(dim=1) == y_train.argmax(dim=1)).float().mean().item() * 100
        test_pred = model(X_test)
        test_acc = (test_pred.argmax(dim=1) == y_test.argmax(dim=1)).float().mean().item() * 100

    print(f"Final Fitness (MSE): {final_fitness:.6f}")
    print(f"Training Accuracy: {train_acc:.2f}%")
    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Evaluations Used: {evals}")
