In [26]:
import torch
import torch.nn as nn

In [27]:
lin1 = nn.Linear(4, 3, bias=False)
lin2 = nn.Linear(4, 3, bias=False)

In [36]:
X = torch.randn((3, 2, 4))
w = torch.randn((3, 3, 4))
lin1.weight.data = w[0]
lin2.weight.data = w[1]
with torch.no_grad():
    y1 = lin1(X[0])
    y2 = lin1(X[1])
print(y1)
print(y2)

tensor([[ 0.4788, -3.9809,  0.9520],
        [ 7.2056,  0.8971, -0.4134]])
tensor([[ 0.2172, -2.3395, -1.6771],
        [ 1.1577, -1.7334,  0.2112]])


In [37]:
torch.einsum('pnm,pbm -> pbn', (w, X))

tensor([[[ 0.4788, -3.9809,  0.9520],
         [ 7.2056,  0.8971, -0.4134]],

        [[ 0.8224,  0.9175, -0.8378],
         [ 0.6752,  1.4937, -2.3643]],

        [[-1.2362, -1.9377, -1.8750],
         [ 2.5180,  1.3706, -1.4332]]])

In [38]:
class PopLinear(nn.Module):
    def __init__(self, in_features, out_features, pop_size=100, bias=True, *args, **kwargs):
        super(PopLinear, self).__init__(*args, **kwargs)
        self.in_features = in_features
        self.out_features = out_features
        self.pop_size = pop_size
        self.weight = nn.Parameter(data=torch.randn((pop_size, out_features, in_features)), requires_grad=False)
        if bias:
            self.bias = nn.Parameter(data=torch.randn((pop_size, out_features)), requires_grad=False)
        else:
            self.bias = None
            
    def vectorize(self):
        w = self.weight.data.view(self.pop_size, -1)
        if self.bias is not None:
            b = self.bias.view(self.pop_size, -1)
            w = torch.cat([w, b], dim=1)
        return w
    
    def assign_vector(self, w):
        if self.bias is not None:
            b_dim = self.bias.shape[1] * -1
            slc = w[:, b_dim:]
            self.bias.data = w[:, b_dim:]
            self.weight.data = w[:, :b_dim].view(self.pop_size, self.out_features, self.in_features)
        else:
            b_dim = -1
            self.weight.data = w.reshape(self.pop_size, self.out_features, self.in_features)
        
    def forward(self, inp):
        X = torch.einsum('pnm,pbm -> pbn', (self.weight.data, inp))
        if self.bias is not None:
            X += self.bias.data[:, None, :]
        return X
    
lin3 = PopLinear(4, 3, 3, bias=False)
lin3.weight.data = w

In [39]:
vec = lin3.vectorize()
lin3.assign_vector(vec)

In [40]:
lin3(X)

tensor([[[ 0.4788, -3.9809,  0.9520],
         [ 7.2056,  0.8971, -0.4134]],

        [[ 0.8224,  0.9175, -0.8378],
         [ 0.6752,  1.4937, -2.3643]],

        [[-1.2362, -1.9377, -1.8750],
         [ 2.5180,  1.3706, -1.4332]]])

In [34]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X, y = load_digits(return_X_y=True)
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=.1, random_state=42)

Xmean = Xtr.mean()
Xstd = Xtr.std()
Xtr -= Xmean
Xte -= Xmean
Xtr /= Xstd
Xte /= Xstd

In [12]:
class LEEAModel(nn.Module):
    def __init__(self, pop_size=100, *args, **kwargs):
        super(LEEAModel, self).__init__(*args, **kwargs)
        self.pop_size = pop_size
        self.inp = PopLinear(in_features=64, out_features=64, pop_size=pop_size)
        self.dense1 = PopLinear(in_features=64, out_features=64, pop_size=pop_size)
        self.dense2 = PopLinear(in_features=64, out_features=64, pop_size=pop_size)
        self.dense3 = PopLinear(in_features=64, out_features=10, pop_size=pop_size)
        
    def forward(self, inp):
        x = inp[None, :, :].expand((self.pop_size, -1, 64))
        x = torch.relu(self.inp(x))
        x = torch.relu(self.dense1(x))
        x = torch.relu(self.dense2(x))
        out = self.dense3(x)
        return out
        
        
model = LEEAModel()

def get_fitnesses(X, y):
    out = model(X)
    res = torch.argmax(out, dim=2)
    fitnesses = (res == y).sum(dim=1)
    return fitnesses

def batch_generator(X, y, batch_size=128):
    X = torch.tensor(X).float()
    y = torch.tensor(y)
    dim = X.shape[0]
    while True:
        ind = torch.randint(high=dim, size=(batch_size,))
        yield X[ind], y[ind]
        
gen = batch_generator(Xtr, ytr)
X_, y_ = next(gen)
fitnesses = get_fitnesses(X_, y_)

In [46]:
class LEEAOptimizer(object):
    def __init__(self, parameters=[None], pop_size=100, sigma=0.001, alpha=1.00, pE=0.05, pC=0.50, pM=0.45, rho=0.5):
        self.pop_size=pop_size
        self.sigma=sigma
        self.alpha=alpha
        self.pE=pE
        self.pC=pC
        self.pM=pM
        self.rho=rho
        self.p1_fitness = torch.tensor([0 for _ in range(pop_size)])
        self.p2_fitness = torch.tensor([0 for _ in range(pop_size)])
        self.individuals = self.process_parameters(parameters)
        
    def process_parameters(self, parameters):
        params = []
        for prm in parameters:
            params.append(prm.data.view(self.pop_size, -1))
        return torch.cat(params, dim=1)
        
    def update(self, fitnesses):
        f_prime = (((self.p1_fitness + self.p2_fitness)/2) * (1-self.alpha)) + fitnesses
        n_elites = int(self.pop_size * self.pE)
        _, elites = torch.topk(f_prime, n_elites)
        k = int(self.pop_size * self.rho)
        f_primes, selected = torch.topk(f_prime, k)
        next_generation = self.produce_generation(selected, f_primes, elites)
        return next_generation
        
    def produce_generation(self, selected, f_primes, elites):
        next_generation = [self.individuals[elites]]
        n_crossover = int(self.pC * self.pop_size)
        next_generation.append()
        return next_generation
        
    
optim = LEEAOptimizer(model.parameters())
optim.update(fitnesses).shape

torch.Size([5, 13130])

In [21]:
for _ in model.parameters():
    print(_.shape)

torch.Size([100, 64, 64])
torch.Size([100, 64])
torch.Size([100, 64, 64])
torch.Size([100, 64])
torch.Size([100, 64, 64])
torch.Size([100, 64])
torch.Size([100, 10, 64])
torch.Size([100, 10])


In [77]:
inp = torch.tensor([1.0, 1.0, 1.0, 1.0])[None, :]
inp.repeat((2, 1))

#torch.multinomial(torch.tensor([1.0, 1.0, 1.0, 1.0])[None, :].repeat((2,1)), num_samples=2)
torch.multinomial(torch.tensor([1.0, 1.0, 10000.0, 1.0]), num_samples=2, replacement=True)

tensor([2, 2])