In [1]:
import env_hiv
from pyvirtualdisplay import Display
from IPython import display
import torch
torch.multiprocessing.set_start_method("spawn")
import torch.nn.functional as F
import torch.multiprocessing as mp
import torch.nn as nn
import cma
import numpy as np
import gymnasium as gym
import matplotlib.pyplot as plt
def fitness(x, ann, env, visul=False):
    ann.set_params(torch.Tensor(x))
    return -evaluate(ann, env, visul=visul)
def evaluate(ann, env, visul=True, maxiter = 200):
    #env.seed(0) # deterministic for demonstration
    obs,_ = env.reset()
    if visul:
        img = plt.imshow(env.render())
    total_reward = 0
    niter = 0
    while True:
        niter+=1
        # Output of the neural net
        net_output = ann(torch.from_numpy(obs).cuda())
        # the action is the value clipped returned by the nn
        action = net_output.data.cpu().numpy().argmax()
        obs, reward, done, trunc,infos = env.step(action)
        total_reward += reward
        if visul:
            img.set_data(env.render())
            plt.axis('off')
            display.display(plt.gcf())
            display.clear_output(wait=True)
        if done or niter>maxiter:
            break
    return total_reward

In [2]:
hidden_dim = 128
class NeuralNetwork(nn.Module):

    def __init__(self, input_shape, n_actions):
        super(NeuralNetwork, self).__init__()
        self.l1 = nn.Linear(input_shape, hidden_dim).cuda()
        self.l2 = nn.Linear(hidden_dim, hidden_dim).cuda()
        self.lout = nn.Linear(hidden_dim, n_actions).cuda()

    def forward(self, x):
        x = F.relu(self.l1(x.float()))
        x = F.relu(self.l2(x))
        return self.lout(x)

    def get_params(self):
        p = np.empty((0,))
        for n in self.parameters():
            p = np.append(p, n.flatten().cpu().detach().numpy())
        return p

    def set_params(self, x):
        start = 0
        for p in self.parameters():
            e = start + np.prod(p.shape)
            #p.data = torch.FloatTensor(x[start:e]).reshape(p.shape).cuda()
            p.data = (x[start:e]).to(torch.float32).reshape(p.shape).cuda()
            start = e

In [3]:

from joblib import delayed, Parallel
np.random.seed(123)
#env = gym.make("MountainCar-v0", render_mode = "rgb_array")
env = env_hiv.HIVPatient()
ann = NeuralNetwork(env.observation_space.shape[0], env.action_space.n).cuda()
es = cma.CMAEvolutionStrategy(len(ann.get_params()) * [0], 0.1, {'seed': 123})
from tqdm import tqdm
niter = 25
noffsprings = 25

for i in range(niter):
    solutions = torch.from_numpy(np.array(es.ask(noffsprings))).cuda()
    #fits = [fitness(x, ann, env) for x in solutions]
    fits=Parallel(n_jobs=17, backend = "loky")(delayed(fitness)(x, ann, env) for i, x in enumerate(solutions))    
    es.tell(solutions.cpu(), fits)
    es.disp()


(16_w,33)-aCMA-ES (mu_w=9.4,w_1=19%) in dimension 17924 (seed=123, Wed Feb 21 21:46:29 2024)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     25 -6.814066737911182e+06 1.0e+00 9.99e-02  1e-01  1e-01 0:15.4
    2     50 -6.814066737911182e+06 1.0e+00 9.99e-02  1e-01  1e-01 0:27.6
    3     75 -6.814066737911182e+06 1.0e+00 9.98e-02  1e-01  1e-01 0:39.3
    4    100 -6.980018828434741e+06 1.0e+00 9.98e-02  1e-01  1e-01 0:50.7
    5    125 -6.814066737911182e+06 1.0e+00 9.97e-02  1e-01  1e-01 1:02.4
    6    150 -6.814066737911182e+06 1.0e+00 9.96e-02  1e-01  1e-01 1:14.1
    7    175 -6.814066737911182e+06 1.0e+00 9.96e-02  1e-01  1e-01 1:25.6
    8    200 -7.103384947284224e+06 1.0e+00 9.95e-02  1e-01  1e-01 1:37.4
    9    225 -6.814066737911182e+06 1.0e+00 9.95e-02  1e-01  1e-01 1:49.1
   10    250 -6.814066737911182e+06 1.0e+00 9.94e-02  1e-01  1e-01 2:00.8
   11    275 -6.814066737911182e+06 1.0e+00 9.94e-02  1e-01  1e-01 2:12.5
   12    300 -6.81406

In [5]:
x = es.result[0]
-fitness(x, ann, env, visul=False)

7103384.9472842235