#### Task2 Pareto Training

In [41]:
# imports framework
import os
from evoman.environment import Environment
from demo_controller import player_controller

import numpy as np

# Pymoo stuff
import pymoo
from pymoo.algorithms.moo.unsga3 import UNSGA3
from pymoo.optimize import minimize
from pymoo.core.problem import ElementwiseProblem
from pymoo.util.reference_direction import UniformReferenceDirectionFactory


#### Environment Setup

In [42]:
# Enemies to train on
train_enemies = [1, 2, 3, 8]

# Directory used for stat tracking / solution files of this experiment.
experiment_name = f"training_PAR{len(train_enemies)}"
# exist_ok=True replaces the check-then-create pattern, so re-running this cell
# (or running two notebooks at once) cannot fail on an already existing folder.
os.makedirs(experiment_name, exist_ok=True)

# Initial Environment parameters
n_hidden_neurons = 10               # Hidden neurons in player_controller

# choose this for not using visuals and thus making experiments faster
headless = True
if headless:
    os.environ["SDL_VIDEODRIVER"] = "dummy"

# Initializes the simulation with an AI-controlled player against all training
# enemies at once (multiplemode="yes"), with enemies in "static" mode.
env = Environment(experiment_name=experiment_name,
                  enemies=train_enemies,
                  playermode="ai",
                  player_controller=player_controller(n_hidden_neurons),
                  enemymode="static",
                  multiplemode="yes",
                  level=2,
                  speed="fastest",
                  visuals=False,
                  logs="off")

# Uncomment to show env info.
# env.state_to_log()

#### Pymoo Problem definition

In [43]:
# Utility function for updating the environment based on a list of enemies:
def evo_env_update(enemy, env=env):
    """Reconfigure `env` to play against the given enemy or enemies.

    Parameters
    ----------
    enemy : a single enemy identifier, or a list/tuple of them
        A single value is wrapped into a one-element list. Lists and tuples
        are both treated as collections of enemies.
    env : environment object exposing `update_parameter(name, value)`
        Defaults to the module-level `env` that existed when this function
        was defined.

    Side effects
    ------------
    Sets the "multiplemode" parameter to "no" when exactly one enemy is given
    and to "yes" otherwise, then sets the "enemies" parameter to the list.
    """
    # isinstance (instead of an exact type comparison) also accepts tuples and
    # list subclasses, which would otherwise be wrapped as one bogus enemy.
    enemies = list(enemy) if isinstance(enemy, (list, tuple)) else [enemy]

    env.update_parameter("multiplemode", "no" if len(enemies) == 1 else "yes")
    env.update_parameter("enemies", enemies)

In [44]:
# Fitness function wrapper for pymoo
def evaluate(x, env=env):
    """Play one game with controller weights `x` and hand back the raw result.

    `x` is forwarded as the `pcont` argument of `env.play`; whatever that call
    returns is returned unchanged. `env` defaults to the module-level
    environment that existed when this function was defined.
    """
    return env.play(pcont=x)

class MultiObjectiveFitness(ElementwiseProblem):
    """Multi-objective pymoo problem with one objective per training enemy.

    Each decision vector is played against every enemy in `train_enemies`
    separately; the negated fitness of each game is one objective, so that
    minimizing the objectives maximizes the fitness.
    """

    def __init__(self, n_var, train_enemies, **kwargs):
        """Store the enemy list and declare `n_var` variables bounded to [-1, 1]."""
        self.train_enemies = train_enemies
        self.n_var = n_var

        # Box constraints: every variable lives in [-1, 1]
        self.xu = np.ones(self.n_var)
        self.xl = -self.xu

        # One objective per enemy, no inequality constraints
        super().__init__(
            n_var=self.n_var,
            n_obj=len(train_enemies),
            n_ieq_constr=0,
            xl=self.xl,
            xu=self.xu,
            **kwargs,
        )

    def _evaluate(self, x, out, *args, **kwargs):
        """Fill out["F"] with the negated per-enemy fitness of solution `x`."""
        objectives = []

        for opponent in self.train_enemies:
            # Switch the shared environment to this single enemy before playing
            evo_env_update(opponent)

            # Only the first of the four returned values (the fitness) is used;
            # it is negated because pymoo minimizes
            score, _, _, _ = evaluate(x)
            objectives.append(-score)

        out["F"] = np.array(objectives)

#### Define Training Function

In [45]:
def pareto_train(env, n_hidden_neurons: int=10, n_runs: int=10, n_gens: int=50,
                 enemies=None, pop_size: int=100, n_ref_points: int=10,
                 save_history: bool=True) -> list:
    """
    Run `n_runs` independent U-NSGA-III optimizations of `n_gens` generations each.

    Parameters
    ----------
    env : environment object; only `env.get_num_sensors()` is used here to
        size the decision vector.
    n_hidden_neurons : number of hidden neurons used to size the decision vector.
    n_runs : number of independent runs; run `i` is seeded with `seed=i`.
    n_gens : value passed to the ('n_iter', n_gens) termination criterion.
    enemies : enemies to train on (one objective per enemy). Defaults to the
        module-level `train_enemies`.
    pop_size : population size handed to UNSGA3.
    n_ref_points : number of uniform reference directions.
        NOTE(review): the uniform factory presumably only accepts point counts
        that match the number of objectives - confirm before changing either.
    save_history : forwarded to `minimize`.

    Returns
    -------
    list with one pymoo result object per run, in run order.
    """
    if enemies is None:
        # Keep the original behaviour of training on the notebook-level list
        enemies = train_enemies

    results = []

    # Size of the weight vector.
    # NOTE(review): the trailing 5 is presumably the number of controller outputs - confirm.
    N_VAR = (env.get_num_sensors()+1)*n_hidden_neurons+(n_hidden_neurons+1)*5

    for i in range(n_runs):
        print(f"Pareto: Training run {i+1} of {n_runs} of {n_gens} generations\n")
        problem_pareto = MultiObjectiveFitness(N_VAR, enemies)
        ref_dirs = UniformReferenceDirectionFactory(len(enemies), n_points=n_ref_points).do()
        algorithm_pareto = UNSGA3(ref_dirs, pop_size=pop_size)
        res = minimize(problem_pareto,
                       algorithm_pareto,
                       termination=('n_iter', n_gens),
                       seed=i,                      # distinct, reproducible seed per run
                       verbose=True,
                       save_history=save_history)

        results.append(res)

    return results

#### Run Training  

In [46]:
# Run training: 10 independent runs of 5 generations each
training_results = pareto_train(env, n_hidden_neurons, n_runs=10, n_gens=5)

Pareto: Training run 1 of 10 of 5 generations

n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |      100 |      3 |             - |             -
     2 |      200 |      5 |  0.3945475491 |             f
     3 |      300 |      5 |  0.0275902562 |         ideal
     4 |      400 |      5 |  0.5309321647 |         ideal
     5 |      500 |      7 |  0.0107293168 |         ideal
Pareto: Training run 2 of 10 of 5 generations

n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |      100 |      4 |             - |             -
     2 |      200 |      4 |  0.0034191849 |             f
     3 |      300 |      4 |  0.000000E+00 |             f
     4 |      400 |      5 |  0.0322227065 |         ideal
     5 |      500 |      5 |  0.4007237797 |         ideal
Pareto: Training run 3 of 10 of 5 generations

n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |      100 |      6 |             - |             -
     2 |      200 |      4 |  0.0

#### Test Solutions

In [47]:
def test_solution_against_individuals(env, result, test_enemies: list=[1,2,3,4,5,6,7,8], verbose: bool=True) -> tuple:
    """
    Returns 2 lists: 
    List 1 contains the fitnesses against all n enemies
    List 2 contains the individual gains against all n enemies
    """

    env.update_parameter("multiplemode", "no")

    # load trained weights for enemy i
    weights = result

    # test weights against enemies
    fits = []
    gains = []

    # Test against all enemies
    for en in test_enemies:
        env.update_parameter('enemies',[en])
        f,ep,ee,t = env.play(pcont=weights)
        gain = ep-ee
        fits.append(f)
        gains.append(gain)

        if verbose:
            print(f"Enemy {en}:   Fitness: {f:7.4f}   Gain: {gain:7.2f}")
    
    if verbose:
        print(f"Solution beats {sum(x > 0 for x in gains)} enemies. \
                \nSummed gain: {sum(gains):.2f}  Mean gain: {sum(gains)/8:.2f}")

    beaten = sum(x > 0 for x in gains)

    return fits, gains, beaten

In [49]:
# Test results of multiple pareto runs, finding the best result for each run,
# then showing how it performs against all 8 enemies.

gains_result = []            # summed gain of the selected solution, one entry per run
best_solution_weights = []   # weights of the selected solution, one entry per run


# loop over all training runs
for i, run_result in enumerate(training_results):
    # -inf (rather than a fixed -800) guarantees the first candidate is always
    # accepted, whatever its gains are.
    highest_gain_sum = float("-inf")
    max_beaten = 0
    # Reset per run so a run can never silently reuse the previous run's weights.
    best_weights = None

    # loop over pareto front (one row of X per solution) to find best individual
    for test_weights in run_result.X:
        fits, gains, n_beaten = test_solution_against_individuals(env, test_weights, verbose=False)
        gain_sum = sum(gains)

        # store best individual
        # NOTE(review): a candidate must improve the summed gain AND beat at least
        # as many enemies as the incumbent, so the outcome depends on the order
        # of the front - confirm this is the intended selection rule.
        if (gain_sum > highest_gain_sum) and (n_beaten >= max_beaten):
            highest_gain_sum = gain_sum
            max_beaten = n_beaten
            best_weights = test_weights

    if best_weights is None:
        raise ValueError(f"Training run {i+1} produced no solutions to test")

    best_solution_weights.append(best_weights)

    print(f"Training run {i+1} best solution:")
    _,gains,_ = test_solution_against_individuals(env, best_solution_weights[i], verbose=True)
    gains_result.append(sum(gains))
    print("\n")

Training run 1 best solution:
Enemy 1:   Fitness: 30.2474   Gain:  -60.00
Enemy 2:   Fitness: 90.8470   Gain:   64.00
Enemy 3:   Fitness: 30.9374   Gain:  -60.00
Enemy 4:   Fitness: 21.4053   Gain:  -70.00
Enemy 5:   Fitness: 86.4970   Gain:   23.20
Enemy 6:   Fitness: 66.3760   Gain:  -20.00
Enemy 7:   Fitness: 66.5194   Gain:  -20.00
Enemy 8:   Fitness: 84.3462   Gain:    5.20
Solution beats 3 enemies.                 
Summed gain: -137.60  Mean gain: -17.20


Training run 2 best solution:
Enemy 1:   Fitness: 21.2570   Gain:  -70.00
Enemy 2:   Fitness: 91.4906   Gain:   70.00
Enemy 3:   Fitness: 30.8880   Gain:  -60.00
Enemy 4:   Fitness: 21.4053   Gain:  -70.00
Enemy 5:   Fitness: 90.3965   Gain:   59.80
Enemy 6:   Fitness: 21.9062   Gain:  -70.00
Enemy 7:   Fitness: 75.2863   Gain:  -10.00
Enemy 8:   Fitness: 84.3462   Gain:    5.20
Solution beats 3 enemies.                 
Summed gain: -145.00  Mean gain: -18.12


Training run 3 best solution:
Enemy 1:   Fitness: 21.2699   Gain: 