#### Task2 Scalar Training

In [20]:
# imports framework
import os
from evoman.environment import Environment
from demo_controller import player_controller

import numpy as np

# Pymoo stuff
import pymoo
from pymoo.algorithms.moo.unsga3 import UNSGA3
from pymoo.optimize import minimize
from pymoo.core.problem import ElementwiseProblem
from pymoo.util.reference_direction import UniformReferenceDirectionFactory

#### Environment Setup

In [21]:
# Enemies to train on
train_enemies = [1, 2, 3, 8]

# Directory used for stat tracking / solution files. exist_ok=True makes the
# cell safe to re-run and avoids the check-then-create race.
experiment_name = f"training_SCAL{len(train_enemies)}"
os.makedirs(experiment_name, exist_ok=True)

# Initial Environment parameters
n_hidden_neurons = 10               # Hidden neurons in player_controller

# choose this for not using visuals and thus making experiments faster
headless = True
if headless:
    os.environ["SDL_VIDEODRIVER"] = "dummy"

# Initializes the simulation in multi-enemy mode (multiplemode="yes"): the AI
# player is evaluated against every enemy in train_enemies, with static enemies.
env = Environment(experiment_name=experiment_name,
                  enemies=train_enemies,
                  playermode="ai",
                  player_controller=player_controller(n_hidden_neurons),
                  enemymode="static",
                  multiplemode="yes",
                  level=2,
                  speed="fastest",
                  visuals=False,
                  logs="off")

# Uncomment to show env info.
# env.state_to_log()

#### Pymoo Problem definition

In [22]:
# Thin adapter between pymoo and the game environment
def evaluate(x, env=env):
    """
    Play one game with the controller weights `x` and hand back the result.

    `env` defaults to the environment object that exists when this function
    is defined; the return value is whatever `env.play` returns.
    """
    return env.play(pcont=x)

class SingleObjectiveFitness(ElementwiseProblem):
    """
    Single-objective pymoo problem: one decision variable per controller
    weight, each bounded to [-1, 1], with no constraints.
    """

    def __init__(self, n_var, **kwargs):
        self.n_var = n_var

        # Box constraints: every variable lives in [-1, 1]
        lower_bounds = -np.ones(n_var)
        upper_bounds = np.ones(n_var)
        self.xl = lower_bounds
        self.xu = upper_bounds

        # A single objective and zero inequality constraints
        super().__init__(
            n_var=n_var,
            n_obj=1,
            n_ieq_constr=0,
            xl=lower_bounds,
            xu=upper_bounds,
            **kwargs,
        )

    def _evaluate(self, x, out, *args, **kwargs):
        # evaluate() yields a 4-tuple; only its first entry (the fitness) is used.
        fitness, _second, _third, _fourth = evaluate(x)
        # pymoo minimises, so the fitness is negated to maximise it.
        out["F"] = -fitness

#### Define Training Function

In [23]:
def scalar_train(env, n_hidden_neurons: int=10, n_runs: int=10, n_gens: int=50, pop_size: int=100) -> list:
    """
    Run `n_runs` independent single-objective training runs of `n_gens`
    generations each and collect the optimisation results.

    Parameters
    ----------
    env : environment object; only `env.get_num_sensors()` is used here, to
        size the weight vector. NOTE: candidates are scored through the
        module-level `evaluate`, which uses the environment bound when that
        function was defined, not necessarily this argument.
    n_hidden_neurons : number of hidden neurons of the controller.
    n_runs : number of independent runs; run `i` uses `seed=i`, so each run
        is reproducible.
    n_gens : number of generations per run.
    pop_size : population size handed to UNSGA3 (default 100, the value that
        was previously hard-coded).

    Returns
    -------
    list with one result object (as returned by pymoo's `minimize`) per run.
    """
    # Weight count: (sensors + bias) * hidden + (hidden + bias) * 5.
    # The 5 is presumably the number of controller outputs -- TODO confirm
    # against player_controller.
    n_var = (env.get_num_sensors() + 1) * n_hidden_neurons + (n_hidden_neurons + 1) * 5

    results = []
    for run in range(n_runs):
        print(f"Scalar: Training run {run+1} of {n_runs} of {n_gens} generations\n")

        # Fresh problem and algorithm per run so no state leaks between runs.
        problem_scalar = SingleObjectiveFitness(n_var)
        ref_dirs_scalar = UniformReferenceDirectionFactory(1, n_points=1).do()
        algorithm_scalar = UNSGA3(ref_dirs_scalar, pop_size=pop_size)

        res = minimize(problem_scalar,
                       algorithm_scalar,
                       termination=('n_iter', n_gens),
                       seed=run,
                       verbose=True,
                       save_history=True)

        results.append(res)

    return results

#### Run Training  

In [24]:
# Short demonstration run: 10 runs of 5 generations each
# (the report used more generations).
training_results = scalar_train(
    env,
    n_hidden_neurons=n_hidden_neurons,
    n_runs=10,
    n_gens=5,
)

Scalar: Training run 1 of 10 of 5 generations

n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |      100 |      1 |             - |             -
     2 |      200 |      1 |  1.4184005344 |         ideal
     3 |      300 |      1 |  0.000000E+00 |             f
     4 |      400 |      1 |  1.521613E+01 |         ideal
     5 |      500 |      1 |  0.000000E+00 |             f
Scalar: Training run 2 of 10 of 5 generations

n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |      100 |      1 |             - |             -
     2 |      200 |      1 |  9.1448137637 |         ideal
     3 |      300 |      1 |  1.164181E+01 |         ideal
     4 |      400 |      1 |  0.000000E+00 |             f
     5 |      500 |      1 |  0.000000E+00 |             f
Scalar: Training run 3 of 10 of 5 generations

n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |      100 |      1 |             - |             -
     2 |      200 |      1 |  1.0

#### Test Solutions

In [25]:
def test_solution_against_individuals(env, result, test_enemies: list=[1,2,3,4,5,6,7,8], verbose: bool=True) -> tuple:
    """
    Returns 2 lists: 
    List 1 contains the fitnesses against all n enemies
    List 2 contains the individual gains against all n enemies
    """

    env.update_parameter("multiplemode", "no")

    # load trained weights for enemy i
    weights = result

    # test weights against enemies
    fits = []
    gains = []

    # Test against all enemies
    for en in test_enemies:
        env.update_parameter('enemies',[en])
        f,ep,ee,t = env.play(pcont=weights)
        gain = ep-ee
        fits.append(f)
        gains.append(gain)

        if verbose:
            print(f"Enemy {en}:   Fitness: {f:7.4f}   Gain: {gain:7.2f}")
    
    if verbose:
        print(f"Solution beats {sum(x > 0 for x in gains)} enemies. \
                \nSummed gain: {sum(gains):.2f}  Mean gain: {sum(gains)/8:.2f}")

    beaten = sum(x > 0 for x in gains)

    return fits, gains, beaten

In [26]:
# Summed gain of each run's best solution, tested enemy by enemy
gains_result = []
for run_number, run_result in enumerate(training_results, start=1):
    print(f"Training run {run_number} best solution:")
    _, gains, _ = test_solution_against_individuals(env, run_result.X, verbose=True)
    gains_result.append(sum(gains))
    print("\n")

Training run 1 best solution:
Enemy 1:   Fitness: 48.2254   Gain:  -40.00
Enemy 2:   Fitness: 91.9724   Gain:   76.00
Enemy 3:   Fitness: 48.1799   Gain:  -40.00
Enemy 4:   Fitness: 20.5230   Gain:  -70.00
Enemy 5:   Fitness: 56.5261   Gain:  -30.00
Enemy 6:   Fitness: -5.5683   Gain: -100.00
Enemy 7:   Fitness: 22.4150   Gain:  -70.00
Enemy 8:   Fitness: 74.4097   Gain:  -10.00
Solution beats 1 enemies.                 
Summed gain: -284.00  Mean gain: -35.50


Training run 2 best solution:
Enemy 1:   Fitness: 48.2634   Gain:  -40.00
Enemy 2:   Fitness: 57.2009   Gain:  -30.00
Enemy 3:   Fitness: 66.1770   Gain:  -20.00
Enemy 4:   Fitness: 21.2443   Gain:  -70.00
Enemy 5:   Fitness: 90.2303   Gain:   62.80
Enemy 6:   Fitness: -4.8283   Gain: -100.00
Enemy 7:   Fitness:  5.4165   Gain:  -90.00
Enemy 8:   Fitness: 85.0177   Gain:   13.00
Solution beats 2 enemies.                 
Summed gain: -274.20  Mean gain: -34.27


Training run 3 best solution:
Enemy 1:   Fitness: 39.1652   Gain: 