In [106]:
import random
import math
import copy
import csv

import numpy as np

import matplotlib
import matplotlib.pyplot as plt

import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

# set up matplotlib
# True when the active matplotlib backend name contains 'inline'
# (i.e. figures are rendered inside the notebook).
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    # Only imported for inline backends; `display` is undefined otherwise.
    from IPython import display

# Turn on matplotlib's interactive mode.
plt.ion()
# Module-level torch device: CUDA when available, otherwise CPU.
# select_action() below places the tensors it returns on this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [107]:
# read example file
def read_example_file(filename):
    """Parse a WTA problem instance from a plain-text file.

    File layout, one number per line after the header:
      * header line: either ``N`` (same number of targets and weapons) or
        ``n_targets n_weapons``;
      * ``n_targets`` lines holding the target values;
      * ``n_weapons * n_targets`` lines holding the probabilities, grouped
        weapon by weapon.

    Returns:
        (n_targets, n_weapons, values, probabilities) where ``values`` is a
        list of floats and ``probabilities[i][j]`` is the float read for
        weapon ``i`` and target ``j``.

    Raises:
        ValueError: if a line cannot be parsed as a number, or the header does
            not hold one or two integers.
    """
    with open(filename) as handle:
        header = [int(token) for token in handle.readline().split()]
        if len(header) == 1:
            n_targets = n_weapons = header[0]
        else:
            n_targets, n_weapons = header

        # Target values come first, one per line.
        values = [float(handle.readline()) for _ in range(n_targets)]

        # Then one row of n_targets probabilities for each weapon.
        probabilities = [
            [float(handle.readline()) for _ in range(n_targets)]
            for _ in range(n_weapons)
        ]
    return n_targets, n_weapons, values, probabilities

# DQN-related code

In [108]:
class DQN(nn.Module):
    """Feed-forward Q-network over a flattened weapon-target assignment.

    The input state is a 2-D tensor whose first ``m`` columns hold, for each
    weapon, the index of its currently assigned target, and whose remaining
    columns hold numeric target values (``n`` of them, so the width matches
    ``lin1``). Target indices are embedded because their numeric value carries
    no meaning; the embeddings are flattened, concatenated with the values and
    passed through two linear layers.

    Args:
        n: number of distinct target indices (embedding vocabulary size).
        m: number of assignment slots (weapons) at the front of the state.
        embedding_size: width of each target embedding.
        units: width of the hidden layer.
    """

    def __init__(self, n, m, embedding_size=8, units=64):
        super().__init__()
        self.n, self.m = n, m
        self.embedding_size = embedding_size

        # Flattened embedding of the m assignments, plus the n raw values.
        self.assignment_size = m * embedding_size
        self.input_size = self.assignment_size + n
        # One output per (weapon, target) pair.
        self.output_size = m * n

        # Idea noted by the original author: skip the embedding and feed the
        # target values in place of the target indices.
        self.embedding = nn.Embedding(n, self.embedding_size)
        self.lin1 = nn.Linear(self.input_size, units)
        self.drop1 = nn.Dropout(0.2)
        self.lin2 = nn.Linear(units, self.output_size)
        # Not used by forward(); kept so the module layout is unchanged.
        self.drop2 = nn.Dropout(0.2)

    def forward(self, state):
        """Map a batch of states to a ``(batch, m * n)`` tensor of ReLU-ed outputs.

        Called with a single-row batch when choosing an action, or with a
        larger batch.
        """
        weapon_targets = state[:, :self.m].long()
        target_values = state[:, self.m:].float()

        # (batch, m, embedding_size) -> (batch, m * embedding_size)
        embedded = self.embedding(weapon_targets)
        embedded = embedded.view(-1, self.assignment_size).float()

        features = torch.cat([embedded, target_values], dim=1)
        hidden = F.relu(self.drop1(self.lin1(features)))
        return F.relu(self.lin2(hidden))

# with dueling networks
class DuelingDQN(nn.Module):
    """Dueling variant of the Q-network: separate value and advantage heads.

    The input state has the same layout as for ``DQN``: the first ``m``
    columns are target indices (one per weapon) that get embedded, the
    remaining ``n`` columns are numeric target values. A shared hidden layer
    feeds a scalar state-value head and an ``m * n`` advantage head, combined
    as ``Q = V + (A - mean(A))``.

    Args:
        n: number of distinct target indices (embedding vocabulary size).
        m: number of assignment slots (weapons) at the front of the state.
        embedding_size: width of each target embedding.
        units: width of the shared hidden layer and of both heads.
    """

    def __init__(self, n, m, embedding_size=8, units=128):
        super(DuelingDQN, self).__init__()
        # The assignment becomes embedded, so it has size m * embedding_size
        # when flattened; the extra n comes from the values attached.
        self.assignment_size = m * embedding_size
        self.input_size = self.assignment_size + n
        self.output_size = m * n
        self.n = n
        self.m = m

        self.units = units

        self.embedding_size = embedding_size
        # Embed the targets, since the actual numerical value of the
        # target indices doesn't mean anything.
        # Another idea: skip the middleman and replace the targets
        # with the target values.
        self.embedding = nn.Embedding(n, self.embedding_size)
        self.lin1 = nn.Linear(self.input_size, units)
        self.drop1 = nn.Dropout(0.2)

        # Head estimating the value of a state.
        self.value_stream = nn.Sequential(
            nn.Linear(units, units),
            nn.ReLU(),
            nn.Linear(units, 1)
        )
        # Head estimating the advantage of each action in that state.
        self.advantage_stream = nn.Sequential(
            nn.Linear(units, units),
            nn.ReLU(),
            nn.Linear(units, self.output_size)
        )

    def forward(self, state):
        """Return a ``(batch, m * n)`` tensor of Q-values for a batch of states."""
        assignment = state[:, :self.m].long()
        assignment = self.embedding(assignment)

        values = state[:, self.m:].float()

        # Flatten the assignment embedding
        assignment = assignment.view(-1, self.assignment_size).float()

        # and concatenate the values
        x = torch.cat([assignment, values], dim=1)
        x = F.relu(self.drop1(self.lin1(x)))

        state_value = self.value_stream(x)
        advantages = self.advantage_stream(x)
        # Centre the advantages per sample (over the action dimension). A
        # global .mean() would also average over the batch and make each row's
        # Q-values depend on the other rows. For a single-row batch, which is
        # what select_action uses, both forms give the same result.
        qvals = state_value + (advantages - advantages.mean(dim=1, keepdim=True))

        return qvals

    def feature_size(self):
        """Return the width of the flattened feature vector fed into ``lin1``.

        NOTE(review): the previous body referenced ``self.conv``, ``autograd``
        and ``self.input_dim``, none of which exist in this class, so it could
        only raise. This network has no convolutional stage; the equivalent
        quantity here is ``input_size`` - confirm no caller expected otherwise.
        """
        return self.input_size

In [109]:
def is_possible(state, weapon, target):
    '''
    Tell whether assigning `weapon` to `target` would change the state.

    Re-assigning a weapon to the target it already has is a no-op, so such an
    action is reported as not possible. `state[weapon]` must support `.item()`
    (e.g. a torch tensor element).
    '''
    already_assigned = state[weapon].item()
    return target != already_assigned

def select_action(model, state, n):
    '''
    Pick the best-scoring action that actually changes the assignment.

    The model is evaluated (in eval mode, without gradients) on `state` as a
    single-row batch; its outputs are ranked from highest to lowest and the
    first action for which `is_possible` holds is returned.

    Args:
        model: network mapping a (1, len(state)) float batch to one score per
            flat action index.
        state: 1-D tensor; entry `w` is read as the current target of weapon `w`.
        n: modulus used to split a flat action index into
            `weapon = index // n` and `target = index % n`.

    Returns:
        A one-element integer tensor holding the chosen flat action index,
        placed on the module-level `device`.

    Raises:
        ValueError: if no action passes `is_possible`.
    '''
    # Remember the caller's mode so that a model deliberately put in eval mode
    # is not silently switched back to train mode.
    was_training = model.training
    with torch.no_grad():
        model.eval()
        try:
            state_batch = state.unsqueeze(0).float()
            ranked = torch.sort(model(state_batch), descending=True, dim=1)[1]
        finally:
            model.train(was_training)

    # Try until we get a valid action
    for flat_index in ranked[0].tolist():
        # Integer arithmetic: `/` on tensors is true division in current
        # PyTorch and would give a float weapon index that cannot be used to
        # index `state`. This matches the `//` used by decode_action.
        weapon, target = divmod(flat_index, n)

        if is_possible(state, weapon, target):
            return torch.tensor([flat_index], device=device)

    # This should never happen
    raise ValueError('Invalid state: no possible action')
        
def decode_action(action, n):
    '''
    Split a flat action index into its (weapon, target) pair.

    The weapon is the floor quotient `action // n` and the target is the
    remainder `action % n`. Works for plain integers and for anything else
    that supports `//` and `%` (e.g. integer tensors).
    '''
    weapon = action // n
    target = action % n
    return weapon, target

# General GA

In [148]:
class WTA_1D_General_GA:
    """Genetic algorithm for the weapon-target assignment (WTA) problem.

    An individual ("assignment") is a list of length ``n_weapons`` whose i-th
    entry is the 0-indexed target assigned to weapon i. Offspring are produced
    by EX crossover, mutated either randomly or by a pre-trained (dueling) DQN,
    and the best ``population_size`` individuals of parents + offspring
    survive each iteration.

    ``config`` keys read by this class:
        'example_file'    path passed to ``read_example_file``
        'population_size' number of individuals kept per generation
        'n_offsprings'    offspring per generation (produced in pairs)
        'm_c'             exchange rounds per crossover
        'm_m'             gene mutations per individual
    """

    def __init__(self, config, example_number, mutation_method='random'):
        """Load the problem instance and both pre-trained models.

        Args:
            config: dict of settings (see class docstring).
            example_number: selects the model files
                ``./trained_models/WTA<example_number>[_dueling]``.
            mutation_method: 'random', 'dqn' or 'dueling_dqn'.
        """
        self.config = config
        # n_targets: number of targets, n_weapons: number of weapons
        # values: list of target values (size: n_targets)
        # probabilities: list of destruction probabilities (size: n_weapons x n_targets)
        self.n_targets, self.n_weapons, self.values, self.probabilities = read_example_file(config['example_file'])
        # state: list of assignments (size: population_size x n_weapons, 0-indexed for target of each weapon)
        self.state = self.generate_initial_population(config['population_size'])
        self.good_gene_dict = self.get_good_gene_dict()
        self.mutation_method = mutation_method
        # NOTE(review): torch.load unpickles whole model objects here; only
        # load files from a trusted source. Both models are loaded regardless
        # of mutation_method, so both files must exist.
        self.dqn_model = torch.load('./trained_models/WTA{}'.format(example_number), map_location=torch.device('cpu'))
        self.dqn_model.eval()
        self.dueling_dqn_model = torch.load('./trained_models/WTA{}_dueling'.format(example_number), map_location=torch.device('cpu'))
        self.dueling_dqn_model.eval()

    # get the best target for each weapon (used for ex_crossover)
    def get_good_gene_dict(self):
        """Map each weapon to the target maximising value * probability.

        Ties go to the lowest target index.
        """
        good_gene_dict = {}
        for weapon in range(self.n_weapons):
            expected_damage = [
                self.values[target] * self.probabilities[weapon][target]
                for target in range(self.n_targets)
            ]
            good_gene_dict[weapon] = expected_damage.index(max(expected_damage))
        return good_gene_dict

    # one-cut-point crossover (not used by run())
    def ocp_crossover(self, population):
        """Return ``population_size`` children built as father[:p] + mother[p:]."""
        crossover_state = []
        for _ in range(self.config['population_size']):
            father, mother = random.sample(population, 2)
            point = random.randint(0, self.n_weapons - 1)
            crossover_state.append(father[:point] + mother[point:])
        return crossover_state

    # ex crossover as explained in the paper
    # repeat the following process for m_c < n_target times
    # 1. find genes (weapon-target pair) with the same value of target in both parents
    # 2. inherit good genes (good gene defined as the maximum target for each weapon)
    # 3. randomly select two genes not inherited from parents
    # 4. exchange genes to generate offspring
    def ex_crossover(self, population):
        """Produce ``n_offsprings // 2`` pairs of children by EX crossover.

        Every child is a fresh list: ``population`` is left untouched and no
        two entries of the returned pool share storage, even when the same
        parent is drawn more than once.
        """
        pool = []
        for _ in range(self.config['n_offsprings'] // 2):
            father, mother = random.sample(population, 2)
            # Work on copies so repeated selection of a parent cannot put the
            # same list object into the pool several times.
            child1, child2 = list(father), list(mother)
            for _ in range(self.config['m_c']):
                # step 2: genes both children share AND that are the weapon's
                # best target are protected from exchange
                inherited = {
                    i for i in range(self.n_weapons)
                    if child1[i] == child2[i] == self.good_gene_dict[i]
                }
                # A list, not a set: random.sample rejects sets on Python 3.11+.
                gene_swap_candidates = [i for i in range(self.n_weapons) if i not in inherited]
                if len(gene_swap_candidates) < 2:
                    break

                # steps 3-4: cross-exchange two unprotected positions
                swap_idx1, swap_idx2 = random.sample(gene_swap_candidates, 2)
                child1[swap_idx1], child2[swap_idx2] = child2[swap_idx2], child1[swap_idx1]
                child1[swap_idx2], child2[swap_idx1] = child2[swap_idx1], child1[swap_idx2]

            pool.append(child1)
            pool.append(child2)
        return pool

    def mutate_random(self, population):
        """Return copies of the individuals with ``m_m`` random genes reassigned.

        Each mutation picks a uniformly random weapon and gives it a uniformly
        random target. The input individuals are not modified.
        """
        mutated_population = []
        for assignment in population:
            mutated = list(assignment)
            for _ in range(self.config['m_m']):
                mutated_weapon = random.randrange(self.n_weapons)
                mutated_target = random.randrange(self.n_targets)
                mutated[mutated_weapon] = mutated_target
            mutated_population.append(mutated)
        return mutated_population

    def mutate_dqn(self, population, model_type):
        """Return copies of the individuals mutated by a trained network.

        For each of the ``m_m`` rounds the state (assignment followed by the
        target values) is given to ``select_action`` and the chosen
        (weapon, target) pair is applied. Inputs are not modified.

        Args:
            population: list of assignments.
            model_type: 'dqn' or 'dueling_dqn'.

        Raises:
            ValueError: for any other ``model_type``.
        """
        if model_type == 'dqn':
            model = self.dqn_model
        elif model_type == 'dueling_dqn':
            model = self.dueling_dqn_model
        else:
            raise ValueError('Unknown model_type: {!r}'.format(model_type))

        mutated_population = []
        for assignment in population:
            mutated = list(assignment)
            for _ in range(self.config['m_m']):
                state = torch.tensor(np.concatenate([mutated, self.values]), device=torch.device('cpu'))
                # Flat actions are decoded as (index // n, index % n) with the
                # remainder being the target, so n must be the target count
                # (identical to n_weapons for square instances).
                action = select_action(model, state, self.n_targets)
                weapon, target = decode_action(action, self.n_targets)
                # .item() works on any device and yields plain Python ints,
                # keeping assignments as homogeneous int lists.
                mutated[int(weapon.item())] = int(target.item())
            mutated_population.append(mutated)
        return mutated_population

    def reward(self, assignment):
        """Return sum over targets of value * (1 - survival probability).

        A target's survival probability is the product of
        ``1 - probabilities[w][t]`` over every weapon ``w`` assigned to ``t``.
        """
        survival_probabilities = [1] * self.n_targets
        for weapon in range(self.n_weapons):
            target = assignment[weapon]
            survival_probabilities[target] *= 1 - self.probabilities[weapon][target]
        reward = 0
        for target in range(self.n_targets):
            reward += self.values[target] * (1 - survival_probabilities[target])
        return reward

    # choose the best population from the pool of population + offspring
    def evolution_strategy(self, pool):
        """Return the ``population_size`` highest-reward individuals, best first."""
        ranked = sorted(pool, key=self.reward, reverse=True)
        return ranked[:self.config['population_size']]

    # helper functions
    def generate_initial_population(self, population_size):
        """Return ``population_size`` assignments with uniformly random targets."""
        targets = list(range(self.n_targets))
        return [
            [random.choice(targets) for _ in range(self.n_weapons)]
            for _ in range(population_size)
        ]

    def reset(self):
        """Set ``self.state`` to a single all-unassigned (-1) assignment.

        NOTE(review): elsewhere ``self.state`` holds a whole population; run()
        does not read it - confirm the intended use.
        """
        self.state = [-1] * self.n_weapons

    def run(self, max_iter, verbose=False):
        """Evolve a fresh random population for at most ``max_iter`` iterations.

        Stops early once every individual in the population is identical.

        Args:
            max_iter: upper bound on the number of iterations.
            verbose: print the best individual every 40 iterations and on
                convergence.

        Returns:
            (best_assignment, best_reward, iterations_executed); the iteration
            count is 0 when ``max_iter`` is 0.
        """
        assert self.mutation_method in ['random', 'dqn', 'dueling_dqn']
        population = self.generate_initial_population(self.config['population_size'])

        n_iter = 0  # stays 0 if the loop body never runs
        for i_iter in range(max_iter):
            n_iter = i_iter + 1
            pool = self.ex_crossover(population)
            if self.mutation_method == 'random':
                pool = self.mutate_random(pool)
            else:
                pool = self.mutate_dqn(pool, self.mutation_method)
            population = self.evolution_strategy(pool + population)

            if all(x == population[0] for x in population):
                if verbose:
                    print('converged in iter {}'.format(n_iter))
                break

            if verbose and n_iter % 40 == 0:
                # evolution_strategy returns the population best-first
                best_so_far = population[0]
                print('iter {}: reward = {:.2f}'.format(n_iter, self.reward(best_so_far)))
                print(best_so_far)

        best = max(population, key=self.reward)
        return best, self.reward(best), n_iter

In [128]:
# Settings for the single demo run on the WTA1 example.
config = dict(
    example_file='./examples/WTA1',
    population_size=50,
    n_offsprings=50,
    m_c=1,
    m_m=1,  # for fair comparison set m_m to 1
)

In [129]:
# Demo: DQN-guided mutation on example 1, capped at 800 iterations.
wta_1d = WTA_1D_General_GA(config=config, example_number=1, mutation_method='dqn')

# Last expression of the cell, so the (assignment, reward, iterations)
# tuple is displayed as the cell output.
wta_1d.run(max_iter=800, verbose=True)

iter 40: reward = 328.64
[4, 3, 2, 1, 0]
iter 80: reward = 328.64
[4, 3, 2, 1, 0]
iter 120: reward = 328.64
[4, 3, 2, 1, 0]
iter 160: reward = 328.64
[4, 3, 2, 1, 0]
iter 200: reward = 328.64
[4, 3, 2, 1, 0]
iter 240: reward = 328.64
[4, 3, 2, 1, 0]
iter 280: reward = 328.64
[4, 3, 2, 1, 0]
iter 320: reward = 328.64
[4, 3, 2, 1, 0]
iter 360: reward = 328.64
[4, 3, 2, 1, 0]
iter 400: reward = 328.64
[4, 3, 2, 1, 0]
iter 440: reward = 328.64
[4, 3, 2, 1, 0]
iter 480: reward = 328.64
[4, 3, 2, 1, 0]
iter 520: reward = 328.64
[4, 3, 2, 1, 0]
iter 560: reward = 328.64
[4, 3, 2, 1, 0]
iter 600: reward = 328.64
[4, 3, 2, 1, 0]
iter 640: reward = 328.64
[4, 3, 2, 1, 0]
iter 680: reward = 328.64
[4, 3, 2, 1, 0]
iter 720: reward = 328.64
[4, 3, 2, 1, 0]
iter 760: reward = 328.64
[4, 3, 2, 1, 0]
iter 800: reward = 328.64
[4, 3, 2, 1, 0]


([4, 3, 2, 1, 0], 328.636, 800)

# Experiments

## WTA1

In [121]:
# Base size for the WTA1 experiments: the cells below use
# N_WEAPONS * multiplier as the population size and offspring count.
N_WEAPONS = 5

In [132]:
# Grid over population/offspring multipliers for WTA1 with random mutation;
# one CSV row per configuration. The `with` block closes the file even if a
# run raises, and newline='' is what the csv module expects for its files.
with open('./experiments/WTA1/random_mutation.csv', 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    # One list element per column; a single comma-containing string would be
    # written as one quoted field.
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 31, 5): # population multiplier
        for m_offsprings in range(5, 31, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA1',
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, 1, mutation_method='random')
            result, reward, convergence_iter = wta_1d.run(1000, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 159
m_population: 5, m_offsprings: 10 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 64
m_population: 5, m_offsprings: 15 - result: [4, 3, 2, 1, 4], reward: 310.192, convergence: 18
m_population: 5, m_offsprings: 20 - result: [4, 0, 3, 1, 2], reward: 291.488, convergence: 3
m_population: 5, m_offsprings: 25 - result: [1, 3, 2, 4, 1], reward: 298.549, convergence: 3
m_population: 5, m_offsprings: 30 - result: [4, 3, 2, 1, 2], reward: 304.721, convergence: 18
m_population: 10, m_offsprings: 5 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 89
m_population: 10, m_offsprings: 10 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 245
m_population: 10, m_offsprings: 15 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 229
m_population: 10, m_offsprings: 20 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 420
m_population: 10, m_offsprings: 25 - result: [4, 3, 2, 1

In [130]:
# Grid over population/offspring multipliers for WTA1 with DQN mutation;
# one CSV row per configuration. The `with` block closes the file even if a
# run raises, and newline='' is what the csv module expects for its files.
with open('./experiments/WTA1/dqn_mutation.csv', 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    # One list element per column; a single comma-containing string would be
    # written as one quoted field.
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 31, 5): # population multiplier
        for m_offsprings in range(5, 31, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA1',
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, 1, mutation_method='dqn')
            result, reward, convergence_iter = wta_1d.run(1000, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 14
m_population: 5, m_offsprings: 10 - result: [2, 3, 0, 1, 4], reward: 319.545, convergence: 3
m_population: 5, m_offsprings: 15 - result: [2, 0, 1, 3, 4], reward: 300.508, convergence: 5
m_population: 5, m_offsprings: 20 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 5
m_population: 5, m_offsprings: 25 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 7
m_population: 5, m_offsprings: 30 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 3
m_population: 10, m_offsprings: 5 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 40
m_population: 10, m_offsprings: 10 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 18
m_population: 10, m_offsprings: 15 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 14
m_population: 10, m_offsprings: 20 - result: [0, 3, 2, 1, 4], reward: 327.082, convergence: 14
m_population: 10, m_offsprings: 25 - result: [0, 3, 2, 1, 4], r

In [131]:
# Grid over population/offspring multipliers for WTA1 with dueling-DQN
# mutation; one CSV row per configuration. The `with` block closes the file
# even if a run raises, and newline='' is what the csv module expects.
with open('./experiments/WTA1/dueling_dqn_mutation.csv', 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    # One list element per column; a single comma-containing string would be
    # written as one quoted field.
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 31, 5): # population multiplier
        for m_offsprings in range(5, 31, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA1',
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, 1, mutation_method='dueling_dqn')
            result, reward, convergence_iter = wta_1d.run(1000, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 165
m_population: 5, m_offsprings: 10 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 626
m_population: 5, m_offsprings: 15 - result: [2, 3, 0, 1, 4], reward: 319.545, convergence: 212
m_population: 5, m_offsprings: 20 - result: [2, 3, 0, 1, 4], reward: 319.545, convergence: 1000
m_population: 5, m_offsprings: 25 - result: [4, 3, 0, 1, 2], reward: 310.818, convergence: 1
m_population: 5, m_offsprings: 30 - result: [4, 3, 0, 1, 2], reward: 310.818, convergence: 1
m_population: 10, m_offsprings: 5 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 110
m_population: 10, m_offsprings: 10 - result: [4, 3, 0, 1, 2], reward: 310.818, convergence: 5
m_population: 10, m_offsprings: 15 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 200
m_population: 10, m_offsprings: 20 - result: [4, 3, 2, 1, 0], reward: 328.636, convergence: 261
m_population: 10, m_offsprings: 25 - result: [4, 3, 0

## WTA2

In [150]:
# Base size for the WTA2 experiments: the cells below use
# N_WEAPONS * multiplier as the population size and offspring count.
N_WEAPONS = 10
# Iteration cap passed to run() in the WTA2 experiment cells.
max_iter = 3000

In [142]:
# Grid over population/offspring multipliers for WTA2 with random mutation;
# one CSV row per configuration. The `with` block closes the file even if a
# run raises, and newline='' is what the csv module expects for its files.
with open('./experiments/WTA2/random_mutation.csv', 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    # One list element per column; a single comma-containing string would be
    # written as one quoted field.
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 21, 5): # population multiplier
        for m_offsprings in range(5, 21, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA2',
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, 2, mutation_method='random')
            result, reward, convergence_iter = wta_1d.run(max_iter, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [3, 8, 2, 6, 5, 0, 1, 4, 9, 7], reward: 622.688, convergence: 1762
m_population: 5, m_offsprings: 10 - result: [2, 8, 7, 6, 5, 0, 1, 4, 9, 3], reward: 616.032, convergence: 2609
m_population: 5, m_offsprings: 15 - result: [3, 8, 9, 6, 5, 0, 2, 4, 1, 7], reward: 620.031, convergence: 3000
m_population: 5, m_offsprings: 20 - result: [2, 4, 8, 9, 0, 1, 7, 6, 9, 3], reward: 552.103, convergence: 1625
m_population: 10, m_offsprings: 5 - result: [2, 8, 3, 6, 5, 0, 1, 4, 9, 7], reward: 619.627, convergence: 3000
m_population: 10, m_offsprings: 10 - result: [3, 8, 2, 6, 5, 0, 1, 4, 9, 7], reward: 622.688, convergence: 2142
m_population: 10, m_offsprings: 15 - result: [3, 8, 9, 6, 5, 0, 1, 4, 2, 7], reward: 618.250, convergence: 3000
m_population: 10, m_offsprings: 20 - result: [3, 8, 2, 6, 5, 0, 1, 4, 9, 7], reward: 622.688, convergence: 3000
m_population: 15, m_offsprings: 5 - result: [3, 8, 2, 6, 5, 0, 1, 4, 9, 7], reward: 622.688, convergence: 3000

In [151]:
# Grid over population/offspring multipliers for WTA2 with DQN mutation;
# one CSV row per configuration. The `with` block closes the file even if a
# run raises, and newline='' is what the csv module expects for its files.
with open('./experiments/WTA2/dqn_mutation.csv', 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    # One list element per column (the previous header literal was missing
    # its closing quote and would not parse).
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 21, 5): # population multiplier
        for m_offsprings in range(5, 21, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA2',
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, 2, mutation_method='dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [6, 0, 2, 9, 4, 8, 1, 3, 9, 7], reward: 584.261, convergence: 3000
m_population: 5, m_offsprings: 10 - result: [2, 8, 9, 6, 4, 0, 7, 3, 1, 3], reward: 597.863, convergence: 3000
m_population: 5, m_offsprings: 15 - result: [0, 8, 7, 3, 4, 2, 1, 2, 9, 0], reward: 536.415, convergence: 82
m_population: 5, m_offsprings: 20 - result: [3, 0, 9, 1, 4, 8, 7, 6, 2, 9], reward: 573.755, convergence: 3000
m_population: 10, m_offsprings: 5 - result: [2, 8, 9, 6, 4, 0, 1, 7, 7, 3], reward: 596.992, convergence: 3000
m_population: 10, m_offsprings: 10 - result: [3, 0, 2, 6, 4, 8, 1, 3, 9, 7], reward: 589.522, convergence: 3000
m_population: 10, m_offsprings: 15 - result: [0, 8, 9, 6, 4, 2, 1, 7, 5, 3], reward: 603.224, convergence: 2762
m_population: 10, m_offsprings: 20 - result: [0, 8, 9, 6, 4, 2, 1, 7, 5, 3], reward: 603.224, convergence: 2466
m_population: 15, m_offsprings: 5 - result: [2, 8, 9, 6, 4, 0, 7, 3, 1, 3], reward: 597.863, convergence: 3000
m

In [152]:
# Grid over population/offspring multipliers for WTA2 with dueling-DQN
# mutation; one CSV row per configuration. The `with` block closes the file
# even if a run raises, and newline='' is what the csv module expects.
with open('./experiments/WTA2/dueling_dqn_mutation.csv', 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    # One list element per column (the previous header literal was missing
    # its closing quote and would not parse).
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 21, 5): # population multiplier
        for m_offsprings in range(5, 21, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA2',
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, 2, mutation_method='dueling_dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [4, 3, 8, 2, 0, 9, 1, 7, 5, 6], reward: 553.869, convergence: 7
m_population: 5, m_offsprings: 10 - result: [4, 3, 8, 2, 0, 9, 1, 7, 5, 6], reward: 553.869, convergence: 3
m_population: 5, m_offsprings: 15 - result: [4, 3, 8, 2, 0, 9, 1, 7, 5, 6], reward: 553.869, convergence: 2
m_population: 5, m_offsprings: 20 - result: [4, 3, 8, 2, 0, 9, 1, 7, 5, 6], reward: 553.869, convergence: 2
m_population: 10, m_offsprings: 5 - result: [6, 8, 9, 2, 4, 0, 1, 7, 5, 3], reward: 603.382, convergence: 713
m_population: 10, m_offsprings: 10 - result: [6, 8, 9, 2, 4, 0, 1, 7, 5, 3], reward: 603.382, convergence: 473
m_population: 10, m_offsprings: 15 - result: [4, 3, 8, 2, 0, 9, 1, 7, 5, 6], reward: 553.869, convergence: 4
m_population: 10, m_offsprings: 20 - result: [6, 8, 9, 2, 0, 4, 1, 7, 5, 3], reward: 601.917, convergence: 500
m_population: 15, m_offsprings: 5 - result: [3, 8, 9, 6, 2, 0, 1, 4, 5, 7], reward: 616.805, convergence: 3000
m_population: 15,

## WTA3

In [155]:
# Base size for the WTA3 experiments: the cells below use
# N_WEAPONS * multiplier as the population size and offspring count.
N_WEAPONS = 20
# Iteration cap passed to run() in the WTA3 experiment cells.
max_iter = 5000
# Example number: formatted into the example/experiment paths and passed to
# WTA_1D_General_GA as example_number.
N_WTA = 3

In [156]:
# Grid over population/offspring multipliers for example N_WTA with random
# mutation; one CSV row per configuration. The `with` block closes the file
# even if a run raises, and newline='' is what the csv module expects.
with open('./experiments/WTA{}/random_mutation.csv'.format(N_WTA), 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    # One list element per column; a single comma-containing string would be
    # written as one quoted field.
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 21, 5): # population multiplier
        for m_offsprings in range(5, 21, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA{}'.format(N_WTA),
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='random')
            result, reward, convergence_iter = wta_1d.run(max_iter, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [2, 4, 17, 8, 1, 18, 5, 10, 7, 12, 3, 19, 13, 15, 11, 6, 9, 14, 16, 0], reward: 943.237, convergence: 5000
m_population: 5, m_offsprings: 10 - result: [19, 6, 11, 9, 13, 10, 3, 17, 7, 12, 18, 14, 2, 0, 4, 15, 8, 5, 16, 1], reward: 937.955, convergence: 5000
m_population: 5, m_offsprings: 15 - result: [11, 14, 17, 15, 6, 8, 12, 13, 2, 5, 18, 4, 16, 10, 0, 7, 0, 4, 6, 9], reward: 769.975, convergence: 731
m_population: 5, m_offsprings: 20 - result: [0, 9, 17, 16, 10, 10, 9, 8, 14, 12, 4, 13, 11, 11, 8, 3, 5, 7, 2, 7], reward: 779.508, convergence: 620
m_population: 10, m_offsprings: 5 - result: [6, 1, 2, 9, 3, 18, 5, 10, 7, 12, 15, 19, 13, 4, 11, 17, 8, 0, 16, 14], reward: 952.597, convergence: 5000
m_population: 10, m_offsprings: 10 - result: [4, 8, 5, 9, 1, 3, 2, 13, 7, 6, 10, 18, 14, 12, 11, 17, 19, 0, 15, 16], reward: 940.441, convergence: 5000
m_population: 10, m_offsprings: 15 - result: [2, 6, 11, 9, 15, 3, 5, 8, 10, 19, 4, 18, 13, 7, 1, 1

In [158]:
# Grid over population/offspring multipliers for example N_WTA with DQN
# mutation; one CSV row per configuration. The `with` block closes the file
# even if a run raises, and newline='' is what the csv module expects.
with open('./experiments/WTA{}/dqn_mutation.csv'.format(N_WTA), 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 21, 5): # population multiplier
        for m_offsprings in range(5, 21, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA{}'.format(N_WTA),
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 13], reward: 892.151, convergence: 22
m_population: 5, m_offsprings: 10 - result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 13], reward: 892.151, convergence: 7
m_population: 5, m_offsprings: 15 - result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 13], reward: 892.151, convergence: 4
m_population: 5, m_offsprings: 20 - result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 13], reward: 892.151, convergence: 4
m_population: 10, m_offsprings: 5 - result: [19, 6, 17, 9, 3, 10, 5, 13, 7, 12, 18, 4, 2, 15, 11, 0, 8, 14, 16, 1], reward: 944.753, convergence: 5000
m_population: 10, m_offsprings: 10 - result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 13], reward: 892.151, convergence: 18
m_population: 10, m_offsprings: 15 - result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7

In [159]:
# Grid over population/offspring multipliers for example N_WTA with
# dueling-DQN mutation; one CSV row per configuration. The `with` block closes
# the file even if a run raises, and newline='' is what the csv module expects.
with open('./experiments/WTA{}/dueling_dqn_mutation.csv'.format(N_WTA), 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['population_multiplier', 'offsprings_multiplier', 'result', 'reward', 'convergence_iter'])
    for m_population in range(5, 21, 5): # population multiplier
        for m_offsprings in range(5, 21, 5): # offsprings multiplier
            config = {
                'example_file': './examples/WTA{}'.format(N_WTA),
                'population_size': N_WEAPONS * m_population,
                'n_offsprings': N_WEAPONS * m_offsprings,
                'm_c': 1,
                'm_m': 1 # for fair comparison set m_m to 1
            }
            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dueling_dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, verbose=False)
            print('m_population: {}, m_offsprings: {} - result: {}, reward: {:.3f}, convergence: {}'.format(m_population, m_offsprings, result, reward, convergence_iter))
            wr.writerow([m_population, m_offsprings, '_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

m_population: 5, m_offsprings: 5 - result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 9, 7, 16], reward: 876.899, convergence: 18
m_population: 5, m_offsprings: 10 - result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 9, 7, 16], reward: 876.899, convergence: 4
m_population: 5, m_offsprings: 15 - result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 9, 7, 16], reward: 876.899, convergence: 3
m_population: 5, m_offsprings: 20 - result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 9, 7, 16], reward: 876.899, convergence: 2
m_population: 10, m_offsprings: 5 - result: [19, 1, 5, 9, 18, 2, 4, 13, 10, 6, 3, 12, 14, 0, 11, 17, 8, 7, 15, 16], reward: 951.467, convergence: 5000
m_population: 10, m_offsprings: 10 - result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 9, 7, 16], reward: 876.899, convergence: 12
m_population: 10, m_offsprings: 15 - result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 9

# Questions

1. What happens if m_m or m_c is larger than 1? So far: not much difference for WTA1 and WTA2.
2. Look into what happens when the program hits the maximum number of iterations.
3. Choose one m_population and m_offsprings setting and increase max_iter.
4. Compare the methods using the same number of iterations instead of running each until it converges.