In [64]:
import random
import math
import copy
import csv

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt

import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

# Notebook-wide plotting and device setup.
# `display` is only imported when matplotlib reports an inline backend.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# Interactive mode so figures refresh without blocking the kernel.
plt.ion()

# Prefer the GPU when one is visible to torch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Shared figure styling (font sizes and default figure size).
params = {
    'legend.fontsize': 20,
    'figure.figsize': (15, 5),
    'axes.labelsize': 25,
    'axes.titlesize': 20,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
}
plt.rcParams.update(params)

In [14]:
# read example file
def read_example_file(filename):
    """Parse a WTA problem instance from a text file.

    File layout, one number per line after the header:
      * header line: either ``n`` (same count for targets and weapons) or
        ``n_targets n_weapons``;
      * ``n_targets`` lines holding the target values;
      * ``n_weapons * n_targets`` lines holding the probabilities, grouped
        weapon by weapon.

    Returns:
        (n_targets, n_weapons, values, probabilities) where ``values`` is a
        list of floats and ``probabilities[i][j]`` is the float read for
        weapon ``i`` and target ``j``.
    """
    with open(filename) as handle:
        header = [int(token) for token in handle.readline().split()]
        if len(header) == 1:
            n_targets = n_weapons = header[0]
        else:
            n_targets, n_weapons = header

        values = [float(handle.readline()) for _ in range(n_targets)]

        # Outer comprehension walks weapons, inner one walks targets, so the
        # lines are consumed in exactly the order they appear in the file.
        probabilities = [
            [float(handle.readline()) for _ in range(n_targets)]
            for _ in range(n_weapons)
        ]
    return n_targets, n_weapons, values, probabilities

In [15]:
# check convergence
def check_convergence(population, threshold):
    """Report whether one assignment dominates the population.

    Args:
        population: iterable of assignments (each convertible to a tuple).
        threshold: fraction of the population that must share one
            identical assignment for the population to count as converged.

    Returns:
        True if some assignment occurs at least ``len(population) * threshold``
        times, otherwise False (including for an empty population).
    """
    occurrences = {}
    for individual in population:
        key = tuple(individual)
        occurrences[key] = occurrences.get(key, 0) + 1

    required = len(population) * threshold
    return any(count >= required for count in occurrences.values())

# DQN-related code

In [16]:
class DQN(nn.Module):
    """Feed-forward Q-network over (assignment, target values) states.

    The first ``m`` columns of a state row are target indices (one per
    assignment slot); they are embedded and flattened. The remaining columns
    are appended unchanged as floats. The network emits ``m * n`` scores,
    one per flattened action index.

    Args:
        n: number of distinct targets (embedding vocabulary size and the
            number of value columns appended to the state).
        m: number of assignment slots at the front of the state.
        embedding_size: width of each target embedding.
        units: width of the hidden layer.
    """

    def __init__(self, n, m, embedding_size=8, units=64):
        super().__init__()
        self.n = n
        self.m = m
        self.embedding_size = embedding_size

        # m embedded slots, flattened, followed by the n raw value columns.
        self.assignment_size = m * embedding_size
        self.input_size = self.assignment_size + n
        self.output_size = m * n

        # Target ids are categorical, so they are embedded rather than fed
        # to the network as raw numbers.
        self.embedding = nn.Embedding(n, self.embedding_size)
        self.lin1 = nn.Linear(self.input_size, units)
        self.drop1 = nn.Dropout(0.2)
        self.lin2 = nn.Linear(units, self.output_size)
        # Registered but not used by forward().
        self.drop2 = nn.Dropout(0.2)

    def forward(self, state):
        """Score every action for each row of ``state``.

        Args:
            state: 2-D tensor whose rows are ``m`` target indices followed
                by the value columns.

        Returns:
            Tensor with ``m * n`` ReLU-activated scores per row.
        """
        slot_targets = state[:, :self.m].long()
        target_values = state[:, self.m:].float()

        # Embed each slot and flatten the per-slot embeddings into one vector.
        embedded = self.embedding(slot_targets)
        embedded = embedded.view(-1, self.assignment_size).float()

        features = torch.cat([embedded, target_values], dim=1)
        hidden = F.relu(self.drop1(self.lin1(features)))
        return F.relu(self.lin2(hidden))

# with dueling networks
class DuelingDQN(nn.Module):
    """Dueling Q-network over (assignment, target values) states.

    The first ``m`` columns of a state row are target indices (one per
    assignment slot); they are embedded and flattened. The remaining columns
    are appended unchanged as floats. A shared hidden layer feeds two heads:
    a scalar state value and one advantage per flattened action index
    (``m * n`` of them). Q-values are ``V + (A - mean(A))``.

    Args:
        n: number of distinct targets (embedding vocabulary size and the
            number of value columns appended to the state).
        m: number of assignment slots at the front of the state.
        embedding_size: width of each target embedding.
        units: width of the shared hidden layer and of both heads.
    """

    def __init__(self, n, m, embedding_size=8, units=128):
        super(DuelingDQN, self).__init__()
        # The assignment becomes embedded, so it has size m * embedding_size
        # when flattened. The n comes from the values attached.
        self.assignment_size = m * embedding_size
        self.input_size = self.assignment_size + n
        self.output_size = m * n
        self.n = n
        self.m = m

        self.units = units

        self.embedding_size = embedding_size
        # Embed the targets, since the actual numerical value of the
        # target ids doesn't mean anything.
        self.embedding = nn.Embedding(n, self.embedding_size)
        self.lin1 = nn.Linear(self.input_size, units)
        self.drop1 = nn.Dropout(0.2)

        # Head that measures the value of a state.
        self.value_stream = nn.Sequential(
            nn.Linear(units, units),
            nn.ReLU(),
            nn.Linear(units, 1)
        )
        # Head that measures the advantage of each action given a state.
        self.advantage_stream = nn.Sequential(
            nn.Linear(units, units),
            nn.ReLU(),
            nn.Linear(units, self.output_size)
        )

    def forward(self, state):
        """Return Q-values for every action for each row of ``state``.

        Args:
            state: 2-D tensor whose rows are ``m`` target indices followed
                by the value columns.

        Returns:
            Tensor with ``m * n`` Q-values per row.
        """
        assignment = state[:, :self.m].long()
        assignment = self.embedding(assignment)

        values = state[:, self.m:].float()

        # Flatten the assignment embedding and concatenate the values.
        assignment = assignment.view(-1, self.assignment_size).float()
        x = torch.cat([assignment, values], dim=1)
        x = F.relu(self.drop1(self.lin1(x)))

        state_value = self.value_stream(x)
        advantages = self.advantage_stream(x)
        # Centre the advantages per row (over the action dimension). A bare
        # .mean() would average over the whole batch and make each row's
        # Q-values depend on the other rows; for a batch of one row the two
        # forms give the same result.
        centred = advantages - advantages.mean(dim=1, keepdim=True)
        return state_value + centred

    def feature_size(self):
        """Return the width of the shared feature vector fed to both heads.

        The previous body referenced ``self.conv``, ``autograd`` and
        ``self.input_dim``, none of which exist in this class, so it could
        only raise. The shared features here are the output of ``lin1``.
        """
        return self.units

In [17]:
def is_possible(state, weapon, target):
    """Tell whether assigning ``weapon`` to ``target`` would change ``state``.

    Re-assigning a weapon to the target it already holds is a no-op, so
    such an action is reported as not possible.

    Args:
        state: indexable of tensor scalars; ``state[weapon]`` is the
            weapon's current target.
        weapon: index into ``state``.
        target: candidate target to compare against the current one.

    Returns:
        False when the weapon already points at ``target``, True otherwise.
    """
    already_assigned = state[weapon].item() == target
    return not already_assigned

def select_action(model, state, n):
    """Pick the highest-scoring action that actually changes the state.

    The model scores every flattened action index; indices are tried from
    best to worst and the first one accepted by ``is_possible`` is returned.
    A flattened index ``a`` decodes to ``weapon = a // n`` and
    ``target = a % n``.

    Args:
        model: network called on a single-row batch built from ``state``.
        state: 1-D tensor; ``state[weapon]`` must be the weapon's current
            target (the remaining entries are passed to the model as-is).
        n: divisor used to decode a flattened action index (the number of
            targets for the networks defined in this notebook).

    Returns:
        One-element tensor on ``device`` holding the chosen action index.

    Raises:
        ValueError: if no action changes the state.
    """
    # Score in eval mode (dropout off), then restore whatever mode the
    # caller had instead of unconditionally switching the model to train.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            state_batch = state.unsqueeze(0).float()
            ranked = torch.sort(model(state_batch), descending=True, dim=1)[1][0]
    finally:
        model.train(was_training)

    # Decode with Python integers: `/` between integer tensors is true
    # division in current PyTorch and would produce a fractional weapon
    # index that cannot be used to index `state`.
    for flat_index in ranked.tolist():
        weapon, target = divmod(flat_index, n)
        if is_possible(state, weapon, target):
            return torch.tensor([flat_index], device=device)

    # This should never happen
    raise ValueError('Invalid state: no possible action')
        
def decode_action(action, n):
    """Split a flattened action index into its two components.

    Args:
        action: flattened action index (anything supporting ``//`` and ``%``).
        n: row length used when the action space was flattened.

    Returns:
        ``(weapon, target)`` where ``weapon = action // n`` and
        ``target = action % n``.
    """
    weapon = action // n
    target = action % n
    return weapon, target

# General GA

In [50]:
class WTA_1D_General_GA:
    """Genetic algorithm for the weapon-target assignment (WTA) problem.

    An individual ("assignment") is a list of length ``n_weapons`` whose
    i-th entry is the 0-indexed target of weapon ``i``. Each generation
    builds offspring with EX crossover, mutates them (randomly or with a
    trained DQN / dueling DQN), and keeps the best ``population_size``
    individuals out of offspring + parents.

    Args:
        config: dict with keys ``example_file``, ``population_size``,
            ``n_offsprings``, ``m_c`` (crossover swaps per pair) and
            ``m_m`` (mutations per offspring).
        example_number: suffix of the trained model files to load.
        mutation_method: ``'random'``, ``'dqn'`` or ``'dueling_dqn'``.
    """

    def __init__(self, config, example_number, mutation_method='random'):
        self.config = config
        # n_targets: number of targets, n_weapons: number of weapons
        # values: list of target values (size: n_targets)
        # probabilities: list of destruction probabilities (size: n_weapons x n_targets)
        self.n_targets, self.n_weapons, self.values, self.probabilities = read_example_file(config['example_file'])
        # state: list of assignments (size: population_size x n_weapons, 0-indexed for target of each weapon)
        self.state = self.generate_initial_population(config['population_size'])
        self.good_gene_dict = self.get_good_gene_dict()
        self.mutation_method = mutation_method
        # NOTE(security): torch.load unpickles arbitrary objects; only load
        # model files that come from a trusted source.
        self.dqn_model = torch.load('./trained_models/WTA{}'.format(example_number), map_location=torch.device('cpu'))
        self.dqn_model.eval()
        self.dueling_dqn_model = torch.load('./trained_models/WTA{}_dueling'.format(example_number), map_location=torch.device('cpu'))
        self.dueling_dqn_model.eval()

    def get_good_gene_dict(self):
        """Map each weapon to its best target (used by ``ex_crossover``).

        The best target maximises ``values[j] * probabilities[i][j]``; ties
        go to the lowest target index.
        """
        good_gene_dict = {}
        for i in range(self.n_weapons):
            good_gene_dict[i] = max(
                range(self.n_targets),
                key=lambda j: self.values[j] * self.probabilities[i][j],
            )
        return good_gene_dict

    def ocp_crossover(self, population):
        """One-cut-point crossover producing ``population_size`` children."""
        crossover_state = []
        for _ in range(self.config['population_size']):
            father, mother = random.sample(population, 2)
            point = random.randint(0, self.n_weapons - 1)
            crossover_state.append(father[:point] + mother[point:])
        return crossover_state

    def ex_crossover(self, population):
        """EX crossover: build ``n_offsprings`` children from random pairs.

        For every pair, repeated up to ``m_c`` times:
          1. find genes where both children agree AND the shared value is
             the weapon's good gene; those genes are inherited untouched;
          2. pick two of the remaining positions at random;
          3. cross-swap the genes at those positions between the children.

        Children are fresh copies of their parents. Without the copy the
        same list object could be appended to the pool several times and
        later be mutated once per occurrence. ``population`` itself is never
        modified.
        """
        pool = []
        for _ in range(self.config['n_offsprings'] // 2):
            father, mother = random.sample(population, 2)
            child1, child2 = list(father), list(mother)
            for _ in range(self.config['m_c']):
                # A list (not a set): random.sample rejects sets on
                # Python 3.11+.
                gene_swap_candidates = [
                    i for i in range(self.n_weapons)
                    if not (child1[i] == child2[i] and child1[i] == self.good_gene_dict[i])
                ]
                if len(gene_swap_candidates) < 2:
                    break

                swap_idx1, swap_idx2 = random.sample(gene_swap_candidates, 2)
                child1[swap_idx1], child2[swap_idx2] = child2[swap_idx2], child1[swap_idx1]
                child1[swap_idx2], child2[swap_idx1] = child2[swap_idx1], child1[swap_idx2]

            pool.append(child1)
            pool.append(child2)
        return pool

    def mutate_random(self, population):
        """Return copies of ``population`` with ``m_m`` random gene rewrites each.

        Each rewrite picks a uniformly random weapon and a uniformly random
        target. The input assignments are left untouched.
        """
        mutated_population = []
        for assignment in population:
            mutant = list(assignment)
            for _ in range(self.config['m_m']):
                mutated_weapon = random.randrange(self.n_weapons)
                mutated_target = random.randrange(self.n_targets)
                mutant[mutated_weapon] = mutated_target
            mutated_population.append(mutant)
        return mutated_population

    def mutate_dqn(self, population, model_type):
        """Return copies of ``population`` mutated by a trained Q-network.

        Args:
            population: list of assignments; left untouched.
            model_type: ``'dqn'`` or ``'dueling_dqn'``.

        Raises:
            ValueError: for any other ``model_type``.
        """
        if model_type == 'dqn':
            model = self.dqn_model
        elif model_type == 'dueling_dqn':
            model = self.dueling_dqn_model
        else:
            raise ValueError('Unknown model_type: {!r}'.format(model_type))

        mutated_population = []
        for assignment in population:
            mutant = list(assignment)
            for _ in range(self.config['m_m']):
                state = torch.tensor(np.concatenate([mutant, self.values]), device=torch.device('cpu'))
                # Actions are flattened as weapon * n_targets + target, so
                # the decoding divisor is the number of targets (the value
                # only coincides with n_weapons on square instances).
                action = select_action(model, state, self.n_targets)
                mutated_weapon, mutated_target = decode_action(action, self.n_targets)
                # .item() works on any device and yields plain Python ints.
                mutant[int(mutated_weapon.item())] = int(mutated_target.item())
            mutated_population.append(mutant)
        return mutated_population

    def reward(self, assignment):
        """Expected destroyed value of ``assignment``.

        For each target, the survival probability is the product of
        ``1 - probabilities[i][target]`` over the weapons assigned to it;
        the reward sums ``values[j] * (1 - survival_j)`` over all targets.
        """
        survival_probabilities = [1] * self.n_targets
        for i in range(self.n_weapons):
            survival_probabilities[assignment[i]] *= 1 - self.probabilities[i][assignment[i]]
        reward = 0
        for j in range(self.n_targets):
            reward += self.values[j] * (1 - survival_probabilities[j])
        return reward

    def evolution_strategy(self, pool):
        """Keep the ``population_size`` best individuals of ``pool``, best first."""
        ranked = sorted(pool, key=self.reward, reverse=True)
        return ranked[:self.config['population_size']]

    # helper functions
    def generate_initial_population(self, population_size):
        """Create ``population_size`` assignments with uniformly random targets."""
        targets = list(range(self.n_targets))
        return [
            [random.choice(targets) for _ in range(self.n_weapons)]
            for _ in range(population_size)
        ]

    def reset(self):
        """Overwrite ``self.state`` with a single all ``-1`` assignment."""
        # NOTE(review): elsewhere self.state holds a whole population, not a
        # single assignment, and run() does not read it - confirm intent.
        self.state = [-1] * self.n_weapons

    def run(self, max_iter, convergence_threshold=1.0, verbose=False, fp=None):
        """Run the GA from a fresh random population.

        Args:
            max_iter: maximum number of generations.
            convergence_threshold: fraction of identical individuals at
                which the run stops early (see ``check_convergence``).
            verbose: print progress every 40 generations and on convergence.
            fp: optional writable text file; one ``iter,reward,result`` line
                is written per generation.

        Returns:
            ``(best_assignment, best_reward, n_generations_run)``. With
            ``max_iter == 0`` the best of the initial population is returned
            with a generation count of 0.
        """
        assert self.mutation_method in ['random', 'dqn', 'dueling_dqn']
        population = self.generate_initial_population(self.config['population_size'])

        n_iter = 0  # stays 0 when max_iter == 0, instead of an unbound i_iter
        for i_iter in range(max_iter):
            n_iter = i_iter + 1
            pool = self.ex_crossover(population)
            if self.mutation_method == 'random':
                pool = self.mutate_random(pool)
            else:
                pool = self.mutate_dqn(pool, self.mutation_method)
            # evolution_strategy returns the survivors sorted best first.
            population = self.evolution_strategy(pool + population)
            best = population[0]

            if fp is not None:
                out_list = [str(n_iter), '{:.2f}'.format(self.reward(best)), '_'.join(map(str, best))]
                fp.write(','.join(out_list) + '\n')

            # check convergence
            if check_convergence(population, convergence_threshold):
                if verbose:
                    print('converged in iter {}'.format(n_iter))
                break

            if verbose and n_iter % 40 == 0:
                print('iter {}: reward = {:.2f}'.format(n_iter, self.reward(best)))
                print(best)

        best = max(population, key=self.reward)
        return best, self.reward(best), n_iter

In [19]:
# GA settings for the small WTA1 smoke test.
config = dict(
    example_file='./examples/WTA1',
    population_size=50,
    n_offsprings=50,
    m_c=1,
    m_m=1,  # for fair comparison set m_m to 1
)

In [29]:
# Quick end-to-end check on example 1 using DQN-guided mutation.
wta_1d = WTA_1D_General_GA(config, example_number=1, mutation_method='dqn')

# Last expression of the cell, so the (assignment, reward, iterations)
# tuple is displayed as the cell output.
wta_1d.run(max_iter=800, convergence_threshold=0.5, verbose=True)

converged in iter 16


([4, 3, 2, 1, 0], 328.636, 16)

# Experiments

In [39]:
# Experiment-wide constants; the cells below read N_WTA, N_EXPERIMENTS,
# max_iter and config.
N_WEAPONS = 20
max_iter = 5000
N_WTA = 3
M_POPULATION = 15
M_OFFSPRINGS = 10
N_EXPERIMENTS = 8

# Population and offspring counts scale with the number of weapons.
config = dict(
    example_file='./examples/WTA{}'.format(N_WTA),
    population_size=N_WEAPONS * M_POPULATION,
    n_offsprings=N_WEAPONS * M_OFFSPRINGS,
    m_c=1,
    m_m=1,  # for fair comparison set m_m to 1
)

## Convergence threshold 0.5

In [51]:
# Random-mutation GA, convergence threshold 0.5: one summary CSV row per
# experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.5

result_path = './experiments/convergence/WTA{}_random_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_random_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='random')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [6, 1, 5, 8, 18, 2, 0, 17, 10, 12, 4, 19, 13, 9, 11, 3, 14, 7, 15, 16], reward: 945.548, convergence: 10000
result: [19, 1, 5, 9, 18, 3, 2, 10, 7, 6, 17, 12, 13, 4, 11, 14, 8, 0, 15, 16], reward: 952.743, convergence: 10000
result: [2, 1, 17, 9, 15, 3, 5, 10, 7, 12, 18, 19, 13, 4, 11, 6, 8, 0, 16, 14], reward: 957.366, convergence: 10000
result: [18, 6, 11, 9, 1, 2, 5, 13, 10, 12, 3, 19, 14, 0, 4, 17, 8, 7, 15, 16], reward: 954.871, convergence: 10000
result: [11, 1, 2, 8, 18, 3, 5, 10, 7, 12, 17, 19, 13, 9, 4, 6, 14, 0, 15, 16], reward: 955.313, convergence: 10000
result: [2, 8, 17, 9, 15, 10, 5, 1, 7, 12, 3, 18, 13, 0, 4, 6, 14, 19, 16, 11], reward: 949.261, convergence: 10000
result: [11, 6, 5, 9, 1, 18, 2, 8, 10, 12, 3, 19, 13, 4, 7, 17, 14, 0, 15, 16], reward: 951.999, convergence: 10000
result: [2, 1, 11, 8, 0, 18, 3, 10, 7, 12, 17, 19, 13, 9, 4, 6, 14, 5, 15, 16], reward: 950.269, convergence: 10000
result: [2, 7, 17, 9, 1, 19, 3, 8, 10, 0, 18, 12, 13, 4, 11, 6, 14, 5, 1

In [52]:
# DQN-mutation GA, convergence threshold 0.5: one summary CSV row per
# experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.5

result_path = './experiments/convergence/WTA{}_dqn_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_dqn_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [19, 1, 17, 10, 3, 18, 16, 13, 14, 12, 5, 4, 0, 15, 6, 2, 8, 7, 9, 11], reward: 905.870, convergence: 119
result: [19, 2, 5, 10, 3, 18, 16, 11, 13, 12, 17, 4, 0, 15, 1, 6, 8, 7, 9, 14], reward: 906.487, convergence: 135
result: [19, 6, 17, 10, 3, 18, 16, 11, 13, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 14], reward: 899.738, convergence: 30
result: [19, 6, 2, 9, 16, 18, 5, 13, 10, 12, 3, 4, 14, 0, 11, 17, 8, 7, 15, 1], reward: 952.806, convergence: 3881
result: [13, 6, 17, 10, 19, 18, 9, 11, 14, 12, 3, 4, 0, 15, 1, 2, 8, 7, 16, 5], reward: 896.366, convergence: 70
result: [19, 6, 2, 9, 3, 18, 5, 13, 10, 12, 17, 4, 14, 15, 11, 0, 8, 7, 16, 1], reward: 948.175, convergence: 2757
result: [19, 6, 17, 10, 3, 18, 16, 13, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 11], reward: 905.486, convergence: 68
result: [19, 6, 17, 10, 3, 18, 16, 13, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 11], reward: 905.486, convergence: 77
result: [11, 6, 17, 9, 15, 18, 5, 13, 10, 12, 3, 4, 2, 0, 14, 2, 8, 7, 16, 1], reward: 926

In [53]:
# Dueling-DQN-mutation GA, convergence threshold 0.5: one summary CSV row
# per experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.5

result_path = './experiments/convergence/WTA{}_dueling-dqn_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_dueling-dqn_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dueling_dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 6, 3, 12, 14, 8, 17, 15, 18, 9, 7, 16], reward: 882.168, convergence: 35
result: [19, 0, 5, 8, 4, 2, 9, 1, 10, 15, 3, 12, 14, 11, 17, 6, 18, 13, 7, 16], reward: 892.451, convergence: 25
result: [19, 6, 5, 9, 1, 2, 4, 13, 10, 0, 3, 12, 14, 18, 11, 17, 8, 7, 15, 16], reward: 946.660, convergence: 1309
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 0, 17, 12, 14, 4, 11, 6, 8, 7, 15, 16], reward: 953.059, convergence: 4587
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 6, 4, 12, 14, 0, 11, 17, 8, 7, 15, 16], reward: 952.873, convergence: 8260
result: [19, 1, 5, 18, 17, 2, 3, 13, 10, 0, 4, 12, 14, 9, 11, 6, 8, 7, 17, 16], reward: 936.936, convergence: 1687
result: [19, 7, 5, 9, 18, 2, 3, 13, 10, 6, 4, 12, 14, 1, 11, 15, 8, 0, 17, 16], reward: 941.305, convergence: 1372
result: [19, 0, 5, 11, 4, 2, 9, 7, 10, 15, 3, 12, 14, 1, 17, 6, 8, 13, 18, 16], reward: 888.038, convergence: 46
result: [19, 0, 5, 11, 4, 2, 9, 13, 10, 15, 3, 12, 14, 18, 17, 6, 8, 7, 17, 16], rewar

## Convergence threshold 0.6

In [54]:
# Random-mutation GA, convergence threshold 0.6: one summary CSV row per
# experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.6

result_path = './experiments/convergence/WTA{}_random_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_random_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='random')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [11, 1, 2, 8, 15, 3, 5, 13, 10, 0, 17, 12, 18, 9, 4, 6, 14, 7, 19, 16], reward: 948.350, convergence: 10000
result: [18, 8, 5, 9, 1, 3, 4, 13, 10, 12, 17, 19, 2, 0, 11, 6, 14, 7, 15, 16], reward: 951.667, convergence: 10000
result: [11, 6, 2, 9, 1, 10, 3, 17, 7, 12, 18, 19, 13, 0, 4, 14, 8, 5, 15, 16], reward: 951.071, convergence: 10000
result: [11, 1, 2, 9, 15, 10, 3, 18, 7, 6, 4, 19, 13, 0, 14, 17, 8, 5, 16, 12], reward: 948.942, convergence: 10000
result: [11, 1, 17, 18, 16, 2, 5, 8, 10, 12, 3, 19, 13, 9, 7, 6, 14, 0, 15, 4], reward: 949.475, convergence: 10000
result: [6, 1, 11, 9, 3, 2, 5, 10, 7, 12, 17, 18, 13, 0, 4, 14, 8, 19, 15, 16], reward: 952.683, convergence: 10000
result: [2, 1, 7, 9, 15, 18, 5, 17, 10, 6, 4, 12, 13, 0, 11, 3, 8, 19, 16, 14], reward: 949.190, convergence: 10000
result: [2, 1, 17, 9, 4, 18, 5, 8, 10, 0, 3, 19, 13, 12, 11, 6, 14, 7, 15, 16], reward: 955.235, convergence: 10000
result: [11, 6, 2, 8, 3, 18, 5, 10, 7, 12, 4, 19, 13, 9, 1, 17, 14, 0, 1

In [55]:
# DQN-mutation GA, convergence threshold 0.6: one summary CSV row per
# experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.6

result_path = './experiments/convergence/WTA{}_dqn_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_dqn_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [19, 6, 17, 10, 3, 18, 16, 13, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 11], reward: 905.486, convergence: 63
result: [11, 6, 17, 9, 15, 18, 5, 13, 10, 12, 3, 4, 14, 0, 1, 2, 8, 7, 19, 16], reward: 946.210, convergence: 4576
result: [19, 6, 2, 9, 3, 18, 5, 13, 10, 12, 15, 4, 14, 0, 11, 17, 8, 7, 16, 1], reward: 950.907, convergence: 2743
result: [11, 6, 17, 10, 3, 18, 19, 13, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 16], reward: 915.643, convergence: 105
result: [19, 6, 17, 10, 3, 18, 16, 11, 13, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 14], reward: 899.738, convergence: 61
result: [19, 6, 17, 10, 3, 18, 16, 11, 13, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 14], reward: 899.738, convergence: 73
result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 13], reward: 892.151, convergence: 30
result: [19, 6, 2, 9, 3, 18, 5, 1, 10, 12, 16, 4, 13, 0, 11, 17, 8, 7, 15, 14], reward: 948.873, convergence: 5212
result: [19, 6, 2, 9, 3, 18, 5, 13, 10, 12, 16, 4, 14, 0, 11, 17, 8, 7, 15, 1], reward: 9

In [56]:
# Dueling-DQN-mutation GA, convergence threshold 0.6: one summary CSV row
# per experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.6

result_path = './experiments/convergence/WTA{}_dueling-dqn_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_dueling-dqn_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dueling_dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 15, 3, 12, 14, 18, 17, 6, 8, 9, 7, 16], reward: 895.464, convergence: 42
result: [19, 0, 5, 11, 4, 2, 1, 13, 10, 15, 3, 12, 14, 18, 17, 6, 8, 9, 7, 16], reward: 903.258, convergence: 26
result: [19, 0, 5, 11, 4, 2, 9, 13, 10, 15, 3, 12, 14, 18, 17, 6, 8, 18, 7, 16], reward: 887.022, convergence: 37
result: [19, 0, 5, 11, 17, 2, 13, 1, 10, 15, 3, 12, 14, 8, 4, 6, 9, 18, 7, 16], reward: 889.925, convergence: 47
result: [19, 0, 5, 11, 4, 2, 8, 13, 10, 15, 3, 12, 14, 9, 17, 6, 18, 7, 1, 16], reward: 890.960, convergence: 44
result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 9, 7, 16], reward: 876.899, convergence: 24
result: [19, 0, 5, 11, 18, 2, 1, 13, 10, 15, 3, 12, 14, 8, 7, 6, 18, 9, 17, 16], reward: 878.161, convergence: 24
result: [19, 0, 5, 11, 4, 2, 9, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 13, 7, 16], reward: 884.122, convergence: 28
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 6, 4, 12, 14, 0, 11, 17, 8, 7, 17, 16], reward: 938.65

## Convergence threshold 0.7

In [57]:
# Random-mutation GA, convergence threshold 0.7: one summary CSV row per
# experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.7

result_path = './experiments/convergence/WTA{}_random_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_random_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='random')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [2, 15, 17, 9, 1, 10, 3, 13, 7, 12, 4, 18, 14, 0, 11, 6, 8, 5, 19, 16], reward: 949.812, convergence: 10000
result: [11, 1, 17, 9, 0, 2, 5, 18, 10, 12, 3, 19, 13, 4, 14, 6, 8, 7, 15, 16], reward: 954.166, convergence: 10000
result: [2, 8, 17, 9, 1, 18, 5, 10, 7, 12, 3, 19, 13, 4, 11, 6, 14, 0, 15, 16], reward: 960.188, convergence: 10000
result: [19, 1, 5, 9, 15, 10, 3, 8, 7, 12, 17, 18, 13, 4, 11, 6, 14, 0, 16, 2], reward: 954.401, convergence: 10000
result: [15, 8, 7, 9, 1, 2, 5, 17, 10, 12, 3, 18, 13, 4, 11, 6, 14, 0, 19, 16], reward: 951.906, convergence: 10000
result: [18, 8, 6, 9, 1, 3, 2, 10, 4, 12, 17, 19, 5, 0, 11, 13, 14, 7, 15, 16], reward: 939.936, convergence: 10000
result: [2, 1, 6, 9, 3, 18, 5, 17, 10, 12, 0, 19, 13, 4, 11, 14, 8, 7, 15, 16], reward: 952.941, convergence: 10000
result: [11, 1, 5, 9, 3, 10, 19, 17, 7, 6, 18, 12, 13, 4, 14, 15, 8, 0, 16, 2], reward: 946.463, convergence: 10000
result: [2, 6, 3, 9, 1, 18, 5, 17, 7, 19, 10, 4, 13, 12, 11, 14, 8, 0, 1

In [58]:
# DQN-mutation GA, convergence threshold 0.7: one summary CSV row per
# experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.7

result_path = './experiments/convergence/WTA{}_dqn_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_dqn_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [19, 6, 17, 10, 3, 18, 16, 13, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 11], reward: 905.486, convergence: 73
result: [19, 6, 17, 10, 3, 18, 5, 11, 13, 12, 16, 4, 0, 15, 1, 2, 8, 7, 9, 14], reward: 911.682, convergence: 131
result: [19, 6, 2, 9, 16, 18, 5, 13, 10, 12, 3, 4, 14, 0, 11, 17, 8, 7, 15, 1], reward: 952.806, convergence: 8166
result: [13, 6, 17, 10, 3, 18, 5, 11, 14, 12, 16, 4, 0, 15, 1, 2, 8, 7, 9, 19], reward: 896.559, convergence: 57
result: [19, 6, 2, 9, 3, 18, 5, 13, 10, 12, 16, 4, 14, 0, 11, 17, 8, 7, 15, 1], reward: 951.179, convergence: 6888
result: [11, 6, 2, 9, 16, 18, 5, 1, 10, 0, 3, 4, 13, 15, 14, 17, 8, 7, 19, 12], reward: 943.463, convergence: 10000
result: [11, 6, 17, 9, 15, 18, 5, 13, 10, 12, 3, 4, 14, 0, 1, 2, 8, 7, 19, 16], reward: 946.210, convergence: 5070
result: [19, 6, 2, 9, 3, 18, 5, 13, 10, 12, 16, 4, 14, 0, 11, 17, 8, 7, 15, 1], reward: 951.179, convergence: 4096
result: [19, 6, 17, 10, 3, 18, 16, 13, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 11], rewa

In [59]:
# Dueling-DQN-mutation GA, convergence threshold 0.7: one summary CSV row
# per experiment plus one per-iteration CSV per experiment.
convergence_threshold = 0.7

result_path = './experiments/convergence/WTA{}_dueling-dqn_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)
# newline='' is what the csv module asks for on files handed to csv.writer;
# the with-blocks close both files even if a run raises.
with open(result_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        plot_path = './experiments/plot/WTA{}_dueling-dqn_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(plot_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dueling_dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [19, 1, 5, 18, 17, 2, 3, 13, 10, 0, 4, 12, 14, 9, 11, 6, 8, 7, 16, 17], reward: 937.026, convergence: 1761
result: [19, 7, 5, 18, 1, 2, 3, 13, 10, 6, 4, 12, 14, 9, 11, 17, 8, 0, 15, 16], reward: 949.295, convergence: 4440
result: [19, 0, 5, 11, 4, 2, 1, 13, 10, 15, 3, 12, 14, 8, 17, 6, 18, 9, 7, 16], reward: 884.693, convergence: 37
result: [19, 0, 5, 11, 4, 2, 9, 18, 10, 0, 3, 12, 14, 7, 17, 6, 8, 13, 15, 16], reward: 891.980, convergence: 51
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 6, 4, 12, 14, 0, 11, 17, 8, 7, 16, 17], reward: 938.690, convergence: 2314
result: [19, 0, 5, 11, 4, 2, 7, 13, 10, 15, 3, 12, 14, 18, 17, 6, 8, 9, 7, 16], reward: 888.723, convergence: 42
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 6, 4, 12, 14, 0, 11, 17, 8, 7, 15, 16], reward: 952.873, convergence: 6839
result: [19, 0, 5, 11, 4, 2, 13, 1, 10, 2, 3, 12, 14, 18, 17, 6, 8, 9, 7, 16], reward: 886.069, convergence: 43
result: [19, 0, 5, 9, 4, 2, 13, 8, 10, 15, 3, 12, 14, 1, 17, 6, 18, 11, 7, 16], reward: 8

## Convergence threshold 0.8

In [60]:
# Experiment sweep: mutation_method='random', convergence threshold 0.8.
# For each of the N_EXPERIMENTS repetitions a new WTA_1D_General_GA instance is
# run; the per-experiment file is handed to run() through fp= (what run() writes
# into it is defined elsewhere in the notebook), and one summary row
# (result, reward, convergence_iter) is appended to the shared result CSV.
convergence_threshold = 0.8

result_csv_path = './experiments/convergence/WTA{}_random_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)

# `with` closes (and flushes) the files even if a run raises part-way through.
# newline='' is what the csv module asks for so rows are not written with an
# extra carriage return on platforms that translate line endings.
with open(result_csv_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        trace_csv_path = './experiments/plot/WTA{}_random_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(trace_csv_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='random')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        # The result sequence is stored as a single '_'-joined field.
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [15, 1, 17, 9, 3, 10, 5, 8, 7, 12, 4, 19, 13, 18, 11, 6, 14, 0, 16, 2], reward: 952.131, convergence: 10000
result: [10, 1, 5, 9, 15, 2, 3, 17, 7, 12, 18, 19, 13, 4, 11, 6, 8, 0, 16, 14], reward: 949.276, convergence: 10000
result: [2, 1, 6, 9, 17, 18, 5, 10, 7, 12, 4, 19, 13, 0, 11, 3, 8, 14, 15, 16], reward: 954.938, convergence: 10000
result: [2, 8, 9, 3, 1, 18, 5, 17, 10, 12, 4, 19, 13, 0, 11, 6, 14, 7, 15, 16], reward: 952.158, convergence: 10000
result: [6, 1, 7, 9, 18, 2, 3, 13, 10, 12, 17, 19, 14, 4, 11, 0, 8, 5, 15, 16], reward: 948.849, convergence: 10000
result: [2, 1, 17, 9, 15, 18, 5, 10, 7, 12, 3, 19, 13, 4, 11, 6, 8, 0, 14, 16], reward: 957.527, convergence: 10000
result: [19, 8, 5, 9, 1, 18, 2, 17, 10, 12, 3, 4, 13, 0, 11, 6, 14, 7, 15, 16], reward: 953.311, convergence: 10000
result: [2, 1, 11, 9, 15, 10, 3, 18, 7, 12, 17, 19, 13, 0, 14, 6, 8, 5, 16, 4], reward: 948.844, convergence: 10000
result: [11, 1, 17, 9, 15, 18, 2, 10, 7, 0, 3, 19, 13, 4, 14, 6, 8, 5, 1

In [61]:
# Experiment sweep: mutation_method='dqn', convergence threshold 0.8.
# For each of the N_EXPERIMENTS repetitions a new WTA_1D_General_GA instance is
# run; the per-experiment file is handed to run() through fp= (what run() writes
# into it is defined elsewhere in the notebook), and one summary row
# (result, reward, convergence_iter) is appended to the shared result CSV.
convergence_threshold = 0.8

result_csv_path = './experiments/convergence/WTA{}_dqn_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)

# `with` closes (and flushes) the files even if a run raises part-way through.
# newline='' is what the csv module asks for so rows are not written with an
# extra carriage return on platforms that translate line endings.
with open(result_csv_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        trace_csv_path = './experiments/plot/WTA{}_dqn_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(trace_csv_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        # The result sequence is stored as a single '_'-joined field.
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [19, 6, 2, 9, 3, 18, 5, 13, 10, 12, 15, 4, 14, 0, 11, 17, 8, 7, 16, 1], reward: 950.907, convergence: 5828
result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 13], reward: 892.151, convergence: 69
result: [19, 6, 17, 10, 3, 18, 16, 13, 14, 12, 5, 4, 0, 15, 1, 2, 8, 7, 9, 11], reward: 905.486, convergence: 103
result: [19, 6, 17, 10, 3, 18, 16, 11, 14, 12, 5, 4, 13, 15, 1, 2, 8, 7, 9, 0], reward: 903.932, convergence: 50
result: [19, 6, 17, 10, 3, 18, 16, 1, 13, 12, 5, 4, 0, 15, 11, 2, 8, 7, 9, 14], reward: 906.868, convergence: 114
result: [19, 6, 2, 9, 3, 18, 5, 13, 10, 12, 16, 4, 14, 0, 11, 17, 8, 7, 15, 1], reward: 951.179, convergence: 10000
result: [19, 6, 2, 9, 3, 18, 5, 1, 10, 12, 14, 4, 13, 0, 11, 17, 8, 7, 15, 16], reward: 950.499, convergence: 7054
result: [19, 6, 2, 9, 16, 18, 5, 13, 10, 12, 3, 4, 14, 0, 11, 17, 8, 7, 15, 1], reward: 952.806, convergence: 6184
result: [11, 6, 17, 9, 15, 18, 5, 13, 10, 12, 3, 4, 14, 0, 1, 2, 8, 7, 19, 16], rewar

In [62]:
# Experiment sweep: mutation_method='dueling_dqn', convergence threshold 0.8.
# For each of the N_EXPERIMENTS repetitions a new WTA_1D_General_GA instance is
# run; the per-experiment file is handed to run() through fp= (what run() writes
# into it is defined elsewhere in the notebook), and one summary row
# (result, reward, convergence_iter) is appended to the shared result CSV.
convergence_threshold = 0.8

result_csv_path = './experiments/convergence/WTA{}_dueling-dqn_threshold_{}_result.csv'.format(N_WTA, convergence_threshold)

# `with` closes (and flushes) the files even if a run raises part-way through.
# newline='' is what the csv module asks for so rows are not written with an
# extra carriage return on platforms that translate line endings.
with open(result_csv_path, 'w', newline='') as fpout:
    wr = csv.writer(fpout, delimiter=',')
    wr.writerow(['result', 'reward', 'convergence_iter'])

    for i_exp in range(N_EXPERIMENTS):
        trace_csv_path = './experiments/plot/WTA{}_dueling-dqn_threshold_{}_experiment{}.csv'.format(N_WTA, convergence_threshold, i_exp)
        with open(trace_csv_path, 'w') as fpout_iter:
            fpout_iter.write('i_iter,reward,result\n')

            wta_1d = WTA_1D_General_GA(config, N_WTA, mutation_method='dueling_dqn')
            result, reward, convergence_iter = wta_1d.run(max_iter, convergence_threshold=convergence_threshold, verbose=False, fp=fpout_iter)

        print('result: {}, reward: {:.3f}, convergence: {}'.format(result, reward, convergence_iter))
        # The result sequence is stored as a single '_'-joined field.
        wr.writerow(['_'.join(map(str, result)), '{:.3f}'.format(reward), convergence_iter])

result: [19, 7, 5, 9, 1, 2, 3, 13, 10, 0, 17, 12, 18, 4, 11, 6, 8, 14, 15, 16], reward: 949.660, convergence: 6571
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 6, 4, 12, 14, 0, 11, 17, 8, 7, 15, 16], reward: 952.873, convergence: 9539
result: [19, 0, 5, 11, 4, 2, 9, 1, 10, 15, 3, 12, 14, 8, 17, 6, 18, 13, 7, 16], reward: 884.122, convergence: 35
result: [19, 0, 5, 11, 4, 2, 18, 1, 10, 15, 3, 12, 14, 8, 17, 6, 9, 13, 7, 16], reward: 886.971, convergence: 36
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 6, 4, 12, 14, 0, 11, 17, 8, 7, 15, 16], reward: 952.873, convergence: 9875
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 6, 4, 12, 14, 0, 11, 17, 8, 7, 16, 17], reward: 938.690, convergence: 4033
result: [19, 1, 5, 9, 18, 2, 3, 13, 10, 0, 17, 12, 14, 4, 11, 6, 8, 7, 15, 16], reward: 953.059, convergence: 10000
result: [19, 1, 5, 9, 15, 2, 3, 13, 10, 6, 18, 12, 14, 4, 11, 17, 8, 0, 7, 16], reward: 950.667, convergence: 10000
result: [19, 1, 5, 8, 18, 2, 3, 13, 10, 6, 4, 12, 14, 0, 11, 17, 9, 7, 15, 16], re

## Convergence threshold 0.9

## Convergence threshold 1.0

# Experiment Results

In [None]:
def plot_reward(filename_list, dataname_list, title=''):
    """Overlay the reward curves stored in several CSV files on one figure.

    Args:
        filename_list: CSV paths to load with pandas; each file must expose
            `i_iter` and `reward` columns.
        dataname_list: legend labels, looked up by the position of the
            corresponding entry in `filename_list`.
        title: text placed above the axes (empty by default).

    A new 20x10 figure is created on every call and shown with `plt.show()`;
    nothing is returned.
    """
    plt.figure(figsize=(20, 10))

    for position, csv_path in enumerate(filename_list):
        history = pd.read_csv(csv_path)
        plt.plot(history.i_iter, history.reward, label=dataname_list[position])

    # Axis decorations are independent of each other and of the legend.
    plt.title(title)
    plt.xlabel('iteration')
    plt.ylabel('reward')
    plt.legend()
    plt.show()

## Random 

In [66]:
# Summary for the random-mutation runs: one line per convergence threshold,
# giving mean (std) of the `reward` and `convergence_iter` columns of the
# corresponding result CSV.
df_threshold_50 = pd.read_csv('./experiments/convergence/WTA3_random_threshold_0.5_result.csv')
df_threshold_60 = pd.read_csv('./experiments/convergence/WTA3_random_threshold_0.6_result.csv')
df_threshold_70 = pd.read_csv('./experiments/convergence/WTA3_random_threshold_0.7_result.csv')
df_threshold_80 = pd.read_csv('./experiments/convergence/WTA3_random_threshold_0.8_result.csv')

# Each frame is paired with its own report template so the printed text is unchanged.
for line_template, frame in (
    ('Random Threshold 50% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_50),
    ('Random Threshold 60% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_60),
    ('Random Threshold 70% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_70),
    ('Random Threshold 80% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_80),
):
    reward_col = frame['reward']
    iter_col = frame['convergence_iter']
    print(line_template.format(reward_col.mean(), reward_col.std(), iter_col.mean(), iter_col.std()))

Random Threshold 50% - reward: 951.37 (3.761), convergence_iter: 10000.00 (0.000)
Random Threshold 60% - reward: 951.50 (2.496), convergence_iter: 10000.00 (0.000)
Random Threshold 70% - reward: 949.78 (6.155), convergence_iter: 10000.00 (0.000)
Random Threshold 80% - reward: 952.17 (2.786), convergence_iter: 10000.00 (0.000)


## DQN

In [68]:
# Summary for the DQN-mutation runs: one line per convergence threshold,
# giving mean (std) of the `reward` and `convergence_iter` columns of the
# corresponding result CSV. The 100% row is read from a different location
# (./experiments/WTA3/dqn_mutation.csv) than the other thresholds.
df_threshold_50 = pd.read_csv('./experiments/convergence/WTA3_dqn_threshold_0.5_result.csv')
df_threshold_60 = pd.read_csv('./experiments/convergence/WTA3_dqn_threshold_0.6_result.csv')
df_threshold_70 = pd.read_csv('./experiments/convergence/WTA3_dqn_threshold_0.7_result.csv')
df_threshold_80 = pd.read_csv('./experiments/convergence/WTA3_dqn_threshold_0.8_result.csv')
df_threshold_100 = pd.read_csv('./experiments/WTA3/dqn_mutation.csv')

# Each frame is paired with its own report template so the printed text is unchanged.
for line_template, frame in (
    ('DQN Threshold 50% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_50),
    ('DQN Threshold 60% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_60),
    ('DQN Threshold 70% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_70),
    ('DQN Threshold 80% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_80),
    ('DQN Threshold 100% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_100),
):
    reward_col = frame['reward']
    iter_col = frame['convergence_iter']
    print(line_template.format(reward_col.mean(), reward_col.std(), iter_col.mean(), iter_col.std()))

DQN Threshold 50% - reward: 919.54 (22.314), convergence_iter: 1179.00 (1495.911)
DQN Threshold 60% - reward: 922.11 (24.282), convergence_iter: 1565.10 (2065.143)
DQN Threshold 70% - reward: 926.80 (23.791), convergence_iter: 3461.30 (3899.291)
DQN Threshold 80% - reward: 931.28 (25.474), convergence_iter: 4038.90 (3669.057)
DQN Threshold 100% - reward: 913.26 (28.217), convergence_iter: 1882.50 (2494.006)


## Dueling DQN

In [69]:
# Summary for the dueling-DQN-mutation runs: one line per convergence
# threshold, giving mean (std) of the `reward` and `convergence_iter` columns
# of the corresponding result CSV. The 100% row is read from a different
# location (./experiments/WTA3/dueling_dqn_mutation.csv) than the others.
df_threshold_50 = pd.read_csv('./experiments/convergence/WTA3_dueling-dqn_threshold_0.5_result.csv')
df_threshold_60 = pd.read_csv('./experiments/convergence/WTA3_dueling-dqn_threshold_0.6_result.csv')
df_threshold_70 = pd.read_csv('./experiments/convergence/WTA3_dueling-dqn_threshold_0.7_result.csv')
df_threshold_80 = pd.read_csv('./experiments/convergence/WTA3_dueling-dqn_threshold_0.8_result.csv')
df_threshold_100 = pd.read_csv('./experiments/WTA3/dueling_dqn_mutation.csv')

# Each frame is paired with its own report template so the printed text is unchanged.
for line_template, frame in (
    ('Dueling DQN Threshold 50% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_50),
    ('Dueling DQN Threshold 60% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_60),
    ('Dueling DQN Threshold 70% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_70),
    ('Dueling DQN Threshold 80% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_80),
    ('Dueling DQN Threshold 100% - reward: {:.2f} ({:.3f}), convergence_iter: {:.2f} ({:.3f})', df_threshold_100),
):
    reward_col = frame['reward']
    iter_col = frame['convergence_iter']
    print(line_template.format(reward_col.mean(), reward_col.std(), iter_col.mean(), iter_col.std()))

Dueling DQN Threshold 50% - reward: 921.68 (29.071), convergence_iter: 1818.40 (2652.644)
Dueling DQN Threshold 60% - reward: 893.33 (17.699), convergence_iter: 206.70 (538.853)
Dueling DQN Threshold 70% - reward: 917.19 (31.061), convergence_iter: 2356.50 (3048.546)
Dueling DQN Threshold 80% - reward: 937.04 (27.487), convergence_iter: 6632.50 (4040.640)
Dueling DQN Threshold 100% - reward: 907.78 (36.415), convergence_iter: 2191.50 (2558.097)
