# Test and analyse performance with neural network VS constant actions

## Import and initialise settings

In [None]:
from ddqn import DDQNAgent
from DE_oldFeatures import DE
import numpy as np
import ioh
import time
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import gzip
import os

In [None]:
# Problem names taken from ioh's BBOB registry, in the registry's own order.
bbob_registry = ioh.problem.BBOB.problems
func_choice = [bbob_registry[key] for key in bbob_registry]
d_choice = [10]  # problem dimensions to evaluate
# Every (function, dimension) combination that the cells below iterate over.
func_select = [(name, d) for name in func_choice for d in d_choice]

# Build one throw-away environment on the first problem just to read the
# population size and the state/action dimensions the agent needs.
fun, dim = func_select[0]
f = ioh.get_problem(fun, 1, dim)
env = DE(f)
NP = env.NP

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

# One legend label per mutation entry ("<callable name> <second field>"),
# followed by the extra "random" baseline.
labels = [f"{mutation[0].__name__} {mutation[1]!s}" for mutation in env.mutations]
labels.append("random")
print(labels)
colors = plt.cm.tab20(range(len(labels)))

def index_to_target(index):
    """Map a target index to its log10 error threshold.

    Index 0 corresponds to 2 (an error of 1e2) and every further index
    lowers the exponent by 0.2, so index 50 corresponds to -8.
    """
    step = 0.2
    first_exponent = 2
    return first_exponent - step * index

# Create the output directory 'test' if it does not exist yet. exist_ok avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs('test', exist_ok=True)

## Test model on all functions

Creates files in the 'test' directory where it stores information about the reward, error, budget, when targets are hit, the actions that are taken, Q-values and if (and when) the DE converged during the run.

In [None]:
episodes = 100
budget = 1e4

# Load the trained checkpoint and turn off exploration (epsilon = 0).
agent = DDQNAgent(state_size, action_size)
agent.load("model1/ddqn")
agent.epsilon = 0.0

for fun, dim in func_select:
    print(fun,dim)

    # Per-function logs; every list receives one entry per episode.
    fun_reward = []
    fun_error = []
    fun_budget = []
    fun_target_budget = []
    fun_actions = []
    fun_Q = []
    fun_converged = []
    for i in range(episodes):
        # int(i/20) moves on to the next problem instance every 20 episodes.
        f = ioh.get_problem(fun, int(i/20), dim)
        env = DE(f, budget, True, 0.9)
        state = env.reset()
        target_i = 0
        # Evaluations spent when each of the 51 targets was first hit. Targets
        # that are never hit keep the sentinel budget*10 (beyond the budget).
        # NOTE(review): assumes env.budget is the remaining budget - confirm.
        target_budget = np.full(51, budget*10, dtype=int)
        run_reward = []
        run_actions = []
        run_Q = []
        run_converged = []
        done = False
        best_so_far = env.best_so_far
        # Targets already satisfied by the initial population.
        while np.log10(best_so_far - env.best_value) < index_to_target(target_i) and target_i<51:
            target_budget[target_i] = int(budget - env.budget)
            target_i += 1
        while not done:
            state = np.reshape(state, [NP, 1, state_size])
            actions, Q = agent.act(state, Q=True)
            state, reward, done, converged = env.step(actions)
            best_so_far = min(best_so_far, env.best_so_far)
            if not done and converged:
                # Restart the population but keep spending the same budget;
                # the restart itself is charged NP evaluations.
                print("converged")
                b = env.budget
                state = env.reset()
                env.budget = b - env.NP
                run_converged.append(env.budget)
            # Record every target newly reached in this generation.
            while np.log10(best_so_far - env.best_value) < index_to_target(target_i) and target_i<51:
                target_budget[target_i] = int(budget - env.budget)
                target_i += 1
            run_reward.append(reward)
            run_actions.append(actions)
            run_Q.append(Q)
        fun_reward.append(run_reward)
        fun_error.append(best_so_far - env.best_value)
        fun_budget.append(env.budget)
        fun_target_budget.append(target_budget)
        fun_actions.append(run_actions)
        fun_Q.append(run_Q)
        # Store this run's restart log. The original appended fun_converged to
        # itself, which lost the data and produced a self-referential list.
        fun_converged.append(run_converged)

    # One gzip-compressed pickle per function, named after the problem id.
    name = "test/mod1_"+str(f.meta_data.problem_id)
    if os.path.exists(name+".gz"):
        os.remove(name+".gz")
    # The context manager closes the file even if pickling fails.
    with gzip.open(name+".gz", "wb") as fptr:
        pickle.dump([fun_reward,fun_error,fun_budget,fun_target_budget,fun_actions,fun_Q,fun_converged], fptr)

## Establish baseline by testing with a constant mutation strategy

For every function try all strategies + random for a set amount of times.

Creates files in the 'test' directory where it stores information about the reward, error, budget and when targets are hit, for every strategy.

In [None]:
episodes = 100
budget = 1e4

for fun, dim in func_select:
    # Per-function logs, indexed by strategy first and episode second.
    a_reward = []
    a_error = []
    a_budget = []
    a_target_budget = []
    for a in range(action_size + 1): # try all strategies + random
        print(fun,dim,a)

        fun_reward = []
        fun_error = []
        fun_budget = []
        fun_target_budget = []
        for i in range(episodes):
            # int(i/20) moves on to the next problem instance every 20 episodes.
            f = ioh.get_problem(fun, int(i/20), dim)
            env = DE(f, budget, False, 0.9)
            env.reset()
            target_i = 0
            # Evaluations spent when each of the 51 targets was first hit;
            # targets never hit keep the sentinel budget*10.
            # NOTE(review): assumes env.budget is the remaining budget - confirm.
            target_budget = np.full(51, budget*10, dtype=int)
            run_reward = []
            done = False
            best_so_far = env.best_so_far
            # Targets already satisfied by the initial population.
            while np.log10(best_so_far - env.best_value) < index_to_target(target_i) and target_i<51:
                target_budget[target_i] = int(budget - env.budget)
                target_i += 1
            while not done:
                if a == action_size:
                    # Extra "random" baseline: a fresh random strategy for
                    # every individual in every generation.
                    actions = np.random.randint(env.action_space.n,size=env.NP,dtype=int)
                else:
                    # Constant baseline: every individual uses strategy a.
                    actions = np.repeat(a,env.NP)
                _, reward, done, converged = env.step(actions)
                best_so_far = min(best_so_far, env.best_so_far)
                if not done and converged:
                    # Restart the population but keep spending the same
                    # budget; the restart is charged NP evaluations.
                    print("converged")
                    b = env.budget
                    env.reset()
                    env.budget = b - env.NP
                # Record every target newly reached in this generation.
                while np.log10(best_so_far - env.best_value) < index_to_target(target_i) and target_i<51:
                    target_budget[target_i] = int(budget - env.budget)
                    target_i += 1
                run_reward.append(reward)
            fun_reward.append(run_reward)
            fun_error.append(best_so_far - env.best_value)
            fun_budget.append(env.budget)
            fun_target_budget.append(target_budget)
        a_reward.append(fun_reward)
        a_error.append(fun_error)
        a_budget.append(fun_budget)
        a_target_budget.append(fun_target_budget)

    # One gzip-compressed pickle per function, named after the problem id.
    name = "test/con_"+str(f.meta_data.problem_id)
    if os.path.exists(name+".gz"):
        os.remove(name+".gz")
    # The context manager closes the file even if pickling fails.
    with gzip.open(name+".gz", "wb") as fptr:
        pickle.dump([a_reward,a_error,a_budget,a_target_budget], fptr)

## Load fraction-budget logs and plot

First make separate plots for every function. Then make one plot with the cumulative results over all functions.

In [None]:
def _target_curve(target_budgets, max_budget=1e4):
    """Return (x, y) of the fraction-of-targets-hit curve.

    x holds the sorted hit budgets, cut off after the first entry at or
    beyond max_budget and clipped to max_budget so the curve extends to the
    right edge of the plot. y is the matching cumulative fraction in [0, 1].
    """
    x = np.sort(np.array(target_budgets).flatten())
    endfrac = np.searchsorted(x, max_budget) + 1
    y = np.linspace(0, 1, len(x))[:endfrac]
    x = np.clip(x[:endfrac], 0, max_budget)
    return x, y


endfrac_list = []
auc_list = []
for f in range(1, 25):
    endfrac_fun = []
    auc_fun = []
    max_targets = 0
    # Entry 3 of the baseline log holds the target budgets per strategy.
    with gzip.open("test/con_"+str(f)+".gz", "rb") as fptr:
        fun_target_budget = pickle.load(fptr)[3]
    for a in range(action_size + 1):
        x, y = _target_curve(fun_target_budget[a])
        # y[-1] is the fraction reached at the end of the budget. The original
        # y[endfrac-1] is the same value, but raised IndexError when every
        # target was hit before max_budget.
        endfrac_fun.append(y[-1])
        auc_fun.append(np.trapz(y,x))
        max_targets = max(max_targets, y[-1])
        if a == 8:
            # presumably index 8 is the "random" baseline (eight strategies)
            c = 'black'
        else:
            # Swap neighbouring tab20 entries and darken them; keep alpha at 1.
            c = colors[a+1-2*(a%2)] * 0.9
            c[3] = 1.0
        plt.plot(x, y, label=labels[a], color=c)

    # The three trained models share the same processing; only file prefix,
    # legend label and line colour differ.
    for prefix, model_label, line_color in (
        ("mod1", "model1", "b"),
        ("mod2", "model2", "r"),
        ("mod3", "model3", "g"),
    ):
        with gzip.open("test/"+prefix+"_"+str(f)+".gz", "rb") as fptr:
            fun_target_budget = pickle.load(fptr)[3]
        x, y = _target_curve(fun_target_budget)
        # Use the same end-of-budget value as the baselines. The original
        # used y[endfrac-2] here, one step lower than the baseline columns.
        endfrac_fun.append(y[-1])
        auc_fun.append(np.trapz(y,x))
        max_targets = max(max_targets, y[-1])
        plt.plot(x, y, label=model_label, color=line_color, linestyle='dashed')

    plt.xscale("log")
    plt.xlim(1e2, 1e4)
    plt.ylim(0, min(max_targets*1.1, 1))
    plt.xlabel("No. function evaluations")
    plt.ylabel("Fraction of targets hit")
    plt.grid(which="both")
    plt.legend()
    plt.title(str(f)+' '+func_choice[f-1])
    plt.savefig("targets_fun"+str(f)+".pdf", bbox_inches='tight')
    plt.show()
    endfrac_list.append(endfrac_fun)
    auc_list.append(auc_fun)

In [None]:
def _target_curve(target_budgets, max_budget=1e4):
    """Return (x, y) of the fraction-of-targets-hit curve.

    x holds the sorted hit budgets, cut off after the first entry at or
    beyond max_budget and clipped to max_budget so the curve extends to the
    right edge of the plot. y is the matching cumulative fraction in [0, 1].
    """
    x = np.sort(np.array(target_budgets).flatten())
    endfrac = np.searchsorted(x, max_budget) + 1
    y = np.linspace(0, 1, len(x))[:endfrac]
    x = np.clip(x[:endfrac], 0, max_budget)
    return x, y


functions = 24

max_targets = 0

total_targets = []
endfrac_fun = []
auc_fun = []
# Entry 3 of every baseline log holds the target budgets per strategy.
for f in range(1, functions+1):
    with gzip.open("test/con_"+str(f)+".gz", "rb") as fptr:
        total_targets.append(pickle.load(fptr)[3])

# Join along axis 1 so the first axis keeps indexing the strategy.
total_targets = np.concatenate(total_targets, axis=1)

for a in range(action_size + 1):
    x, y = _target_curve(total_targets[a])
    # y[-1] is the fraction reached at the end of the budget. It equals the
    # original y[endfrac-1] but cannot raise IndexError when all targets are
    # hit before max_budget.
    endfrac_fun.append(y[-1])
    auc_fun.append(np.trapz(y,x))
    max_targets = max(max_targets, y[-1])
    if a == 8:
        # presumably index 8 is the "random" baseline (eight strategies)
        c = 'black'
    else:
        # Swap neighbouring tab20 entries and darken them; keep alpha at 1.
        c = colors[a+1-2*(a%2)] * 0.9
        c[3] = 1.0
    plt.plot(x, y, label=labels[a], color=c)

# The three trained models share the same processing; only file prefix,
# legend label and line colour differ.
for prefix, model_label, line_color in (
    ("mod1", "model1", "b"),
    ("mod2", "model2", "r"),
    ("mod3", "model3", "g"),
):
    total_targets = []
    for f in range(1, functions+1):
        with gzip.open("test/"+prefix+"_"+str(f)+".gz", "rb") as fptr:
            total_targets.append(pickle.load(fptr)[3])

    total_targets = np.concatenate(total_targets)

    x, y = _target_curve(total_targets)
    # Use the same end-of-budget value as the baselines. The original used
    # y[endfrac-2] here, one step lower than the baseline columns.
    endfrac_fun.append(y[-1])
    auc_fun.append(np.trapz(y,x))
    max_targets = max(max_targets, y[-1])
    plt.plot(x, y, label=model_label, color=line_color, linestyle='dashed')

# Add the aggregate as an extra row below the per-function rows.
endfrac_list.append(endfrac_fun)
auc_list.append(auc_fun)

plt.xscale("log")
plt.xlim(1e2, 1e4)
plt.ylim(0, min(max_targets*1.1, 1))
plt.xlabel("No. function evaluations")
plt.ylabel("Fraction of targets hit")
plt.grid(which="both")
plt.legend()
plt.title("average over all functions")
plt.savefig("targets_avg.pdf", bbox_inches='tight')
plt.show()

## Analyse actions taken by models

Open all logs from the models, count how often each action is taken, and store the counts in a separate Excel sheet for each model (one row per function).

In [None]:
# total_actions[function][model] holds the logged actions (entry 4 of each log).
total_actions = []
for f in range(1, 25):
    fun_actions = []
    for model in ("mod1", "mod2", "mod3"):
        with gzip.open(f"test/{model}_{f}.gz", "rb") as fptr:
            fun_actions.append(pickle.load(fptr)[4])
    total_actions.append(fun_actions)

In [None]:
def count_occurrences(arr, target):
    """Count how often target occurs in an arbitrarily nested sequence.

    Lists and numpy arrays are descended into recursively, so the input may
    be multidimensional with an inhomogeneous shape. Every other element is
    compared to target directly. This is slow on large logs.
    """
    total = 0
    for item in arr:
        if isinstance(item, (list, np.ndarray)):
            total += count_occurrences(item, target)
            continue
        if item == target:
            total += 1
    return total

l = total_actions
# c[model][function][action]: how often each of the 8 actions was taken.
c = [
    [[count_occurrences(l[f][m], a) for a in range(8)] for f in range(24)]
    for m in range(3)
]
print(np.shape(c))

In [None]:
# Write one Excel sheet per model; rows are functions, columns are actions.
for i, model_counts in enumerate(c):
    df = pd.DataFrame(model_counts, columns=labels[:8])
    df.to_excel(f"a_count_list{i}.xlsx")

### Export tables

In [None]:
# One column per constant strategy (plus random), then one per trained model.
endfrac_columns = labels + ["model1", "model2", "model3"]
df = pd.DataFrame(data=endfrac_list, columns=endfrac_columns)
df.to_excel("endfrac_list.xlsx")

In [None]:
# One column per constant strategy (plus random), then one per trained model.
auc_columns = labels + ["model1", "model2", "model3"]
df = pd.DataFrame(data=auc_list, columns=auc_columns)
df.to_excel("auc_list.xlsx")

## Analyse actions and Q-values

Run a few episodes and keep track of the Q-values and of the individual that performed best overall. Then create plots of the actions taken and of the Q-values of the best individual.

In [None]:
episodes = 5
budget = 1e4

# Load the trained checkpoint and turn off exploration (epsilon = 0).
agent = DDQNAgent(state_size, action_size)
agent.load("model1/ddqn")
agent.epsilon = 0.0

# Logs indexed as [function][episode][generation].
fun_actions = []
fun_Q = []
fun_error = []
fun_best_individuals = []
for fun, dim in func_select:
    print(fun,dim)
    f = ioh.get_problem(fun, 1, dim)
    env = DE(f, budget, True, 0.9)
    episode_actions = []
    episode_Q = []
    episode_error = []
    best_individuals = []
    for i in range(episodes):
        state = env.reset()
        done = False
        single_a = []
        single_Q = []
        single_error = []
        while not done:
            state = np.reshape(state, [NP, 1, state_size])
            actions, Q = agent.act(state, Q=True)
            # Log the decision and the error before the step is applied.
            single_a.append(actions)
            single_Q.append(Q)
            single_error.append(env.best_so_far - env.best_value)
            state, reward, done, converged = env.step(actions)
            # Restart only while the episode is still running, as the two
            # evaluation loops in this notebook do. Resetting after the final
            # step would make np.argmin(env.F) below read a fresh population
            # (presumably reset() re-initialises env.F - TODO confirm).
            if not done and converged:
                print("converged")
                b = env.budget
                state = env.reset()
                env.budget = b - env.NP
        episode_actions.append(single_a)
        episode_Q.append(single_Q)
        episode_error.append(single_error)
        # Index of the individual with the lowest value in env.F at the end.
        best_individuals.append(np.argmin(env.F))
    fun_actions.append(episode_actions)
    fun_Q.append(episode_Q)
    fun_error.append(episode_error)
    fun_best_individuals.append(best_individuals)

In [None]:
# One figure per (function, episode): stacked action counts per generation on
# the left axis and the error on a logarithmic right axis.
for f in range(24):
    for e, generation_actions in enumerate(fun_actions[f]):
        print(f+1,e+1)
        taken = np.array(generation_actions)
        # Row a holds, per generation, how many individuals chose action a.
        action_sum = [np.sum(taken == a, axis=1) for a in range(8)]

        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()

        n_generations = np.shape(action_sum)[1]
        lns1 = ax1.stackplot(range(n_generations), np.array(action_sum))
        log_error = fun_error[f][e]
        lns2 = ax2.plot(log_error, c='black')

        ax2.set_yscale("log")
        ax1.set_xlim(0, len(log_error)-1)
        ax1.set_ylim(0, 100)
        ax2.set_ylim(1e-8, max(log_error))

        # Single legend that covers the artists of both axes.
        legend_handles = lns1 + lns2
        legend_names = labels[:8] + ["error"]
        ax2.legend(legend_handles, legend_names, loc=0, fancybox=True, framealpha=0.3)

        ax1.set_xlabel("generation")
        ax1.set_ylabel("actions per generation")
        ax2.set_ylabel("error")

        plt.title(f"{f+1} " + func_choice[f])
        plt.savefig(f"model1_actions_fun{f+1}_ep{e+1}.pdf", bbox_inches='tight')
        plt.show()

In [None]:
# One figure per (function, episode): Q-value of every action over the
# generations, for the individual recorded as best at the end of the episode.
for f in range(24):
    for e in range(len(fun_actions[f])):
        Q = np.array(fun_Q[f][e])
        # Keep only the best individual; the remaining axes are indexed as
        # [generation, action] below.
        Q = Q[:,fun_best_individuals[f][e]]
        for a in range(8):
            plt.plot(Q[:,a],label=labels[a])
        plt.legend()
        plt.xlabel("generation")
        plt.ylabel("Q")
        plt.xlim(0, len(fun_error[f][e])-1)
        plt.grid()
        plt.title("Best individual in "+func_choice[f])
        # The data comes from the "model1" checkpoint, like the action plots
        # saved as model1_actions_*; the original saved under "model2_".
        plt.savefig("model1_bestQ_fun"+str(f+1)+"_ep"+str(e+1)+".pdf", bbox_inches='tight')
        plt.show()

## Analyse reward

Create tables with the average rewards. These are exported to an Excel sheet and stylised using Excel.

In [None]:
# Quick check on one function: shape of the baseline reward log (entry 0)
# and the mean reward per strategy.
f = 1
with gzip.open(f"test/con_{f}.gz", "rb") as fptr:
    r = pickle.load(fptr)[0]
print(np.shape(r))
np.mean(r, axis=(1,2,3))

In [None]:
def _reward_sum_and_count(runs):
    """Return (sum, count) over every single reward in runs[run][gen][i]."""
    total = 0
    n = 0
    for run in runs:
        for gen in run:
            for reward in gen:
                total += reward
                n += 1
    return total, n


# Mean reward of model1 per function, followed by the mean over all rewards.
list_mean_reward = []
totaltotal = 0
nn = 0
for f in range(1,25):
    with gzip.open(f"test/mod1_{f}.gz", "rb") as fptr:
        r = pickle.load(fptr)[0]

    total, n = _reward_sum_and_count(r)
    nn += n
    totaltotal += total
    list_mean_reward.append(total/n)
list_mean_reward.append(totaltotal/nn)
list_mean_reward

In [None]:
# r[function] is the reward log (entry 0) of the constant-strategy baseline.
r = []
for f in range(1,25):
    with gzip.open(f"test/con_{f}.gz", "rb") as fptr:
        r.append(pickle.load(fptr)[0])

In [None]:
def _reward_sum_and_count(runs):
    """Return (sum, count) over every single reward in runs[run][gen][i]."""
    total = 0
    n = 0
    for run in runs:
        for gen in run:
            for reward in gen:
                total += reward
                n += 1
    return total, n


# One row per strategy: mean reward per function plus the overall mean.
con_list_mean_reward = []
for a in range(len(r[0])):
    list_mean_reward = []
    totaltotal = 0
    nn = 0
    for f in range(24):
        total, n = _reward_sum_and_count(r[f][a])
        nn += n
        totaltotal += total
        list_mean_reward.append(total/n)
    list_mean_reward.append(totaltotal/nn)
    con_list_mean_reward.append(list_mean_reward)
np.shape(con_list_mean_reward)

In [None]:
def _reward_sum_and_count(runs):
    """Return (sum, count) over every single reward in runs[run][gen][i]."""
    total = 0
    n = 0
    for run in runs:
        for gen in run:
            for reward in gen:
                total += reward
                n += 1
    return total, n


# One row per trained model: mean reward per function plus the overall mean.
mod_list_mean_reward = []
for model in ("mod1", "mod2", "mod3"):
    # Reward logs (entry 0) of this model for all 24 functions.
    r = []
    for f in range(1,25):
        with gzip.open(f"test/{model}_{f}.gz", "rb") as fptr:
            r.append(pickle.load(fptr)[0])

    list_mean_reward = []
    totaltotal = 0
    nn = 0
    for f_r in r:
        total, n = _reward_sum_and_count(f_r)
        nn += n
        totaltotal += total
        list_mean_reward.append(total/n)
    list_mean_reward.append(totaltotal/nn)
    mod_list_mean_reward.append(list_mean_reward)

In [None]:
# Stack the baseline rows on top of the model rows.
mean_reward = np.concatenate([con_list_mean_reward, mod_list_mean_reward], axis=0)

In [None]:
# Rows are labelled by strategy/model; columns are the functions plus overall.
row_names = labels + ["model1", "model2", "model3"]
df = pd.DataFrame(data=mean_reward, index=row_names)

In [None]:
# Write the mean-reward table to an Excel file in the working directory.
df.to_excel("mean_reward.xlsx")

In [None]:
def make_pretty(styler):
    """Colour the table row-wise with a YlGnBu gradient that starts at 0.

    The styler is modified through background_gradient and handed back so the
    function can be used with Styler.pipe.
    """
    gradient_options = {"axis": 1, "vmin": 0, "cmap": "YlGnBu"}
    styler.background_gradient(**gradient_options)
    return styler

In [None]:
# Apply make_pretty to the table's Styler; as the last expression of the cell
# the styled table is what the notebook displays.
df.style.pipe(make_pretty)

## Analyse state features

In [None]:
# Load the pickled buffer that was saved alongside model1.
with gzip.open("model1/buffer_100.gz", "rb") as fptr:
    buffer = pickle.load(fptr)

In [None]:
# The first field of every buffer entry is used as the state vector.
states = [entry[0] for entry in buffer]
df = pd.DataFrame(data=states)
# Pairwise correlation between the state features, shown as a matrix image.
c = df.corr()
f = plt.figure(figsize=(19, 15))
plt.matshow(c, fignum=f.number)
plt.show()