In [1]:
import pandas as pd
import gsa
import barneshut as bh
import functions

In [2]:
dims = 30
pop_size = 50
num_funcs = 23
forces = [gsa.basicForce, bh.BHForce(1e5), bh.BHForce(0.5), bh.BHForce(1e-5), bh.BHForce(0)]
repetitions = 2

In [4]:
condition = gsa.timeShortCondition

###Run for `condition`

In [4]:
result = pd.DataFrame(columns = ["FUNCTION","SOLVER","BEST","ITERATIONS"])

for prob_num in range(1,num_funcs + 1):
    func = functions.getFunction(prob_num,dims)
    for force in forces:
        func.desc = str(prob_num) + "-" + force.desc
        print("Processing problem {0} with {1}".format(prob_num, force.desc))
        for i in range(repetitions):
            result = result.append(gsa.gsa(func,pop_size,force,condition, outputProgress=False), ignore_index = True)
    
result.to_csv("out-" + condition.desc + ".csv", index = False)

Processing problem 1 with GSA
Processing problem 1 with BH-100000.0
Processing problem 1 with BH-0.5
Processing problem 1 with BH-1e-05
Processing problem 1 with BH-0
Processing problem 2 with GSA
Processing problem 2 with BH-100000.0
Processing problem 2 with BH-0.5
Processing problem 2 with BH-1e-05
Processing problem 2 with BH-0
Processing problem 3 with GSA
Processing problem 3 with BH-100000.0
Processing problem 3 with BH-0.5
Processing problem 3 with BH-1e-05
Processing problem 3 with BH-0
Processing problem 4 with GSA
Processing problem 4 with BH-100000.0
Processing problem 4 with BH-0.5
Processing problem 4 with BH-1e-05
Processing problem 4 with BH-0
Processing problem 5 with GSA
Processing problem 5 with BH-100000.0
Processing problem 5 with BH-0.5
Processing problem 5 with BH-1e-05
Processing problem 5 with BH-0
Processing problem 6 with GSA
Processing problem 6 with BH-100000.0
Processing problem 6 with BH-0.5
Processing problem 6 with BH-1e-05
Processing problem 6 with BH-

###Analyse results fixed iterations run

In [20]:
condition = gsa.iterationCondition
result = pd.read_csv("./out-" + condition.desc + ".csv")

In [21]:
import numpy as np
from scipy.stats import ttest_ind

#Generate list of optimum values
optimums= np.zeros(23)
optimums[7] = -418.9829 * dims
optimums[13:23] = [1,3e-4,-1.0316,0.398,3,-3.86,-3.32,-10.1532,-10.4028,-10.5363]
#Duplicate values, one for each solver
optimums = [i for i in optimums for _ in range(len(forces))]

#Ensure BEST column in results is a number
result["BEST"] = result["BEST"].astype(float)

#Create averages DataFrame sorted in proper order
itAverages = result.groupby(["FUNCTION","SOLVER"])["BEST"].mean().reset_index()
itAverages["FUNCTION"] = itAverages["FUNCTION"].astype(int)
itAverages = itAverages.rename(columns={"BEST":"AVERAGE"})

#Sort by FUNCTION then by SOLVER in order given in forces
sortOrder = dict(zip([i.desc for i in forces],[i for i in range(len(forces))]))
itAverages["SOLVER_ORDER"] = itAverages["SOLVER"].map(sortOrder)
itAverages = itAverages.sort(["FUNCTION","SOLVER_ORDER"],ascending=[1,1]).reset_index(drop=True)
itAverages = itAverages.drop("SOLVER_ORDER",1)

#Add DIFF column
itAverages["DIFF"] = pd.Series(np.abs(itAverages["AVERAGE"]-optimums), index = itAverages.index)

#Create values to be used for t-test as a list of lists of lists. Inner lists store a set of values for a given func and solver
#Middle lists store the lists for a given func for every solver
#Outer list is used for each function
ttest_values = [[result[(result["FUNCTION"]==j) & (result["SOLVER"]==i.desc)]["BEST"].tolist() for i in forces] for j in range(1,24)]

#Calculate the p-values
itAverages["P-VALUE"] = pd.Series([ttest_ind(ttest_values[i][j],ttest_values[i][0])[1] for i in range(num_funcs) for j in range(len(forces))],index = itAverages.index)

#Determine significance
significanceLevel = 0.05
itAverages["SIGNIFICANT"] = pd.Series([i < significanceLevel for i in itAverages["P-VALUE"]], index = itAverages.index)

#If significant, ensure that it's better
itAverages["BETTER"] = pd.Series([False if not itAverages.ix[i,5] else (itAverages.ix[(i//len(forces))*len(forces),2] < itAverages.ix[i,2]) for i in itAverages.index], index = itAverages.index)

In [7]:
itAverages[itAverages["SOLVER"] == "GSA"].ix[:,[0,1,3]]

Unnamed: 0,FUNCTION,SOLVER,DIFF
0,1,GSA,1.159571e-17
5,2,GSA,2.439607e-08
10,3,GSA,226.6843
15,4,GSA,0.001092884
20,5,GSA,25.10509
25,6,GSA,0.0
30,7,GSA,0.06783595
35,8,GSA,11536.04
40,9,GSA,12.30432
45,10,GSA,1.835515e-09


In [8]:
itAverages[itAverages["SOLVER"] != "GSA"]

Unnamed: 0,FUNCTION,SOLVER,AVERAGE,DIFF,P-VALUE,SIGNIFICANT,BETTER
1,1,BH-100000.0,4.223836e-17,4.223836e-17,1.299871e-16,True,True
2,1,BH-0.5,4.054696e-17,4.054696e-17,5.633309e-19,True,True
3,1,BH-1e-05,4.073684e-17,4.073684e-17,6.501938e-15,True,True
4,1,BH-0,4.258307e-17,4.258307e-17,1.371856e-14,True,True
6,2,BH-100000.0,4.985516e-08,4.985516e-08,2.492333e-19,True,True
7,2,BH-0.5,4.728732e-08,4.728732e-08,9.062553e-18,True,True
8,2,BH-1e-05,5.112812e-08,5.112812e-08,5.328573e-17,True,True
9,2,BH-0,5.128440e-08,5.128440e-08,2.236382e-22,True,True
11,3,BH-100000.0,3.317490e+02,3.317490e+02,6.640567e-03,True,True
12,3,BH-0.5,4.592521e+02,4.592521e+02,1.807893e-08,True,True


###Analyse results fixed time run

In [9]:
condition = gsa.timeCondition
result = pd.read_csv("./out-" + condition.desc + ".csv")

In [11]:
import numpy as np
from scipy.stats import ttest_ind

#Ensure ITERATIONS column in results is a number
result["ITERATIONS"] = result["ITERATIONS"].astype(int)

#Create averages DataFrame sorted in proper order
timeAverages = result.groupby(["FUNCTION","SOLVER"])["ITERATIONS"].mean().reset_index()
timeAverages["FUNCTION"] = timeAverages["FUNCTION"].astype(int)
timeAverages = timeAverages.rename(columns={"ITERATIONS":"AVERAGE"})

#Sort by FUNCTION then by SOLVER in order given in forces
sortOrder = dict(zip([i.desc for i in forces],[i for i in range(len(forces))]))
timeAverages["SOLVER_ORDER"] = timeAverages["SOLVER"].map(sortOrder)
timeAverages = timeAverages.sort(["FUNCTION","SOLVER_ORDER"],ascending=[1,1]).reset_index(drop=True)
timeAverages = timeAverages.drop("SOLVER_ORDER",1)

#Create values to be used for t-test as a list of lists of lists. Inner lists store a set of values for a given func and solver
#Middle lists store the lists for a given func for every solver
#Outer list is used for each function
ttest_values = [[result[(result["FUNCTION"]==j) & (result["SOLVER"]==i.desc)]["ITERATIONS"].tolist() for i in forces] for j in range(1,24)]

#Calculate the p-values
timeAverages["P-VALUE"] = pd.Series([ttest_ind(ttest_values[i][j],ttest_values[i][0])[1] for i in range(num_funcs) for j in range(len(forces))],index = timeAverages.index)

#Determine significance
significanceLevel = 0.01
timeAverages["SIGNIFICANT"] = pd.Series([i < significanceLevel for i in timeAverages["P-VALUE"]], index = timeAverages.index)

#If significant, ensure that it's better
timeAverages["BETTER"] = pd.Series([False if not timeAverages.ix[i,4] else (timeAverages.ix[(i//len(forces))*len(forces),2] < timeAverages.ix[i,2]) for i in timeAverages.index], index = timeAverages.index)

In [12]:
print(len(timeAverages[(timeAverages["SOLVER"] != "GSA") & (timeAverages["BETTER"])]))
print(len(timeAverages[timeAverages["SOLVER"] != "GSA"]))

92
92


###Combined analysis

In [13]:
timeCondition = gsa.timeCondition
timeResult = pd.read_csv("./out-" + timeCondition.desc + ".csv")
itCondition = gsa.iterationCondition
itResult = pd.read_csv("./out-" + itCondition.desc + ".csv")

In [47]:
import numpy as np
from scipy.stats import ttest_ind

#Generate list of optimum values
optimums= np.zeros(23)
optimums[7] = -418.9829 * dims
optimums[13:23] = [1,3e-4,-1.0316,0.398,3,-3.86,-3.32,-10.1532,-10.4028,-10.5363]
#Duplicate values, one for each solver
optimums = [i for i in optimums for _ in range(len(forces))]

#Ensure ITERATIONS and BEST columns in results are numbers
timeResult["ITERATIONS"] = timeResult["ITERATIONS"].astype(int)
itResult["BEST"] = itResult["BEST"].astype(float)

#Create averages DataFrame sorted in proper order
averages = timeResult.groupby(["FUNCTION","SOLVER"])["ITERATIONS"].mean().reset_index()
averages["FUNCTION"] = averages["FUNCTION"].astype(int)
averages = averages.rename(columns={"ITERATIONS":"AV_ITS"})
averages["AV_BEST"] = itResult.groupby(["FUNCTION","SOLVER"])["BEST"].mean().reset_index()["BEST"]

#Sort by FUNCTION then by SOLVER in order given in forces
sortOrder = dict(zip([i.desc for i in forces],[i for i in range(len(forces))]))
averages["SOLVER_ORDER"] = averages["SOLVER"].map(sortOrder)
averages = averages.sort(["FUNCTION","SOLVER_ORDER"],ascending=[1,1]).reset_index(drop=True)
averages = averages.drop("SOLVER_ORDER",1)

#Calculate difference in average best value
averages["AV_BEST"] = np.abs(averages["AV_BEST"] - optimums)

#Average best value test
#Create values to be used for t-test as a list of lists of lists. Inner lists store a set of values for a given func and solver
#Middle lists store the lists for a given func for every solver
#Outer list is used for each function
ttest_values = [[itResult[(itResult["FUNCTION"]==j) & (itResult["SOLVER"]==i.desc)]["BEST"].tolist() for i in forces] for j in range(1,24)]

#Calculate the p-values
averages["P_BEST"] = pd.Series([ttest_ind(ttest_values[i][j],ttest_values[i][0])[1] for i in range(num_funcs) for j in range(len(forces))],index = averages.index)

#Determine significance
significanceLevel = 0.01
averages["SIG_BEST"] = pd.Series([i < significanceLevel for i in averages["P_BEST"]], index = averages.index)

#If significant, ensure that it's better
averages["BEST_GEQ"] = pd.Series([True if not averages.ix[i,5] else (averages.ix[(i//len(forces))*len(forces),3] < averages.ix[i,3]) for i in averages.index], index = averages.index)

#Average iteration count test
#Create values to be used for t-test as a list of lists of lists. Inner lists store a set of values for a given func and solver
#Middle lists store the lists for a given func for every solver
#Outer list is used for each function
ttest_values = [[timeResult[(timeResult["FUNCTION"]==j) & (timeResult["SOLVER"]==i.desc)]["ITERATIONS"].tolist() for i in forces] for j in range(1,24)]

#Calculate the p-values
averages["P_IT"] = pd.Series([ttest_ind(ttest_values[i][j],ttest_values[i][0])[1] for i in range(num_funcs) for j in range(len(forces))],index = averages.index)

#Determine significance
significanceLevel = 0.01
averages["SIG_IT"] = pd.Series([i < significanceLevel for i in averages["P_IT"]], index = averages.index)

#If significant, ensure that it's better
averages["IT_GEQ"] = pd.Series([True if not averages.ix[i,8] else (averages.ix[(i//len(forces))*len(forces),2] < averages.ix[i,2]) for i in averages.index], index = averages.index)

In [55]:
averages.to_csv("analysis.csv", index=False)

###CEC 2013 benchmark

In [3]:
from cec2013lsgo.cec2013 import Benchmark
wrapper = lambda num, sol: Benchmark().get_function(num)(sol)

In [4]:
condition = gsa.timeLongCondition

In [8]:
from pympler import summary

In [9]:
result = pd.DataFrame(columns = ["FUNCTION","SOLVER","BEST","ITERATIONS"])

for prob_num in range(1,2):#Benchmark().get_num_functions() + 1):
    info = Benchmark().get_info(prob_num)
    dims = info['dimension']
    func = functions.FunctionMaker("cec-" + str(prob_num), [info['upper']]*dims, [info['lower']]*dims, dims, 
                                   False, lambda sol: wrapper(prob_num,sol))
    for force in forces:
        func.desc = str(prob_num) + "-" + force.desc
        print("Processing problem {0} with {1}".format(prob_num, force.desc))
        for i in range(repetitions):
            result = result.append(gsa.gsa(func,pop_size,force,condition, outputProgress=False), ignore_index = True)
    
result.to_csv("cec-" + condition.desc + ".csv", index = False)

Processing problem 1 with GSA


KeyboardInterrupt: 