In [4]:
#Start with pip install -r requirements.txt
import pandas as pd
from functools import partial

# Show floats with three decimals. The fully qualified option key is used because the
# bare "precision" pattern also matches "styler.format.precision" in newer pandas
# releases, which makes set_option raise an OptionError.
pd.set_option("display.precision", 3)


# Hong & Page - replication runs

The model in Hong & Page is run 50 times, with three different parameter combinations presented. Here, the fourth combination (large teams with many heuristics) is included, and the number of runs is increased to 500. This takes roughly 7 hours on a 4-core 2.4 GHz laptop, so I preferred to run it on Google Compute Engine, using [pyscript2gce](https://github.com/LukasWallrich/pyscript2gce-production/releases/tag/HP-ABM-replication). That run executed the following code.

In [2]:
from mesa.batchrunner import BatchRunnerMP

import httpimport

# Raw gist location from which the HPmodel module is imported below.
# NOTE(review): the last path segment looks like a pinned revision hash - confirm.
# httpimport executes the downloaded code, so this URL must stay trusted.
url = "https://gist.githubusercontent.com/LukasWallrich/05f445821fbae694b37a205dc08b2b4f/raw/6163cfc6aaa9eba33738f42c5b1a35cff1053005"

with httpimport.remote_repo(["HPmodel"], url):
    from HPmodel import HPProblem

# Parameters passed unchanged to every model run.
# NOTE(review): presumably n = landscape size and k = heuristic length - verify against HPmodel.
fixed_params = {"n": 2000, "k": 3}

# Parameters varied across runs (two values each).
# NOTE(review): presumably l = size of the heuristic pool and N_agents = team size - verify against HPmodel.
variable_params = {"l": (12, 20), "N_agents": (10, 20)}

batch_run = BatchRunnerMP(
    HPProblem,
    16,  # presumably the number of worker processes - confirm against BatchRunnerMP's signature
    variable_parameters = variable_params,
    fixed_parameters = fixed_params,
    iterations=500,
    max_steps=100,
    # Values read from each model instance and collected into the results
    model_reporters={
        "agent_descriptives": lambda m: m.agent_descriptives,
        "solution": lambda m: m.best_solution,
    }
)

batch_run.run_all()

# NOTE(review): `out` is not written to disk in this cell, while a later cell reads
# "HPmodel_results.pkl" - confirm where that file is produced.
out = batch_run.get_model_vars_dataframe()

16it [01:52,  7.01s/it]


In [8]:
import pickle

# Read the stored batch-run results into `res`.
# Unpickling executes code embedded in the file, so only load files you produced yourself.
with open("HPmodel_results.pkl", mode="rb") as results_file:
    res = pickle.load(results_file)


In [9]:
# Spread each run's `solution` entry over separate columns whose names get the
# suffix "_solution", then swap them in for the original nested column.
solution_columns = res.solution.apply(pd.Series).add_suffix("_solution")
res = pd.concat([res.drop(columns=["solution"]), solution_columns], axis=1)

def renamer(col, prefix):
    """Prefix a column name, leaving names that end in "agent" untouched.

    Args:
        col: Original column name (string).
        prefix: Text to put in front of the name.

    Returns:
        `col` itself when it ends with "agent", otherwise `prefix + col`.
    """
    return col if col.endswith("agent") else prefix + col

# Unpack the nested `agent_descriptives` entries once; the "random" and "best"
# parts are then expanded separately and labelled with their team type.
descriptives = res.agent_descriptives.apply(pd.Series)

res_random = descriptives["random"].apply(pd.Series).rename(columns=partial(renamer, prefix="random_"))

res_best = descriptives["best"].apply(pd.Series).rename(columns=partial(renamer, prefix="best_"))

# From the random-team frame keep only the columns that received the "random_"
# prefix; columns left unprefixed by `renamer` are taken from `res_best` alone.
random_only = [name for name in res_random.columns if name.startswith('random_')]

res = pd.concat([res.drop(columns=["agent_descriptives"]), res_best, res_random[random_only]], axis=1)

# Consecutive run identifier, independent of the (unordered) original index
res["run_id"] = res.reset_index().index
res = res.rename(columns={"best_agent": "top_agent"})

In [5]:

# The reshaping of `agent_descriptives` (including the `renamer` helper) is done in
# the preceding cell, which also drops that column. Repeating it here raised an
# AttributeError on "Restart & Run All", so this cell only displays the result.
res


Unnamed: 0,l,N_agents,Run,n,k,random_solution,best_solution,worst_agent,average_agent,top_agent,best_team_average,best_NPdiversity,random_team_average,random_NPdiversity,run_id
0,12,10,2000,2000,3,97.811,89.833,82.398,83.759,85.278,84.964,0.815,83.493,0.933,0
1341,20,10,2000,2000,3,99.553,99.553,83.603,85.063,86.428,86.253,0.778,85.142,0.941,1
1340,20,10,2000,2000,3,95.183,98.935,84.317,85.880,87.230,87.076,0.785,85.900,0.933,2
1339,20,10,2000,2000,3,99.808,89.774,83.128,84.673,86.203,86.042,0.881,84.743,0.933,3
1338,20,10,2000,2000,3,94.527,94.527,82.961,84.645,86.329,86.090,0.844,84.299,0.926,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659,12,20,2000,2000,3,89.454,89.454,82.777,84.242,85.236,85.052,0.874,84.208,0.907,1995
658,12,20,2000,2000,3,99.167,99.167,82.102,83.824,85.262,85.024,0.828,83.872,0.898,1996
657,12,20,2000,2000,3,98.871,98.871,83.379,85.177,86.581,86.376,0.835,85.370,0.891,1997
670,12,20,2000,2000,3,97.454,97.454,83.413,84.703,85.881,85.765,0.868,84.567,0.928,1998


In [6]:
#Pivot so that random and best groups can be easily compared
col_names = list(res.columns)

def check_var(col_name):
    """Tell whether a column is an identifier rather than a team-specific measure.

    Args:
        col_name: Column name to inspect.

    Returns:
        True when the name contains neither "random_" nor "best_", else False.
    """
    return "random_" not in col_name and "best_" not in col_name


id_cols = [name for name in col_names if check_var(name)]

# Long format: one row per run and per random_*/best_* measurement
long_form = pd.melt(res, id_cols)

# Split the measurement name on every underscore: part 0 is the team type, part 1
# becomes the pivoted column name (so "best_team_average" ends up under "team").
name_parts = long_form.variable.str.split("_", expand = True)

out = (
    long_form.join(name_parts)
    .rename(columns={0: "team_type"})
    .pivot_table(index=id_cols + ["team_type"], columns=[1], values="value")
    .reset_index()
)

out["NPdiversity"] = out["NPdiversity"] * 100 #Convert to percentages


In [10]:
#Performance and diversity of best versus random teams
tbl = (
    out[["team_type", "N_agents", "l", "solution", "NPdiversity"]]
    .groupby(["N_agents", "l", "team_type"])
    .agg(["mean", "std"])
)

tbl = tbl.round(2)


def format_mean_sd(stats, measure):
    """Render the mean and SD of `measure` as "mean (sd)" strings.

    Args:
        stats: Frame with two-level columns (measure, statistic), as built above.
        measure: First-level column name, e.g. "solution".

    Returns:
        Series of strings with exactly two decimals for both numbers, so that
        values such as 2.4 are shown as "2.40" and the table stays aligned.
    """
    two_decimals = "{:.2f}".format
    return stats[(measure, "mean")].map(two_decimals) + " (" + stats[(measure, "std")].map(two_decimals) + ")"


sol = format_mean_sd(tbl, "solution").to_frame("Solution")

div = format_mean_sd(tbl, "NPdiversity").to_frame("Diversity")

sol = sol.join(div)

# NOTE(review): the caption contains a raw "&"; confirm that the generated LaTeX
# compiles, or escape it if to_latex does not do so for captions.
sol.to_latex("Table1.tex", caption="Results of 500 runs of Hong & Page model")

sol


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Solution,Diversity
N_agents,l,team_type,Unnamed: 3_level_1,Unnamed: 4_level_1
10,12,best,92.48 (6.84),84.76 (4.39)
10,12,random,94.45 (5.55),91.63 (2.4)
10,20,best,93.27 (6.89),86.95 (4.61)
10,20,random,95.43 (4.29),94.92 (1.89)
20,12,best,93.56 (6.5),85.85 (2.98)
20,12,random,94.67 (5.57),91.75 (1.15)
20,20,best,94.8 (4.97),88.46 (3.3)
20,20,random,96.56 (3.33),95.07 (0.91)


In [25]:
# Flag the outcome of every run as 0 or 100, so that group means read as percentages
outcome_flags = {
    "random_winner": res["random_solution"] > res["best_solution"],
    "best_winner": res["random_solution"] < res["best_solution"],
    "tie": res["random_solution"] == res["best_solution"],
}
for outcome, flag in outcome_flags.items():
    res[outcome] = flag.astype(int) * 100

# Share of runs per outcome for each parameter combination
win_rates = (
    res[["random_winner", "tie", "best_winner", "N_agents", "l"]]
    .groupby(["N_agents", "l"])
    .mean()
)

# How many times more often the random team wins than the best-agent team
win_rates["odds"] = win_rates["random_winner"] / win_rates["best_winner"]

win_rates


Unnamed: 0_level_0,Unnamed: 1_level_0,random_winner,tie,best_winner,odds
N_agents,l,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10,12,40.0,41.8,18.2,2.198
10,20,46.6,24.8,28.6,1.629
20,12,32.0,50.2,17.8,1.798
20,20,43.4,36.0,20.6,2.107


In [28]:
#Describe agents
agent_cols = ["worst_agent", "top_agent", "best_team_average", "random_team_average"]
res.groupby(["N_agents", "l"])[agent_cols].agg(["mean", "std"])

Unnamed: 0_level_0,Unnamed: 1_level_0,worst_agent,worst_agent,top_agent,top_agent,best_team_average,best_team_average,random_team_average,random_team_average
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std
N_agents,l,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
10,12,83.298,0.388,86.181,0.318,85.991,0.274,84.854,0.283
10,20,83.24,0.278,86.292,0.103,86.15,0.103,84.844,0.192
20,12,83.489,0.567,86.274,0.5,85.987,0.554,84.989,0.548
20,20,83.802,0.741,86.569,0.693,86.356,0.734,85.26,0.759


# Divergences from the results of Hong and Page

## Standard deviations

In [31]:
#z-score based on H&P
# NOTE(review): 93.2, 92.56 and .02 are presumably the values reported by H&P - confirm against the paper
print((93.2-92.56)/.02)

#SD of random teams
from itertools import combinations, permutations
from random import Random, sample
from statistics import stdev, mean

N_SIMULATED_TEAMS = 10000  # teams drawn per team size
SEED = 2021  # fixed so that the printed standard deviations are reproducible

# All ordered triples of distinct step sizes from 1 to 12
heuristics = list(permutations(range(1, 13), 3))


def mean_pairwise_diversity(team):
    """Average share of positions at which two heuristics of a team differ.

    Every unordered pair is counted once; because the measure is symmetric this
    equals the mean over all ordered pairs.

    Args:
        team: Sequence of at least two equally long heuristics (tuples).

    Returns:
        Float between 0 (all heuristics identical) and 1 (no position shared).

    Raises:
        ValueError: If the team has fewer than two members.
    """
    if len(team) < 2:
        raise ValueError("a team needs at least two members to have a diversity")
    pairs = list(combinations(team, 2))
    differing = sum(a != b for first, second in pairs for a, b in zip(first, second))
    return differing / (len(team[0]) * len(pairs))


def random_team_diversity_sd(team_size, pool, n_teams=N_SIMULATED_TEAMS, seed=None):
    """Standard deviation of the diversity of randomly drawn teams.

    Args:
        team_size: Number of heuristics drawn (without replacement) per team.
        pool: List of heuristics to draw from.
        n_teams: Number of teams to simulate.
        seed: Seed for a private random generator; None gives a fresh random state.

    Returns:
        Sample standard deviation of the teams' mean pairwise diversity.
    """
    rng = Random(seed)
    diversities = [mean_pairwise_diversity(rng.sample(pool, team_size)) for _ in range(n_teams)]
    return stdev(diversities)


# The simulated values are kept inside the helper, so the `res` results frame used
# by the earlier cells is no longer overwritten by this cell.
print("Teams of 10: " + str(random_team_diversity_sd(10, heuristics, seed=SEED)))

print("Teams of 20: " + str(random_team_diversity_sd(20, heuristics, seed=SEED + 1)))




32.00000000000003
Teams of 10: 0.023504950603343615
Teams of 20: 0.011365416781533749


## Diversity in group of best agents

Grim et al. report the heuristics of the best-performing agents on 10 fully random landscapes in their Table 1. These groups have a mean diversity of 83% (range: 77% to 89%), as calculated below.

In [15]:
from itertools import permutations
from statistics import mean

def assess_hp_diversity(heuristic1, heuristic2):
    """Share of positions at which two heuristics differ.

    Positions are compared pairwise in order; the number of non-matching
    positions is divided by the length of `heuristic1`.

    Args:
        heuristic1: First heuristic (sequence of step sizes).
        heuristic2: Second heuristic, compared position by position.

    Returns:
        Float between 0 (identical) and 1 (different at every position).
    """
    length = len(heuristic1)
    matches = sum(1 for a, b in zip(heuristic1, heuristic2) if a == b)
    return (length - matches) / length

# One inner list per landscape, each holding the heuristics of that landscape's best agents
best_teams = [
    [(12, 4, 5), (12, 2, 4), (12, 5, 4), (12, 4, 2), (5, 12, 4), (4, 12, 2), (6, 12, 4), (4, 5, 12), (12, 4, 6)],
    [(5, 7, 6), (10, 8, 7), (8, 7, 10), (7, 10, 8), (7, 5, 6), (7, 8, 10), (11, 10, 8), (5, 6, 7), (10, 11, 8)],
    [(1, 10, 3), (1, 6, 2), (1, 3, 10), (3, 1, 10), (6, 2, 1), (10, 3, 1), (10, 1, 3), (1, 10, 6), (7, 5, 3)],
    [(11, 4, 1), (12, 2, 8), (11, 2, 12), (4, 11, 1), (11, 1, 4), (4, 1, 11), (12, 11, 2), (5, 8, 2), (8, 12, 2)],
    [(6, 1, 2), (3, 6, 1), (6, 1, 3), (1, 2, 7), (3, 6, 2), (1, 3, 6), (2, 6, 7), (7, 1, 2), (1, 2, 6)],
    [(4, 8, 7), (3, 4, 8), (4, 8, 3), (7, 4, 8), (4, 3, 8), (1, 8, 7), (3, 8, 4), (3, 8, 7), (8, 7, 2)],
    [(3, 12, 1), (1, 3, 12), (12, 1, 3), (3, 1, 12), (8, 3, 12), (11, 12, 8), (1, 8, 12), (12, 1, 8), (12, 3, 1)],
    [(2, 6, 11), (11, 2, 6), (6, 11, 2), (11, 6, 2), (6, 2, 11), (9, 6, 11), (2, 11, 6), (11, 9, 6), (11, 6, 9)],
    [(8, 7, 2), (8, 2, 7), (2, 7, 8), (8, 6, 7), (6, 8, 7), (7, 6, 4), (6, 7, 8), (7, 8, 6), (2, 8, 7)],
    [(2, 8, 3), (8, 3, 2), (12, 11, 3), (3, 12, 11), (12, 3, 11), (11, 3, 12), (2, 3, 8), (11, 12, 10), (12, 11, 10)],
]

# Mean diversity over all ordered pairs of heuristics, computed per team
res = [
    mean([assess_hp_diversity(first, second) for first, second in permutations(team, 2)])
    for team in best_teams
]

# Grand mean across teams, followed by the per-team values
print(mean(res))
res

0.8287037037037037


[0.7685185185185185,
 0.8703703703703703,
 0.861111111111111,
 0.8888888888888888,
 0.8425925925925926,
 0.787037037037037,
 0.8148148148148148,
 0.787037037037037,
 0.8148148148148148,
 0.8518518518518519]