# EVAC-2 Code And Details
This notebook contains all code and explanations of said code. Code is separated into blocks, organised and split by purpose of code. 

The method used for this assessment to evolve agents throughout gameplay was a neural network, where weights of said neural network are evolved generation by generation. Further detail can be found at the relevant code sections within this report.

## Representation of Agent Behaviour


### Neural Network
The neural network used by all agents in the system is a single hidden layer network with 4 input nodes, 8 hidden nodes and 4 output nodes. These 4 output nodes represent the decision of which group to join, and a group is selected using a softmax function. The input nodes of this neural network are the agents own group, the agents own wealth, the opponents group and the opponents wealth. These inputs were settled on after experimentation with other inputs such as the number of games the agent had played, or the current game number. These additional inputs did not improve performance and were removed to reduce chance of overfitting.

A bias of 1 was added to the input layer to improve performance.

In [251]:
import numpy as np

numInputNodes = 4
numHiddenNodes = 8
numOutputNodes = 4

IND_SIZE = ((numInputNodes+1) * numHiddenNodes) +  + (numHiddenNodes * numOutputNodes)

class NeuralNetwork(object):
  def __init__(self, numInput, numHidden, numOutput):
    self.numInput = numInput + 1
    self.numHidden = numHidden
    self.numOutput = numOutput

    self.wh = np.random.randn(self.numHidden, self.numInput) 
    self.wo = np.random.randn(self.numOutput, self.numHidden)

    self.ReLU = lambda x : max(0,x)

  def softmax(self, x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

  def feedForward(self, inputs):
    inputsBias = inputs[:]
    inputsBias.insert(len(inputs), 1)

    h1 = np.dot(self.wh, inputsBias)
    h1 = [self.ReLU(x) for x in h1]

    output = np.dot(self.wo, h1)
    return self.softmax(output)

  def getWeightsLinear(self):
    flat_wh = list(self.wh.flatten())
    flat_wo = list(self.wo.flatten())
    return( flat_wh + flat_wo )

  def setWeightsLinear(self, Wgenome):
    numWeights_IH = self.numHidden * (self.numInput)
    self.wh = np.array(Wgenome[:numWeights_IH])
    self.wh = self.wh.reshape((self.numHidden, self.numInput))
    self.wo = np.array(Wgenome[numWeights_IH:])
    self.wo = self.wo.reshape((self.numOutput, self.numHidden))

In [252]:
import numpy as np

class player():
  def __init__(self, IND_SIZE, group):
    self.wealth = 0
    self.startingGroup = group
    self.group = self.startingGroup
    self.gameCount = 0
    self.weights = tools.initRepeat(list, toolbox.attr_float, IND_SIZE)
    self.fitness = 0
  
  def evaluate(self,opponent,network):
    output = network.feedForward([self.wealth, self.group, opponent.wealth, opponent.group])
    decision = np.argmax(output, axis=0)
    if (self.group != decision):
      self.group = decision
    self.fitness = self.wealth / self.gameCount

  def addPayoff(self, opponent):
    payoffs = [[4,0,4,0],[6,4,6,1],[4,0,1,0],[6,1,6,0]]
    self.wealth += payoffs[self.group][opponent.group]
    self.gameCount += 1

  def reset(self):
    self.wealth = 0
    self.gameCount = 0
    self.group = self.startingGroup
    self.fitness = 0

In [253]:
from IPython.display import clear_output
%matplotlib inline
import matplotlib.pyplot as plt

fig = plt.figure()
labels = ["Saints", "Buddies", "Fight Club", "Vandals"]
xaxes = [[],[],[],[]]
yaxes = [[],[],[],[]]
INTERVAL = 1

def plotGroups(groups, tick):
  if (tick % INTERVAL == 0): 
    clear_output(wait=True)
    plt.cla()
  for i in range(0,len(groups)):
    xaxes[i].append(tick)
    yaxes[i].append(groups[i])
    if (tick % INTERVAL == 0): plt.plot(xaxes[i], yaxes[i])#, label=labels[i])
  if (tick % INTERVAL == 0):
    plt.legend(labels, loc='best', fancybox=True, framealpha=0.5)
    plt.pause(0.05)

<Figure size 432x288 with 0 Axes>

In [254]:
from IPython.display import clear_output
%matplotlib inline
import matplotlib.pyplot as plt

fig = plt.figure()
labels = ["Mean", "Min", "Max"]
xaxes = [[],[],[]]
yaxes = [[],[],[]]
INTERVAL = 1

def plotFitness(data, tick):
  if (tick % INTERVAL == 0): 
    clear_output(wait=True)
    plt.cla()
  yaxes[0].append(data["mean"])
  yaxes[1].append(data["min"])
  yaxes[2].append(data["max"])
  for i in range(0,len(yaxes)):
    xaxes[i].append(tick)
    if (tick % INTERVAL == 0): plt.plot(xaxes[i], yaxes[i])#, label=labels[i])
  if (tick % INTERVAL == 0):
    plt.legend(labels, loc='best', fancybox=True, framealpha=0.5)
    plt.pause(0.05)

<Figure size 432x288 with 0 Axes>

In [255]:
def sumGroups(groups):
  groupTotals=[0,0,0,0]
  for person in groups:
    groupTotals[person.group] += person.fitness
  return groupTotals

def avgGroups(groups):
  groupTotals=[0,0,0,0]
  counts=[0,0,0,0]
  for person in groups:
    if (person.gameCount > 0):
      groupTotals[person.startingGroup] += person.wealth
      counts[person.startingGroup] += 1
  for i in range(0,4):
    if(groupTotals[i] > 0):
      groupTotals[i] = groupTotals[i] / counts[i] 
  return groupTotals

def countGroups(pop):
  groupTotals=[0,0,0,0]
  for person in pop:
    groupTotals[person.group] += 1
  return groupTotals

def fitnessStats(pop):
  fitnessSum = 0
  fitnessMax = 0 
  fitnessMin = -1
  for person in pop:
    if(person.fitness > fitnessMax):
      fitnessMax = person.fitness
    if(person.fitness < fitnessMin or fitnessMin == -1):
      fitnessMin = person.fitness
    fitnessSum += person.fitness
  return { "mean": fitnessSum/len(pop), "max":fitnessMax, "min":fitnessMin }

In [256]:
from deap import base
from deap import tools
import random

toolbox = base.Toolbox()

toolbox.register("attr_float", random.uniform, -1.0, 1.0)
toolbox.register("saint", player, IND_SIZE, 0)
toolbox.register("buddy", player, IND_SIZE, 1)
toolbox.register("fighter", player, IND_SIZE, 2)
toolbox.register("vandal", player, IND_SIZE, 3)

toolbox.register("mate", tools.cxOnePoint)

toolbox.register("select", tools.selTournament, tournsize=5)

toolbox.register("mutate", tools.mutGaussian, mu=0.0, sigma=0.5, indpb=0.1)
toolbox.register("saints", tools.initRepeat, list, toolbox.saint)
toolbox.register("buddies", tools.initRepeat, list, toolbox.buddy)
toolbox.register("fightClub", tools.initRepeat, list, toolbox.fighter)
toolbox.register("vandals", tools.initRepeat, list, toolbox.vandal)

NGEN = 50
NGAMES = 10000
CXPB = 0.5
MUTPB = 1
POP = 4*400

network = NeuralNetwork(numInputNodes, numHiddenNodes, numOutputNodes)

pop = toolbox.saints(n=POP//4) + toolbox.buddies(n=POP//4) + toolbox.fightClub(n=POP//4) + toolbox.vandals(n=POP//4)
for g in range(NGEN):
  print("-- Generation %i --" % g)
  offspring = toolbox.select(pop[:len(pop)//4], len(pop)//4) + \
    toolbox.select(pop[len(pop)//4:(len(pop)//4)*2], len(pop)//4) + \
    toolbox.select(pop[(len(pop)//4)*2:(len(pop)//4)*3], len(pop)//4) + \
    toolbox.select(pop[(len(pop)//4)*3:len(pop)], len(pop)//4)

  for person in offspring:
    person.reset()

  for child1, child2 in zip(offspring[::2], offspring[1::2]):
    if random.random() < CXPB:
      toolbox.mate(child1.weights, child2.weights)

  for mutant in offspring:
    if random.random() < MUTPB:
      toolbox.mutate(mutant.weights)

  for r in range(NGAMES):
    selection = random.sample(range(POP),2)
    offspring[selection[0]].addPayoff(offspring[selection[1]])
    offspring[selection[1]].addPayoff(offspring[selection[0]])
    offspring[selection[0]].evaluate(offspring[selection[1]],network)
    offspring[selection[1]].evaluate(offspring[selection[0]],network)
  
  pop = offspring
  print("Group avg    : " + str(avgGroups(pop)))
  print("Group count  : "+str(countGroups(pop)))

'''for person in pop:
  person.reset()

Group avg    : [5008.0, 10483.0, 3138.0, 8175.0]
Group count  : [400, 400, 400, 400]

counter=0
for r in range(20000):
  selection = random.sample(range(POP),2)
  pop[selection[0]].addPayoff(pop[selection[1]])
  pop[selection[1]].addPayoff(pop[selection[0]])
  pop[selection[0]].evaluate(pop[selection[1]],network,r)
  pop[selection[1]].evaluate(pop[selection[0]],network,r)
  groupCounts = countGroups(pop)
  #print(groupCounts)
  plotGroups(groupCounts,r)
  if (groupCounts.count(0) == 3):
    counter+=1
    if(counter>20):
      break'''

-- Generation 0 --
Group avg    : [56.265, 61.7125, 58.315, 57.07]
Group count  : [0, 1331, 0, 269]
-- Generation 1 --
Group avg    : [133.6475, 123.8075, 138.4375, 135.0525]
Group count  : [0, 1201, 0, 399]
-- Generation 2 --
Group avg    : [263.5175, 258.8975, 288.085, 260.9625]
Group count  : [0, 1169, 0, 431]
-- Generation 3 --
Group avg    : [570.1725, 429.4025, 475.7925, 517.2775]
Group count  : [0, 1209, 0, 391]
-- Generation 4 --
Group avg    : [881.435, 712.695, 782.345, 816.6025]
Group count  : [0, 1402, 0, 198]
-- Generation 5 --
Group avg    : [1331.4125, 1043.1375, 1175.46, 1353.4625]
Group count  : [0, 1185, 0, 415]
-- Generation 6 --
Group avg    : [2049.8925, 2269.765, 1715.8125, 2278.345]
Group count  : [0, 1390, 0, 210]
-- Generation 7 --
Group avg    : [3625.51, 4102.2425, 3056.9475, 4273.65]
Group count  : [0, 887, 0, 713]
-- Generation 8 --
Group avg    : [5718.495, 7164.86, 5584.14, 6256.16]
Group count  : [0, 1527, 0, 73]
-- Generation 9 --
Group avg    : [6412.2

KeyboardInterrupt: 